Skip to content

Commit c17b8ac

Browse files
authored
Merge pull request #594 from MerginMaps/gpgk_restore_from_checkpoints
For gpkg file restore always use diff checkpoints
2 parents a7aea9c + 6b95656 commit c17b8ac

5 files changed

Lines changed: 170 additions & 72 deletions

File tree

server/mergin/sync/models.py

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -743,22 +743,21 @@ def diffs_chain(
743743
return None, []
744744

745745
diffs = []
746-
cached_items = Checkpoint.get_checkpoints(
747-
basefile.project_version_name, version
748-
)
746+
checkpoints = Checkpoint.get_checkpoints(basefile.project_version_name, version)
749747
expected_diffs = (
750748
FileDiff.query.filter_by(
751749
basefile_id=basefile.id,
752750
)
753751
.filter(
754752
tuple_(FileDiff.rank, FileDiff.version).in_(
755-
[(item.rank, item.end) for item in cached_items]
753+
[(item.rank, item.end) for item in checkpoints]
756754
)
757755
)
758756
.all()
759757
)
760758

761-
for item in cached_items:
759+
for item in checkpoints:
760+
diff_needs_to_be_created = False
762761
diff = next(
763762
(
764763
d
@@ -767,25 +766,38 @@ def diffs_chain(
767766
),
768767
None,
769768
)
770-
if diff and os.path.exists(diff.abs_path):
771-
diffs.append(diff)
772-
elif item.rank > 0:
773-
# fallback if checkpoint does not exist: replace merged diff with individual diffs
774-
individual_diffs = (
775-
FileDiff.query.filter_by(
776-
basefile_id=basefile.id,
777-
rank=0,
769+
if diff:
770+
if os.path.exists(diff.abs_path):
771+
diffs.append(diff)
772+
else:
773+
diff_needs_to_be_created = True
774+
else:
775+
# we do not have record in DB, create a checkpoint if it makes sense
776+
if item.rank > 0 and FileDiff.can_create_checkpoint(file_id, item):
777+
diff = FileDiff(
778+
basefile=basefile,
779+
version=item.end,
780+
rank=item.rank,
781+
path=basefile.file.generate_diff_name(),
782+
size=None,
783+
checksum=None,
778784
)
779-
.filter(
780-
FileDiff.version >= item.start, FileDiff.version <= item.end
785+
db.session.add(diff)
786+
db.session.commit()
787+
diff_needs_to_be_created = True
788+
else:
789+
# we asked for checkpoint where there was no change
790+
continue
791+
792+
if diff_needs_to_be_created:
793+
diff_created = diff.construct_checkpoint()
794+
if diff_created:
795+
diffs.append(diff)
796+
else:
797+
logging.error(
798+
f"Failed to create a diff for file {basefile.file.path} at version {basefile.project_version_name} of rank {item.rank}."
781799
)
782-
.order_by(FileDiff.version)
783-
.all()
784-
)
785-
diffs.extend(individual_diffs)
786-
else:
787-
# we asked for individual diff but there is no such diff as there was not change at that version
788-
continue
800+
return None, []
789801

790802
return basefile, diffs
791803

@@ -924,9 +936,10 @@ def construct_checkpoint(self) -> bool:
924936
return True
925937

926938
if self.rank == 0:
927-
raise ValueError(
939+
logging.error(
928940
"Checkpoint of rank 0 should be created by user upload, cannot be constructed"
929941
)
942+
return False
930943

931944
# merged diffs can only be created for certain versions
932945
if self.version % LOG_BASE:

server/mergin/sync/public_api_controller.py

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import os
99
import logging
1010
from dataclasses import asdict
11+
from enum import Enum
1112
from typing import Dict
1213
from datetime import datetime
1314

@@ -80,6 +81,7 @@
8081
generate_location,
8182
is_valid_uuid,
8283
get_device_id,
84+
is_versioned_file,
8385
prepare_download_response,
8486
get_device_id,
8587
wkb2wkt,
@@ -287,6 +289,12 @@ def delete_project(namespace, project_name): # noqa: E501
287289
return NoContent, 200
288290

289291

292+
class DowloadFileAction(Enum):
293+
FULL = "full"
294+
FULL_GPKG = "full_gpkg"
295+
DIFF = "diff"
296+
297+
290298
def download_project_file(
291299
project_name, namespace, file, version=None, diff=None
292300
): # noqa: E501
@@ -307,10 +315,20 @@ def download_project_file(
307315
308316
:rtype: file
309317
"""
310-
project = require_project(namespace, project_name, ProjectPermissions.Read)
311-
if diff and not version:
318+
if not is_versioned_file(file):
319+
action = DowloadFileAction.FULL
320+
elif diff:
321+
action = DowloadFileAction.DIFF
322+
else:
323+
action = DowloadFileAction.FULL_GPKG
324+
325+
if action is DowloadFileAction.DIFF and not version:
312326
abort(400, f"Changeset must be requested for particular file version")
313327

328+
if action is DowloadFileAction.FULL and diff is True:
329+
abort(404, f"No diff in particular file {file})")
330+
331+
project = require_project(namespace, project_name, ProjectPermissions.Read)
314332
lookup_version = (
315333
ProjectVersion.from_v_name(version) if version else project.latest_version
316334
)
@@ -329,24 +347,30 @@ def download_project_file(
329347
if not fh or fh.change == PushChangeType.DELETE.value:
330348
abort(404, f"File {file} not found")
331349

332-
if diff and version:
333-
# get specific version of geodiff file modified in requested version
334-
if not fh.diff:
335-
abort(404, f"No diff in particular file {file} version")
336-
file_path = fh.diff_file.location
337-
else:
338-
file_path = fh.location
339-
340-
if version and not diff:
341-
project.storage.restore_versioned_file(
342-
file, ProjectVersion.from_v_name(version)
343-
)
350+
# user asked for diff, but there is no diff at that version
351+
if action is DowloadFileAction.DIFF and not fh.diff:
352+
abort(404, f"No diff in particular file {file} version")
344353

354+
file_path = (
355+
fh.diff_file.location if action is DowloadFileAction.DIFF else fh.location
356+
)
345357
abs_path = os.path.join(project.storage.project_dir, file_path)
346-
# check file exists (e.g. there might have been issue with restore)
358+
347359
if not os.path.exists(abs_path):
348-
logging.error(f"Missing file {namespace}/{project_name}/{file_path}")
349-
abort(404)
360+
if action is DowloadFileAction.FULL_GPKG:
361+
project.storage.restore_versioned_file(
362+
file, ProjectVersion.from_v_name(version)
363+
)
364+
365+
# check again after restore
366+
if not os.path.exists(abs_path):
367+
logging.error(
368+
f"Failed to restore {namespace}/{project_name}/{file_path}"
369+
)
370+
abort(404)
371+
else:
372+
logging.error(f"Missing file {namespace}/{project_name}/{file_path}")
373+
abort(404)
350374

351375
response = prepare_download_response(project.storage.project_dir, file_path)
352376
return response

server/mergin/tests/test_file_restore.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,13 @@
88

99
from ..app import db
1010
from ..auth.models import User
11-
from ..sync.models import ProjectVersion, Project, GeodiffActionHistory
11+
from ..sync.models import (
12+
FileDiff,
13+
ProjectFilePath,
14+
ProjectVersion,
15+
Project,
16+
GeodiffActionHistory,
17+
)
1218
from . import test_project_dir, TMP_DIR
1319
from .utils import (
1420
create_project,
@@ -163,6 +169,19 @@ def test_version_file_restore(diff_project):
163169
diff_project.storage.restore_versioned_file("base.gpkg", 7)
164170
assert os.path.exists(test_file)
165171
assert gpkgs_are_equal(test_file, test_file + "_backup")
172+
# no merged diffs needed
173+
file_path_id = (
174+
ProjectFilePath.query.filter_by(project_id=diff_project.id, path="base.gpkg")
175+
.first()
176+
.id
177+
)
178+
assert (
179+
FileDiff.query.filter_by(file_path_id=file_path_id)
180+
.filter(FileDiff.rank > 0)
181+
.count()
182+
== 0
183+
)
184+
166185
# check we track performance of reconstruction
167186
gh = GeodiffActionHistory.query.filter_by(
168187
project_id=diff_project.id, target_version="v7"
@@ -198,3 +217,44 @@ def test_version_file_restore(diff_project):
198217
diff_project.storage.restore_versioned_file("test.txt", 1)
199218
assert not os.path.exists(test_file)
200219
assert not os.path.exists(diff_project.storage.geodiff_working_dir)
220+
221+
# let's add some dummy changes to test.gpkg so we can restore full gpkg using checkpoints created on demand
222+
file_path_id = (
223+
ProjectFilePath.query.filter_by(project_id=diff_project.id, path="test.gpkg")
224+
.first()
225+
.id
226+
)
227+
base_gpkg = os.path.join(diff_project.storage.project_dir, "test.gpkg")
228+
shutil.copy(
229+
os.path.join(diff_project.storage.project_dir, "v9", "test.gpkg"), base_gpkg
230+
)
231+
for i in range(23):
232+
sql = f"UPDATE simple SET rating={i}"
233+
execute_query(base_gpkg, sql)
234+
pv = push_change(
235+
diff_project, "updated", "test.gpkg", diff_project.storage.project_dir
236+
)
237+
assert diff_project.latest_version == pv.name == (11 + i)
238+
file_diff = FileDiff.query.filter_by(
239+
file_path_id=file_path_id, version=pv.name, rank=0
240+
).first()
241+
assert file_diff and os.path.exists(file_diff.abs_path)
242+
243+
assert (
244+
FileDiff.query.filter_by(file_path_id=file_path_id)
245+
.filter(FileDiff.rank > 0)
246+
.count()
247+
== 0
248+
)
249+
250+
test_file = os.path.join(diff_project.storage.project_dir, "v30", "test.gpkg")
251+
os.rename(test_file, test_file + "_backup")
252+
diff_project.storage.restore_versioned_file("test.gpkg", 30)
253+
assert os.path.exists(test_file)
254+
assert gpkgs_are_equal(test_file, test_file + "_backup")
255+
assert (
256+
FileDiff.query.filter_by(file_path_id=file_path_id)
257+
.filter(FileDiff.rank > 0)
258+
.count()
259+
> 0
260+
)

server/mergin/tests/test_project_controller.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,14 @@ def test_download_diff_file(client, diff_project):
880880

881881
# download full version after file was removed
882882
os.remove(os.path.join(diff_project.storage.project_dir, file_change.location))
883+
file_path_id = (
884+
ProjectFilePath.query.filter_by(project_id=diff_project.id, path=test_file)
885+
.first()
886+
.id
887+
)
888+
basefile, diffs = FileHistory.diffs_chain(file_path_id, 4)
889+
# we construct full gpkg from basefile and single diff v4
890+
assert basefile is not None and len(diffs) == 1
883891
resp = client.get(
884892
"/v1/project/raw/{}/{}?file={}&version=v4".format(
885893
test_workspace_name, test_project, test_file
@@ -1902,11 +1910,17 @@ def test_file_diffs_chain(diff_project):
19021910
assert len(diffs) == 1
19031911
assert diffs[0].version == 6
19041912

1905-
# diff was used in v7, nothing happened in v8 (=v7)
1906-
basefile, diffs = FileHistory.diffs_chain(file_id, 8)
1913+
# diff was used in v7
1914+
basefile, diffs = FileHistory.diffs_chain(file_id, 7)
19071915
assert basefile.version.name == 5
19081916
assert len(diffs) == 2
19091917

1918+
# nothing happened in v8 (=v7) but we have now merged diff in chain v5-v8
1919+
basefile, diffs = FileHistory.diffs_chain(file_id, 8)
1920+
assert basefile.version.name == 5
1921+
assert len(diffs) == 1
1922+
assert diffs[0].rank == 1 and diffs[0].version == 8
1923+
19101924
# file was removed in v9
19111925
basefile, diffs = FileHistory.diffs_chain(file_id, 9)
19121926
assert not basefile

server/mergin/tests/test_public_api_v2.py

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -304,45 +304,31 @@ def test_create_diff_checkpoint(diff_project):
304304

305305
basefile, diffs = FileHistory.diffs_chain(file_path_id, 32)
306306
assert basefile.project_version_name == 9
307-
# so far we only have individual diffs
308-
assert len(diffs) == 22
307+
assert len(diffs) == 3
308+
# also a lower diff rank was created
309+
lower_diff = FileDiff.query.filter_by(version=24, rank=1).first()
310+
assert os.path.exists(lower_diff.abs_path)
309311

310312
# diff for v17-v20 from individual diffs
311313
assert FileDiff.can_create_checkpoint(file_path_id, Checkpoint(1, 5)) is True
312-
diff = FileDiff(
313-
basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=20, rank=1
314-
)
315-
db.session.add(diff)
316-
db.session.commit()
317-
assert not os.path.exists(diff.abs_path)
314+
diff = FileDiff.query.filter_by(
315+
file_path_id=file_path_id, version=20, rank=1
316+
).first()
317+
os.remove(diff.abs_path)
318318
diff.construct_checkpoint()
319319
assert os.path.exists(diff.abs_path)
320320

321321
basefile, diffs = FileHistory.diffs_chain(file_path_id, 20)
322322
assert basefile.project_version_name == 9
323-
# 6 individual diffs (v11-v16) + merged diff (v17-v20) as the last one
324-
assert len(diffs) == 7
323+
# individual diff v12 + (v13-v16) + (v17-v20) as the last one
324+
assert len(diffs) == 3
325325
assert diffs[-1] == diff
326326

327327
# repeat - nothing to do
328328
mtime = os.path.getmtime(diff.abs_path)
329329
diff.construct_checkpoint()
330330
assert mtime == os.path.getmtime(diff.abs_path)
331331

332-
# some lower rank diffs still missing
333-
assert not FileDiff.query.filter_by(version=24, rank=1).count()
334-
335-
# diff for v17-v32 with merged diffs, this will also create lower missing ranks
336-
diff = FileDiff(
337-
basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=32, rank=2
338-
)
339-
db.session.add(diff)
340-
db.session.commit()
341-
diff.construct_checkpoint()
342-
assert os.path.exists(diff.abs_path)
343-
lower_diff = FileDiff.query.filter_by(version=24, rank=1).first()
344-
assert os.path.exists(lower_diff.abs_path)
345-
346332
# assert gpkg diff is the same as it would be from merging all individual diffs
347333
individual_diffs = (
348334
FileDiff.query.filter_by(file_path_id=file_path_id, rank=0)
@@ -368,11 +354,12 @@ def test_create_diff_checkpoint(diff_project):
368354

369355
# geodiff failure
370356
mock.side_effect = GeoDiffLibError
371-
diff = FileDiff(
372-
basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=16, rank=1
373-
)
374-
db.session.add(diff)
375-
db.session.commit()
357+
diff = FileDiff.query.filter_by(
358+
file_path_id=file_path_id, version=16, rank=1
359+
).first()
360+
assert os.path.exists(diff.abs_path)
361+
os.remove(diff.abs_path)
362+
376363
diff.construct_checkpoint()
377364
assert mock.called
378365
assert not os.path.exists(diff.abs_path)

0 commit comments

Comments
 (0)