Skip to content

Commit ad973ce

Browse files
authored
Backend part of duplicate finder (#2889)
1 parent 84592d7 commit ad973ce

12 files changed

Lines changed: 621 additions & 42 deletions

File tree

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
<?php
2+
3+
/**
4+
* SPDX-License-Identifier: MIT
5+
* Copyright (c) 2017-2018 Tobias Reich
6+
* Copyright (c) 2018-2025 LycheeOrg.
7+
*/
8+
9+
namespace App\Actions\Photo;
10+
11+
use App\Exceptions\Internal\LycheeLogicException;
12+
use App\Exceptions\Internal\QueryBuilderException;
13+
use App\Models\Photo;
14+
use Illuminate\Database\Query\Builder;
15+
use Illuminate\Support\Collection;
16+
use Illuminate\Support\Facades\DB;
17+
18+
/**
 * Look for duplicates in the database.
 *
 * Two photos are duplicate candidates of each other when they are distinct
 * rows of `photos` that agree on every enabled criterion (album, checksum,
 * title).
 */
class DuplicateFinder
{
    /**
     * Type of the size variant joined to each candidate to provide a preview.
     *
     * NOTE(review): Duplicate::fromModel() builds the preview URL with
     * SizeVariantType::SMALL, so 4 is presumably the value of that enum
     * case — confirm.
     */
    private const PREVIEW_SIZE_VARIANT_TYPE = 4;

    /**
     * Quickly count the number of duplicates candidates.
     *
     * This is the number of rows {@see DuplicateFinder::search()} would
     * return for the same constraints.
     *
     * @param bool $must_be_within_same_album Requires the duplicates to be in the same album
     * @param bool $must_have_same_checksum   Requires the duplicates to have the same checksum
     * @param bool $must_have_same_title      Requires the duplicates to have the same title
     *
     * @return int
     *
     * @throws LycheeLogicException  if no constraint is enabled
     * @throws QueryBuilderException
     */
    public function checkCount(
        bool $must_be_within_same_album,
        bool $must_have_same_checksum,
        bool $must_have_same_title,
    ): int {
        return $this->query($must_be_within_same_album, $must_have_same_checksum, $must_have_same_title)
            ->count();
    }

    /**
     * Return the list of duplicates candidate.
     *
     * Each row additionally carries `photo_created_at`, which is selected by
     * the query but not part of the documented shape below.
     *
     * @param bool $must_be_within_same_album Requires the duplicates to be in the same album
     * @param bool $must_have_same_checksum   Requires the duplicates to have the same checksum
     * @param bool $must_have_same_title      Requires the duplicates to have the same title
     *
     * @return Collection<int,object{album_id:string,album_title:string,photo_id:string,photo_title:string,checksum:string,short_path:string|null,storage_disk:string|null}>
     *
     * @throws LycheeLogicException  if no constraint is enabled
     * @throws QueryBuilderException
     */
    public function search(
        bool $must_be_within_same_album,
        bool $must_have_same_checksum,
        bool $must_have_same_title,
    ): Collection {
        /** @var Collection<int,object{album_id:string,album_title:string,photo_id:string,photo_title:string,checksum:string,short_path:string|null,storage_disk:string|null}> */
        return $this->query($must_be_within_same_album, $must_have_same_checksum, $must_have_same_title)
            ->get();
    }

    /**
     * Build the query listing every duplicate candidate together with its
     * album and, when available, its preview size variant.
     *
     * Rows are ordered by checksum when the checksum constraint is enabled
     * and by title otherwise, so that candidates of one group are adjacent.
     *
     * @param bool $must_be_within_same_album Requires the duplicates to be in the same album
     * @param bool $must_have_same_checksum   Requires the duplicates to have the same checksum
     * @param bool $must_have_same_title      Requires the duplicates to have the same title
     *
     * @return Builder
     *
     * @throws LycheeLogicException  if no constraint is enabled
     * @throws QueryBuilderException
     */
    private function query(
        bool $must_be_within_same_album,
        bool $must_have_same_checksum,
        bool $must_have_same_title,
    ): Builder {
        // Without any constraint every photo would be a duplicate of every other one.
        if (!$must_be_within_same_album && !$must_have_same_checksum && !$must_have_same_title) {
            throw new LycheeLogicException('At least one constraint must be enabled.');
        }

        return Photo::query()
            // NOTE(review): inner join — photos whose album_id is NULL are never reported.
            ->join('base_albums', 'base_albums.id', '=', 'photos.album_id')
            // The type restriction must be part of the ON clause: as a plain
            // WHERE it would discard the NULL-extended rows and silently turn
            // this LEFT JOIN into an INNER JOIN, hiding every candidate that
            // lacks a preview variant.
            ->leftJoin(
                'size_variants',
                fn ($join) => $join
                    ->on('size_variants.photo_id', '=', 'photos.id')
                    ->where('size_variants.type', '=', self::PREVIEW_SIZE_VARIANT_TYPE)
            )
            ->whereIn('photos.id', $this->getDuplicatesIdsQuery($must_be_within_same_album, $must_have_same_checksum, $must_have_same_title))
            ->select([
                'base_albums.id as album_id',
                'base_albums.title as album_title',
                'photos.id as photo_id',
                'photos.title as photo_title',
                'photos.created_at as photo_created_at',
                'photos.checksum',
                'size_variants.short_path as short_path',
                'size_variants.storage_disk as storage_disk',
            ])
            ->when($must_have_same_checksum, fn ($q) => $q->orderBy('photos.checksum', 'asc'))
            ->when(!$must_have_same_checksum, fn ($q) => $q->orderBy('photos.title', 'asc'))
            ->toBase();
    }

    /**
     * Build the sub-query selecting the IDs of all photos which have at
     * least one duplicate.
     *
     * `photos` is joined with itself: `p1` is reported whenever another row
     * `p2` (different ID) matches it on every enabled criterion. An ID may
     * appear several times (once per matching `p2`); this is harmless for
     * the `WHERE ... IN` the result is used in.
     *
     * @param bool $must_be_within_same_album Requires the duplicates to be in the same album
     * @param bool $must_have_same_checksum   Requires the duplicates to have the same checksum
     * @param bool $must_have_same_title      Requires the duplicates to have the same title
     *
     * @return Builder
     */
    private function getDuplicatesIdsQuery(
        bool $must_be_within_same_album,
        bool $must_have_same_checksum,
        bool $must_have_same_title,
    ): Builder {
        return DB::table('photos', 'p1')->select('p1.id')
            ->join(
                'photos as p2',
                fn ($join) => $join->on('p1.id', '<>', 'p2.id')
                    ->when($must_have_same_title, fn ($q) => $q->on('p1.title', '=', 'p2.title'))
                    ->when($must_have_same_checksum, fn ($q) => $q->on('p1.checksum', '=', 'p2.checksum'))
                    ->when($must_be_within_same_album, fn ($q) => $q->on('p1.album_id', '=', 'p2.album_id'))
            );
    }
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
<?php
2+
3+
/**
4+
* SPDX-License-Identifier: MIT
5+
* Copyright (c) 2017-2018 Tobias Reich
6+
* Copyright (c) 2018-2025 LycheeOrg.
7+
*/
8+
9+
namespace App\Http\Controllers\Admin\Maintenance;
10+
11+
use App\Actions\Photo\DuplicateFinder as PhotoDuplicateFinder;
12+
use App\Http\Requests\Maintenance\MaintenanceRequest;
13+
use App\Http\Requests\Maintenance\SearchDuplicateRequest;
14+
use App\Http\Resources\Models\Duplicates\Duplicate;
15+
use App\Http\Resources\Models\Duplicates\DuplicateCount;
16+
use Illuminate\Routing\Controller;
17+
use Illuminate\Support\Collection;
18+
19+
/**
 * Maintenance endpoints reporting duplicate photos.
 *
 * The actual look-up is delegated to {@see PhotoDuplicateFinder}; this
 * controller only chooses the constraints and wraps the results into
 * resources.
 */
class DuplicateFinder extends Controller
{
    /**
     * Get the number of duplicates.
     *
     * Three counts are computed, in this order:
     *  - photos sharing a checksum, regardless of album and title;
     *  - photos sharing a title within the same album;
     *  - photos sharing a checksum within the same album.
     *
     * @param MaintenanceRequest   $request
     * @param PhotoDuplicateFinder $duplicateFinder
     *
     * @return DuplicateCount
     */
    public function check(MaintenanceRequest $request, PhotoDuplicateFinder $duplicateFinder): DuplicateCount
    {
        return new DuplicateCount(
            pure_duplicates: $duplicateFinder->checkCount(
                must_be_within_same_album: false,
                must_have_same_checksum: true,
                must_have_same_title: false,
            ),
            title_duplicates: $duplicateFinder->checkCount(
                must_be_within_same_album: true,
                must_have_same_checksum: false,
                must_have_same_title: true,
            ),
            duplicates_within_album: $duplicateFinder->checkCount(
                must_be_within_same_album: true,
                must_have_same_checksum: true,
                must_have_same_title: false,
            ),
        );
    }

    /**
     * Get the actual list of duplicates instead of just the counts.
     *
     * The constraints are taken from the validated request and every row
     * found is converted into a {@see Duplicate} resource.
     *
     * @param SearchDuplicateRequest $request
     * @param PhotoDuplicateFinder   $duplicateFinder
     *
     * @return Collection<int,Duplicate>
     */
    public function get(SearchDuplicateRequest $request, PhotoDuplicateFinder $duplicateFinder): Collection
    {
        $candidates = $duplicateFinder->search(
            must_be_within_same_album: $request->with_album_constraint,
            must_have_same_checksum: $request->with_checksum_constraint,
            must_have_same_title: $request->with_title_constraint,
        );

        return $candidates->map(
            static fn (object $row): Duplicate => Duplicate::fromModel($row)
        );
    }
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
<?php
2+
3+
/**
4+
* SPDX-License-Identifier: MIT
5+
* Copyright (c) 2017-2018 Tobias Reich
6+
* Copyright (c) 2018-2025 LycheeOrg.
7+
*/
8+
9+
namespace App\Http\Requests\Maintenance;
10+
11+
use App\Http\Requests\BaseApiRequest;
12+
use App\Models\Configs;
13+
use App\Policies\SettingsPolicy;
14+
use App\Rules\BooleanRequireSupportRule;
15+
use Illuminate\Http\Request;
16+
use Illuminate\Support\Facades\Gate;
17+
18+
/**
 * Request carrying the three constraints used to search for duplicate photos.
 *
 * @mixin Request
 */
class SearchDuplicateRequest extends BaseApiRequest
{
    /** Whether duplicates must be located in the same album. */
    public bool $with_album_constraint;

    /** Whether duplicates must have the same checksum. */
    public bool $with_checksum_constraint;

    /** Whether duplicates must have the same title. */
    public bool $with_title_constraint;

    /**
     * {@inheritDoc}
     *
     * Only users allowed to edit the settings may search for duplicates.
     */
    public function authorize(): bool
    {
        return Gate::check(SettingsPolicy::CAN_EDIT, Configs::class);
    }

    /**
     * {@inheritDoc}
     *
     * All three flags are mandatory.
     *
     * NOTE(review): the first argument of BooleanRequireSupportRule is
     * presumably the value allowed without supporter status (album: false,
     * checksum: true, title: false) — confirm against the rule.
     */
    public function rules(): array
    {
        $album_rule = new BooleanRequireSupportRule(false, $this->verify);
        $checksum_rule = new BooleanRequireSupportRule(true, $this->verify);
        $title_rule = new BooleanRequireSupportRule(false, $this->verify);

        return [
            'with_album_constraint' => ['required', $album_rule],
            'with_checksum_constraint' => ['required', $checksum_rule],
            'with_title_constraint' => ['required', $title_rule],
        ];
    }

    /**
     * {@inheritDoc}
     *
     * Converts the validated flags into booleans and stores them in the
     * public properties of this request.
     */
    protected function processValidatedValues(array $values, array $files): void
    {
        $album = $values['with_album_constraint'];
        $checksum = $values['with_checksum_constraint'];
        $title = $values['with_title_constraint'];

        $this->with_album_constraint = static::toBoolean($album);
        $this->with_checksum_constraint = static::toBoolean($checksum);
        $this->with_title_constraint = static::toBoolean($title);
    }
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?php
2+
3+
/**
4+
* SPDX-License-Identifier: MIT
5+
* Copyright (c) 2017-2018 Tobias Reich
6+
* Copyright (c) 2018-2025 LycheeOrg.
7+
*/
8+
9+
namespace App\Http\Resources\Models\Duplicates;
10+
11+
use App\Enum\SizeVariantType;
12+
use App\Models\Extensions\HasUrlGenerator;
13+
use Spatie\LaravelData\Data;
14+
use Spatie\TypeScriptTransformer\Attributes\TypeScript;
15+
16+
/**
 * Resource describing one duplicate candidate: the photo, the album it
 * belongs to and, when available, the URL of a preview.
 */
#[TypeScript()]
class Duplicate extends Data
{
    use HasUrlGenerator;

    /**
     * @param string      $album_id    ID of the album containing the photo
     * @param string      $album_title title of that album
     * @param string      $photo_id    ID of the photo
     * @param string      $photo_title title of the photo
     * @param string      $checksum    checksum of the photo
     * @param string|null $url         URL of the preview, `null` if there is none
     */
    public function __construct(
        public string $album_id,
        public string $album_title,
        public string $photo_id,
        public string $photo_title,
        public string $checksum,
        public ?string $url,
    ) {
    }

    /**
     * Build the resource from a raw database row.
     *
     * The URL is only generated when the row carries a `short_path`;
     * it is generated for the SMALL size variant on the row's storage disk.
     *
     * @param object{album_id:string,album_title:string,photo_id:string,photo_title:string,checksum:string,short_path:string|null,storage_disk:string|null} $model
     *
     * @return Duplicate
     */
    public static function fromModel(object $model): Duplicate
    {
        $preview_url = null;
        if ($model->short_path !== null) {
            $preview_url = self::pathToUrl($model->short_path, $model->storage_disk, SizeVariantType::SMALL);
        }

        return new self(
            album_id: $model->album_id,
            album_title: $model->album_title,
            photo_id: $model->photo_id,
            photo_title: $model->photo_title,
            checksum: $model->checksum,
            url: $preview_url,
        );
    }
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<?php
2+
3+
/**
4+
* SPDX-License-Identifier: MIT
5+
* Copyright (c) 2017-2018 Tobias Reich
6+
* Copyright (c) 2018-2025 LycheeOrg.
7+
*/
8+
9+
namespace App\Http\Resources\Models\Duplicates;
10+
11+
use Spatie\LaravelData\Data;
12+
use Spatie\TypeScriptTransformer\Attributes\TypeScript;
13+
14+
/**
 * Resource holding the three duplicate counters reported by the
 * maintenance check.
 */
#[TypeScript()]
class DuplicateCount extends Data
{
    /**
     * @param int $pure_duplicates         number of photos sharing a checksum with another photo
     * @param int $title_duplicates        number of photos sharing a title with another photo of the same album
     * @param int $duplicates_within_album number of photos sharing a checksum with another photo of the same album
     */
    public function __construct(
        public int $pure_duplicates,
        public int $title_duplicates,
        public int $duplicates_within_album,
    ) {
    }
}

app/Metadata/Cache/RouteCacheManager.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ public function __construct()
6969
'api/v2/Maintenance::missingFileSize' => false,
7070
'api/v2/Maintenance::tree' => false,
7171
'api/v2/Maintenance::update' => false,
72+
'api/v2/Maintenance::countDuplicates' => false,
73+
'api/v2/Maintenance::searchDuplicates' => false,
7274

7375
'api/v2/Map' => new RouteCacheConfig(tag: CacheTag::GALLERY, user_dependant: true, extra: [RequestAttribute::ALBUM_ID_ATTRIBUTE]),
7476
'api/v2/Map::provider' => new RouteCacheConfig(tag: CacheTag::SETTINGS),

0 commit comments

Comments
 (0)