Skip to content

Commit 0dc241c

Browse files
authored
Fix shared drive failures caused by multiple share-interact permission docs being created for the same sharing (#4760)
## Summary The issue could happen when drive sharing invitations were prepared concurrently: several members could trigger interact-code creation at the same time, and CouchDB generated different permission document IDs. Later reads could pick a partial permission doc, causing missing drive tokens and errors like “member was not found” ## Changes - Use a canonical deterministic ID for `share-interact` permission docs. - Repair existing duplicate `share-interact` permission docs by merging codes into the canonical doc and deleting legacy duplicates. - Make `GetForShareInteract` repair duplicates before returning permissions. - Serialize `GetInteractCode` with the existing instance lock to avoid local concurrent writes for the same sharing.
2 parents 1bddc42 + 9b1bb9c commit 0dc241c

6 files changed

Lines changed: 996 additions & 28 deletions

File tree

model/permission/permissions.go

Lines changed: 260 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,24 @@
33
package permission
44

55
import (
6+
"context"
67
"encoding/json"
78
"fmt"
89
"net/http"
10+
"slices"
911
"strings"
1012
"time"
1113

1214
build "github.com/cozy/cozy-stack/pkg/config"
15+
"github.com/cozy/cozy-stack/pkg/config/config"
1316
"github.com/cozy/cozy-stack/pkg/consts"
1417
"github.com/cozy/cozy-stack/pkg/couchdb"
1518
"github.com/cozy/cozy-stack/pkg/couchdb/mango"
1619
"github.com/cozy/cozy-stack/pkg/crypto"
20+
"github.com/cozy/cozy-stack/pkg/logger"
1721
"github.com/cozy/cozy-stack/pkg/metadata"
1822
"github.com/cozy/cozy-stack/pkg/prefixer"
23+
"github.com/cozy/cozy-stack/pkg/utils"
1924
"github.com/labstack/echo/v4"
2025
)
2126

@@ -242,9 +247,206 @@ func GetForSharePreview(db prefixer.Prefixer, sharingID string) (*Permission, er
242247
}
243248

244249
// GetForShareInteract retrieves the Permission doc for a given sharing to
245-
// read/write a note
250+
// read/write a note. It may repair legacy duplicate share-interact permission
251+
// docs as part of the read by creating/updating the canonical permission doc
252+
// and deleting duplicate legacy docs.
246253
func GetForShareInteract(db prefixer.Prefixer, sharingID string) (*Permission, error) {
247-
return getFromSource(db, TypeShareInteract, consts.Sharings, sharingID)
254+
return getOrRepairShareInteractPermissions(db, sharingID)
255+
}
256+
257+
// ShareInteractPermissionID returns the canonical permission document ID for a
258+
// share-interact permission set.
259+
func ShareInteractPermissionID(sharingID string) string {
260+
return TypeShareInteract + "-" + sharingID
261+
}
262+
263+
func shareInteractLockName(sharingID string) string {
264+
return "permissions/share-interact/" + sharingID
265+
}
266+
267+
func getShareInteractPermissions(db prefixer.Prefixer, sharingID string) ([]Permission, error) {
268+
var res []Permission
269+
req := couchdb.FindRequest{
270+
UseIndex: "by-source-and-type",
271+
Selector: mango.And(
272+
mango.Equal("type", TypeShareInteract),
273+
mango.Equal("source_id", consts.Sharings+"/"+sharingID),
274+
),
275+
Limit: 1000,
276+
}
277+
err := couchdb.FindDocs(db, consts.Permissions, &req, &res)
278+
if err != nil {
279+
// With a cluster of couchdb, we can have a race condition where we
280+
// query an index before it has been updated for a doc that has just
281+
// been created. Keep the same fallback as getFromSource.
282+
time.Sleep(1 * time.Second)
283+
err = couchdb.FindDocs(db, consts.Permissions, &req, &res)
284+
if err != nil {
285+
return nil, err
286+
}
287+
}
288+
return res, nil
289+
}
290+
291+
func getOrRepairShareInteractPermissions(db prefixer.Prefixer, sharingID string) (*Permission, error) {
292+
perms, err := getShareInteractPermissions(db, sharingID)
293+
if err != nil {
294+
return nil, err
295+
}
296+
if canonical, needsRepair, err := shareInteractRepairState(perms, sharingID); err != nil || !needsRepair {
297+
return canonical, err
298+
}
299+
300+
return repairShareInteractPermissionsWithLock(db, sharingID)
301+
}
302+
303+
func repairShareInteractPermissionsWithLock(db prefixer.Prefixer, sharingID string) (*Permission, error) {
304+
mu := config.Lock().ReadWrite(db, shareInteractLockName(sharingID))
305+
if err := mu.Lock(); err != nil {
306+
return nil, err
307+
}
308+
defer mu.Unlock()
309+
310+
return utils.RetryWithBackoffValue(context.Background(), shareInteractRetryOptions(), func() (*Permission, error) {
311+
return getOrRepairShareInteractPermissionsOnce(db, sharingID)
312+
})
313+
}
314+
315+
// getOrRepairShareInteractPermissionsOnce performs a single read+repair pass.
316+
// The caller must hold the share-interact write lock for sharingID.
317+
func getOrRepairShareInteractPermissionsOnce(db prefixer.Prefixer, sharingID string) (*Permission, error) {
318+
perms, err := getShareInteractPermissions(db, sharingID)
319+
if err != nil {
320+
return nil, err
321+
}
322+
if canonical, needsRepair, err := shareInteractRepairState(perms, sharingID); err != nil || !needsRepair {
323+
return canonical, err
324+
}
325+
326+
return repairShareInteractPermissions(db, sharingID, perms)
327+
}
328+
329+
func shareInteractRetryOptions() utils.RetryOptions {
330+
return utils.RetryOptions{
331+
Attempts: 5,
332+
Delay: 10 * time.Millisecond,
333+
MaxDelay: 100 * time.Millisecond,
334+
JitterFactor: 0.25,
335+
ShouldRetry: func(err error) bool {
336+
return couchdb.IsConflictError(err) || couchdb.IsFileExists(err)
337+
},
338+
}
339+
}
340+
341+
func shareInteractRepairState(perms []Permission, sharingID string) (*Permission, bool, error) {
342+
if len(perms) == 0 {
343+
return nil, false, &couchdb.Error{
344+
StatusCode: http.StatusNotFound,
345+
Name: "not_found",
346+
Reason: fmt.Sprintf("no permission doc for %v", sharingID),
347+
}
348+
}
349+
350+
canonicalID := ShareInteractPermissionID(sharingID)
351+
var canonical *Permission
352+
usable := 0
353+
hasDuplicate := false
354+
for i := range perms {
355+
p := &perms[i]
356+
isCanonical := p.ID() == canonicalID
357+
if !isCanonical {
358+
hasDuplicate = true
359+
}
360+
if p.Expired() {
361+
continue
362+
}
363+
usable++
364+
if isCanonical {
365+
canonical = p
366+
}
367+
}
368+
if usable == 0 {
369+
return nil, false, ErrExpiredToken
370+
}
371+
if usable == 1 && canonical != nil && !hasDuplicate {
372+
return canonical, false, nil
373+
}
374+
return nil, true, nil
375+
}
376+
377+
// repairShareInteractPermissions merges usable share-interact permission docs
378+
// into the canonical doc and deletes non-canonical duplicates. The caller must
379+
// hold the share-interact write lock for sharingID.
380+
func repairShareInteractPermissions(db prefixer.Prefixer, sharingID string, perms []Permission) (*Permission, error) {
381+
canonicalID := ShareInteractPermissionID(sharingID)
382+
merged := &Permission{
383+
PID: canonicalID,
384+
Type: TypeShareInteract,
385+
SourceID: consts.Sharings + "/" + sharingID,
386+
Codes: make(map[string]string),
387+
}
388+
hasUsablePermission := false
389+
hasCanonical := false
390+
duplicates := make([]*Permission, 0, len(perms))
391+
for i := range perms {
392+
p := &perms[i]
393+
if p.ID() == canonicalID {
394+
hasCanonical = true
395+
// Keep the canonical doc revision and then overwrite the document with content rebuilt from non-expired docs.
396+
merged.SetRev(p.Rev())
397+
} else {
398+
duplicates = append(duplicates, p)
399+
}
400+
if p.Expired() {
401+
continue
402+
}
403+
404+
hasUsablePermission = true
405+
406+
if len(merged.Permissions) == 0 && len(p.Permissions) > 0 {
407+
merged.Permissions = slices.Clone(p.Permissions)
408+
}
409+
if merged.Metadata == nil && p.Metadata != nil {
410+
merged.Metadata = p.Metadata.Clone()
411+
}
412+
if merged.ExpiresAt == nil && p.ExpiresAt != nil {
413+
merged.ExpiresAt = p.ExpiresAt
414+
}
415+
416+
for key, code := range p.Codes {
417+
if key == "" {
418+
continue
419+
}
420+
if existing, ok := merged.Codes[key]; ok && existing != code {
421+
logger.WithDomain(db.DomainName()).WithNamespace("permissions").
422+
Warnf("conflicting share-interact code for %s in sharing %s", key, sharingID)
423+
continue
424+
}
425+
merged.Codes[key] = code
426+
}
427+
}
428+
if !hasUsablePermission {
429+
return nil, ErrExpiredToken
430+
}
431+
432+
if !hasCanonical {
433+
if err := couchdb.CreateNamedDoc(db, merged); err != nil {
434+
return nil, err
435+
}
436+
} else if err := couchdb.UpdateDoc(db, merged); err != nil {
437+
return nil, err
438+
}
439+
440+
for _, p := range duplicates {
441+
if err := couchdb.DeleteDoc(db, p); err != nil {
442+
if couchdb.IsNotFoundError(err) {
443+
continue
444+
}
445+
return nil, err
446+
}
447+
}
448+
449+
return merged, nil
248450
}
249451

250452
func getFromSource(db prefixer.Prefixer, permType, docType, slug string) (*Permission, error) {
@@ -605,21 +807,72 @@ func CreateSharePreviewSet(db prefixer.Prefixer, sharingID string, codes, shortc
605807
return doc, nil
606808
}
607809

608-
// CreateShareInteractSet creates a Permission doc for reading/writing a note
609-
// inside a sharing
810+
// CreateShareInteractSet creates or updates the Permission doc for reading and
811+
// writing a note inside a sharing. When subdoc.Permissions is not empty, it
812+
// replaces the existing permission rules with that full set.
610813
func CreateShareInteractSet(db prefixer.Prefixer, sharingID string, codes map[string]string, subdoc Permission) (*Permission, error) {
611814
doc := &Permission{
815+
PID: ShareInteractPermissionID(sharingID),
612816
Type: TypeShareInteract,
613817
Permissions: subdoc.Permissions,
614818
Codes: codes,
615819
SourceID: consts.Sharings + "/" + sharingID,
616820
Metadata: subdoc.Metadata,
617821
}
618-
err := couchdb.CreateDoc(db, doc)
619-
if err != nil {
822+
823+
mu := config.Lock().ReadWrite(db, shareInteractLockName(sharingID))
824+
if err := mu.Lock(); err != nil {
620825
return nil, err
621826
}
622-
return doc, nil
827+
defer mu.Unlock()
828+
829+
return utils.RetryWithBackoffValue(context.Background(), shareInteractRetryOptions(), func() (*Permission, error) {
830+
existing, err := GetPermissionByIDIncludingExpired(db, ShareInteractPermissionID(sharingID))
831+
if err != nil {
832+
if !couchdb.IsNotFoundError(err) {
833+
return nil, err
834+
}
835+
err = couchdb.CreateNamedDoc(db, doc)
836+
if err == nil {
837+
return doc, nil
838+
}
839+
return nil, err
840+
}
841+
842+
merged := existing.Clone().(*Permission)
843+
merged.Type = TypeShareInteract
844+
merged.SourceID = consts.Sharings + "/" + sharingID
845+
merged.SetID(ShareInteractPermissionID(sharingID))
846+
merged.ExpiresAt = nil
847+
if len(subdoc.Permissions) > 0 {
848+
// Callers pass the complete interact rule set, so replace instead of merging rules.
849+
merged.Permissions = subdoc.Permissions
850+
}
851+
if merged.Metadata == nil && subdoc.Metadata != nil {
852+
merged.Metadata = subdoc.Metadata.Clone()
853+
}
854+
if merged.Metadata != nil {
855+
merged.Metadata.ChangeUpdatedAt()
856+
}
857+
if merged.Codes == nil {
858+
merged.Codes = make(map[string]string)
859+
}
860+
for key, code := range codes {
861+
if key == "" {
862+
continue
863+
}
864+
if existingCode, ok := merged.Codes[key]; ok && existingCode != code {
865+
logger.WithDomain(db.DomainName()).WithNamespace("permissions").
866+
Warnf("keeping existing share-interact code for %s in sharing %s", key, sharingID)
867+
continue
868+
}
869+
merged.Codes[key] = code
870+
}
871+
if err := couchdb.UpdateDoc(db, merged); err != nil {
872+
return nil, err
873+
}
874+
return merged, nil
875+
})
623876
}
624877

625878
// ForceWebapp creates or updates a Permission doc for a given webapp

0 commit comments

Comments
 (0)