Skip to content

Commit 33307fa

Browse files
committed
fuse: Add DLM retry workaround for iomap write failures
When the FUSE server returns -EAGAIN during an iomap write operation due to DLM lock contention, the write fails with an IO error. This happens because:
1. Page invalidation holds the DLM lock and needs the folio lock.
2. The iomap write path holds the folio lock and calls fuse_iomap_read_folio_range().
3. FUSE gets -EAGAIN from the server (the DLM lock cannot be acquired without deadlocking).
4. fuse_do_readfolio() converts -EAGAIN to AOP_TRUNCATED_PAGE and unlocks the folio (releasing the folio lock is what prevents the deadlock).
5. However, iomap doesn't understand AOP_TRUNCATED_PAGE and treats it as an error.
6. Result: the write fails with an IO error, even though it is just temporary contention.

This is a FUSE-only workaround until mainline iomap gains AOP_TRUNCATED_PAGE retry support.

The solution:
1. Stack-allocate the retry state in fuse_cache_write_iter().
2. Register it in the fuse_conn xarray before calling iomap (indexed by task pointer).
3. When fuse_iomap_read_folio_range() sees AOP_TRUNCATED_PAGE:
   - Mark the retry flag in the registered state.
   - Convert the result to -EAGAIN for iomap.
4. After iomap returns, check the retry flag.
5. If it is set, retry the entire write operation.
6. Remove the entry from the xarray when done (or keep it for the next retry iteration).

This allows writes to succeed by retrying after the DLM lock contention clears, rather than failing with an IO error.

This uses a single per-connection xarray under the assumption that there won't be too many parallel writers.

Signed-off-by: Bernd Schubert <bernd@bsbernd.com>
1 parent 93cf419 commit 33307fa

3 files changed

Lines changed: 102 additions & 1 deletion

File tree

fs/fuse/file.c

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1001,8 +1001,45 @@ static int fuse_iomap_read_folio_range(const struct iomap_iter *iter,
10011001
size_t len)
10021002
{
10031003
struct file *file = iter->private;
1004+
struct inode *inode = file_inode(file);
1005+
struct fuse_conn *fc = get_fuse_conn(inode);
10041006
size_t off = offset_in_folio(folio, pos);
1005-
return fuse_do_readfolio(file, folio, off, len);
1007+
int ret;
1008+
ret = fuse_do_readfolio(file, folio, off, len);
1009+
1010+
/*
1011+
* TEMPORARY WORKAROUND for iomap write deadlock:
1012+
*
1013+
* When FUSE server returns -EAGAIN (DLM lock contention),
1014+
* fuse_do_readfolio() converts it to AOP_TRUNCATED_PAGE and
1015+
* unlocks the folio (per AOP_TRUNCATED_PAGE contract).
1016+
*
1017+
* However, iomap doesn't understand AOP_TRUNCATED_PAGE.
1018+
* We need to:
1019+
* 1. Mark the retry flag (caller stored it in xarray)
1020+
* 2. Convert to -EAGAIN so iomap sees an error
1021+
* 3. Let fuse_cache_write_iter() detect and retry
1022+
*
1023+
* This breaks the ABBA deadlock:
1024+
* - Folio is unlocked (page invalidation can proceed)
1025+
* - Write will be retried at higher level
1026+
*
1027+
* Remove this when mainline iomap gains AOP_TRUNCATED_PAGE support.
1028+
*/
1029+
if (ret == AOP_TRUNCATED_PAGE) {
1030+
struct fuse_dlm_retry *retry;
1031+
unsigned long task_key = (unsigned long)current;
1032+
1033+
retry = xa_load(&fc->dlm_retry_tasks, task_key);
1034+
if (retry) {
1035+
retry->retry_needed = true;
1036+
}
1037+
1038+
/* Convert to -EAGAIN for iomap */
1039+
ret = -EAGAIN;
1040+
}
1041+
1042+
return ret;
10061043
}
10071044

10081045
static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
@@ -1604,6 +1641,7 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
16041641

16051642
inode_lock(inode);
16061643

1644+
retry:
16071645
err = count = generic_write_checks(iocb, from);
16081646
if (err <= 0)
16091647
goto out;
@@ -1621,6 +1659,30 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
16211659
written = direct_write_fallback(iocb, from, written,
16221660
fuse_perform_write(iocb, from));
16231661
} else if (writeback) {
1662+
/*
1663+
* TEMPORARY WORKAROUND for iomap write deadlock:
1664+
*
1665+
* Stack-allocate retry state and register it before calling
1666+
* iomap. If fuse_iomap_read_folio_range() encounters
1667+
* AOP_TRUNCATED_PAGE, it will mark retry_needed.
1668+
*
1669+
* Stack allocation avoids a heap allocation and any leak of the
1670+
* state itself, but the xarray holds a pointer to it, so the entry
1671+
* must be erased before this function returns.
1672+
*/
1673+
struct fuse_dlm_retry retry_state = {
1674+
.retry_needed = false,
1675+
};
1676+
unsigned long task_key = (unsigned long)current;
1677+
int xa_ret;
1678+
1679+
xa_ret = xa_err(xa_store(&fc->dlm_retry_tasks, task_key,
1680+
&retry_state, GFP_KERNEL));
1681+
if (xa_ret) {
1682+
err = xa_ret;
1683+
goto out;
1684+
}
1685+
16241686
/*
16251687
* Use iomap so that we can do granular uptodate reads
16261688
* and granular dirty tracking for large folios.
@@ -1629,6 +1691,26 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
16291691
&fuse_iomap_ops,
16301692
&fuse_iomap_write_ops,
16311693
file);
1694+
1695+
/*
1696+
* If DLM lock contention occurred (AOP_TRUNCATED_PAGE),
1697+
* retry the entire write operation.
1698+
*
1699+
* The folio has been unlocked by fuse_do_readfolio(),
1700+
* breaking the ABBA deadlock with page invalidation.
1701+
*
1702+
* Keep the entry in xarray and reuse it for the retry.
1703+
*
1704+
* Remove this when mainline iomap gains AOP_TRUNCATED_PAGE
1705+
* retry support.
1706+
*/
1707+
if (retry_state.retry_needed) {
1708+
retry_state.retry_needed = false;
1709+
goto retry;
1710+
}
1711+
1712+
/* Remove from xarray now that we're done */
1713+
xa_erase(&fc->dlm_retry_tasks, task_key);
16321714
} else {
16331715
written = fuse_perform_write(iocb, from);
16341716
}

fs/fuse/fuse_i.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,17 @@ struct fuse_sync_bucket {
639639
struct rcu_head rcu;
640640
};
641641

642+
/**
643+
* DLM retry tracking for iomap write deadlock workaround.
644+
*
645+
* Temporary workaround until mainline iomap gains AOP_TRUNCATED_PAGE
646+
* retry support. Tracks tasks that need to retry write operations due
647+
* to DLM lock contention (-EAGAIN from FUSE server).
648+
*/
649+
struct fuse_dlm_retry {
650+
bool retry_needed; /* set by fuse_iomap_read_folio_range() on AOP_TRUNCATED_PAGE; tested and cleared by fuse_cache_write_iter() before it retries */
651+
};
652+
642653
/**
643654
* A Fuse connection.
644655
*
@@ -1022,6 +1033,12 @@ struct fuse_conn {
10221033
/* The foffset alignment in PAGE */
10231034
unsigned int alignment_pages;
10241035

1036+
/**
1037+
* XArray tracking tasks that need DLM retry.
1038+
* Maps task pointer -> struct fuse_dlm_retry.
1039+
* Temporary workaround for iomap write deadlock.
1040+
*/
1041+
struct xarray dlm_retry_tasks;
10251042
};
10261043

10271044
/*

fs/fuse/inode.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
10561056
fc->initialized = 0;
10571057
fc->connected = 1;
10581058
fc->dlm = 1;
1059+
xa_init(&fc->dlm_retry_tasks);
10591060

10601061
/* module option for now */
10611062
fc->compound_open_getattr = enable_compound;
@@ -1109,6 +1110,7 @@ void fuse_conn_put(struct fuse_conn *fc)
11091110
}
11101111
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
11111112
fuse_backing_files_free(fc);
1113+
xa_destroy(&fc->dlm_retry_tasks);
11121114
call_rcu(&fc->rcu, delayed_release);
11131115
}
11141116
}

0 commit comments

Comments
 (0)