btrfs: fix race between balance and cancel/pause
commit b19c98f237cd76981aaded52c258ce93f7daa8cb upstream. Syzbot reported a panic that looks like this: assertion failed: fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED, in fs/btrfs/ioctl.c:465 ------------[ cut here ]------------ kernel BUG at fs/btrfs/messages.c:259! RIP: 0010:btrfs_assertfail+0x2c/0x30 fs/btrfs/messages.c:259 Call Trace: <TASK> btrfs_exclop_balance fs/btrfs/ioctl.c:465 [inline] btrfs_ioctl_balance fs/btrfs/ioctl.c:3564 [inline] btrfs_ioctl+0x531e/0x5b30 fs/btrfs/ioctl.c:4632 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x197/0x210 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The reproducer is running a balance and a cancel or pause in parallel. The way balance finishes is a bit wonky, if we were paused we need to save the balance_ctl in the fs_info, but clear it otherwise and cleanup. However we rely on the return values being specific errors, or having a cancel request or no pause request. If balance completes and returns 0, but we have a pause or cancel request we won't do the appropriate cleanup, and then the next time we try to start a balance we'll trip this ASSERT. The error handling is just wrong here, we always want to clean up, unless we got -ECANCELLED and we set the appropriate pause flag in the exclusive op. With this patch the reproducer ran for an hour without tripping, previously it would trip in less than a few minutes. Reported-by: syzbot+c0f3acf145cb465426d5@syzkaller.appspotmail.com CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Josef Bacik <josef@toxicpanda.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
c828e913c8
commit
ddf7e8984c
1 changed files with 4 additions and 10 deletions
|
|
@ -4092,14 +4092,6 @@ static int alloc_profile_is_valid(u64 flags, int extended)
|
|||
return has_single_bit_set(flags);
|
||||
}
|
||||
|
||||
static inline int balance_need_close(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
/* cancel requested || normal exit path */
|
||||
return atomic_read(&fs_info->balance_cancel_req) ||
|
||||
(atomic_read(&fs_info->balance_pause_req) == 0 &&
|
||||
atomic_read(&fs_info->balance_cancel_req) == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate target profile against allowed profiles and return true if it's OK.
|
||||
* Otherwise print the error message and return false.
|
||||
|
|
@ -4289,6 +4281,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
|
|||
u64 num_devices;
|
||||
unsigned seq;
|
||||
bool reducing_redundancy;
|
||||
bool paused = false;
|
||||
int i;
|
||||
|
||||
if (btrfs_fs_closing(fs_info) ||
|
||||
|
|
@ -4419,6 +4412,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
|
|||
if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) {
|
||||
btrfs_info(fs_info, "balance: paused");
|
||||
btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED);
|
||||
paused = true;
|
||||
}
|
||||
/*
|
||||
* Balance can be canceled by:
|
||||
|
|
@ -4447,8 +4441,8 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
|
|||
btrfs_update_ioctl_balance_args(fs_info, bargs);
|
||||
}
|
||||
|
||||
if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
|
||||
balance_need_close(fs_info)) {
|
||||
/* We didn't pause, we can clean everything up. */
|
||||
if (!paused) {
|
||||
reset_balance_state(fs_info);
|
||||
btrfs_exclop_finish(fs_info);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue