Skip to content
This repository was archived by the owner on Jan 25, 2023. It is now read-only.

Commit c3a0958

Browse files
jankaravramyanaidu
authored andcommitted
blktrace: Protect q->blk_trace with RCU
KASAN is reporting that __blk_add_trace() has a use-after-free issue when accessing q->blk_trace. Indeed the switching of block tracing (and thus eventual freeing of q->blk_trace) is completely unsynchronized with the currently running tracing and thus it can happen that the blk_trace structure is being freed just while __blk_add_trace() works on it. Protect accesses to q->blk_trace by RCU during tracing and make sure we wait for the end of RCU grace period when shutting down tracing. Luckily that is rare enough event that we can afford that. Note that postponing the freeing of blk_trace to an RCU callback should better be avoided as it could have unexpected user visible side-effects as debugfs files would be still existing for a short while block tracing has been shut down. Change-Id: I88e522f9fd93427041fa0f01621593bc481ba909 Tracked-On: PKT-3200 Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711 CC: stable@vger.kernel.org Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> Reviewed-by: Ming Lei <ming.lei@redhat.com> Tested-by: Ming Lei <ming.lei@redhat.com> Reviewed-by: Bart Van Assche <bvanassche@acm.org> Reported-by: Tristan Madani <tristmd@gmail.com> Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 12b8452 commit c3a0958

3 files changed

Lines changed: 97 additions & 37 deletions

File tree

include/linux/blkdev.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,7 @@ struct request_queue {
624624
unsigned int sg_reserved_size;
625625
int node;
626626
#ifdef CONFIG_BLK_DEV_IO_TRACE
627-
struct blk_trace *blk_trace;
627+
struct blk_trace __rcu *blk_trace;
628628
struct mutex blk_trace_mutex;
629629
#endif
630630
/*

include/linux/blktrace_api.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,20 +51,28 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
5151
**/
5252
#define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \
5353
do { \
54-
struct blk_trace *bt = (q)->blk_trace; \
54+
struct blk_trace *bt; \
55+
\
56+
rcu_read_lock(); \
57+
bt = rcu_dereference((q)->blk_trace); \
5558
if (unlikely(bt)) \
5659
__trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
60+
rcu_read_unlock(); \
5761
} while (0)
5862
#define blk_add_trace_msg(q, fmt, ...) \
5963
blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__)
6064
#define BLK_TN_MAX_MSG 128
6165

6266
static inline bool blk_trace_note_message_enabled(struct request_queue *q)
6367
{
64-
struct blk_trace *bt = q->blk_trace;
65-
if (likely(!bt))
66-
return false;
67-
return bt->act_mask & BLK_TC_NOTIFY;
68+
struct blk_trace *bt;
69+
bool ret;
70+
71+
rcu_read_lock();
72+
bt = rcu_dereference(q->blk_trace);
73+
ret = bt && (bt->act_mask & BLK_TC_NOTIFY);
74+
rcu_read_unlock();
75+
return ret;
6876
}
6977

7078
extern void blk_add_driver_data(struct request_queue *q, struct request *rq,

kernel/trace/blktrace.c

Lines changed: 83 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ static void put_probe_ref(void)
336336

337337
static void blk_trace_cleanup(struct blk_trace *bt)
338338
{
339+
synchronize_rcu();
339340
blk_trace_free(bt);
340341
put_probe_ref();
341342
}
@@ -636,8 +637,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
636637
static int __blk_trace_startstop(struct request_queue *q, int start)
637638
{
638639
int ret;
639-
struct blk_trace *bt = q->blk_trace;
640+
struct blk_trace *bt;
640641

642+
bt = rcu_dereference_protected(q->blk_trace,
643+
lockdep_is_held(&q->blk_trace_mutex));
641644
if (bt == NULL)
642645
return -EINVAL;
643646

@@ -746,8 +749,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
746749
void blk_trace_shutdown(struct request_queue *q)
747750
{
748751
mutex_lock(&q->blk_trace_mutex);
749-
750-
if (q->blk_trace) {
752+
if (rcu_dereference_protected(q->blk_trace,
753+
lockdep_is_held(&q->blk_trace_mutex))) {
751754
__blk_trace_startstop(q, 0);
752755
__blk_trace_remove(q);
753756
}
@@ -759,8 +762,10 @@ void blk_trace_shutdown(struct request_queue *q)
759762
static union kernfs_node_id *
760763
blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
761764
{
762-
struct blk_trace *bt = q->blk_trace;
765+
struct blk_trace *bt;
763766

767+
/* We don't use the 'bt' value here except as an optimization... */
768+
bt = rcu_dereference_protected(q->blk_trace, 1);
764769
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
765770
return NULL;
766771

@@ -805,10 +810,14 @@ static void blk_add_trace_rq(struct request *rq, int error,
805810
unsigned int nr_bytes, u32 what,
806811
union kernfs_node_id *cgid)
807812
{
808-
struct blk_trace *bt = rq->q->blk_trace;
813+
struct blk_trace *bt;
809814

810-
if (likely(!bt))
815+
rcu_read_lock();
816+
bt = rcu_dereference(rq->q->blk_trace);
817+
if (likely(!bt)) {
818+
rcu_read_unlock();
811819
return;
820+
}
812821

813822
if (blk_rq_is_passthrough(rq))
814823
what |= BLK_TC_ACT(BLK_TC_PC);
@@ -817,6 +826,7 @@ static void blk_add_trace_rq(struct request *rq, int error,
817826

818827
__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
819828
rq->cmd_flags, what, error, 0, NULL, cgid);
829+
rcu_read_unlock();
820830
}
821831

822832
static void blk_add_trace_rq_insert(void *ignore,
@@ -862,14 +872,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
862872
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
863873
u32 what, int error)
864874
{
865-
struct blk_trace *bt = q->blk_trace;
875+
struct blk_trace *bt;
866876

867-
if (likely(!bt))
877+
rcu_read_lock();
878+
bt = rcu_dereference(q->blk_trace);
879+
if (likely(!bt)) {
880+
rcu_read_unlock();
868881
return;
882+
}
869883

870884
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
871885
bio_op(bio), bio->bi_opf, what, error, 0, NULL,
872886
blk_trace_bio_get_cgid(q, bio));
887+
rcu_read_unlock();
873888
}
874889

875890
static void blk_add_trace_bio_bounce(void *ignore,
@@ -914,11 +929,14 @@ static void blk_add_trace_getrq(void *ignore,
914929
if (bio)
915930
blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
916931
else {
917-
struct blk_trace *bt = q->blk_trace;
932+
struct blk_trace *bt;
918933

934+
rcu_read_lock();
935+
bt = rcu_dereference(q->blk_trace);
919936
if (bt)
920937
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
921938
NULL, NULL);
939+
rcu_read_unlock();
922940
}
923941
}
924942

@@ -930,27 +948,35 @@ static void blk_add_trace_sleeprq(void *ignore,
930948
if (bio)
931949
blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
932950
else {
933-
struct blk_trace *bt = q->blk_trace;
951+
struct blk_trace *bt;
934952

953+
rcu_read_lock();
954+
bt = rcu_dereference(q->blk_trace);
935955
if (bt)
936956
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
937957
0, 0, NULL, NULL);
958+
rcu_read_unlock();
938959
}
939960
}
940961

941962
static void blk_add_trace_plug(void *ignore, struct request_queue *q)
942963
{
943-
struct blk_trace *bt = q->blk_trace;
964+
struct blk_trace *bt;
944965

966+
rcu_read_lock();
967+
bt = rcu_dereference(q->blk_trace);
945968
if (bt)
946969
__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, NULL);
970+
rcu_read_unlock();
947971
}
948972

949973
static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
950974
unsigned int depth, bool explicit)
951975
{
952-
struct blk_trace *bt = q->blk_trace;
976+
struct blk_trace *bt;
953977

978+
rcu_read_lock();
979+
bt = rcu_dereference(q->blk_trace);
954980
if (bt) {
955981
__be64 rpdu = cpu_to_be64(depth);
956982
u32 what;
@@ -962,14 +988,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
962988

963989
__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, NULL);
964990
}
991+
rcu_read_unlock();
965992
}
966993

967994
static void blk_add_trace_split(void *ignore,
968995
struct request_queue *q, struct bio *bio,
969996
unsigned int pdu)
970997
{
971-
struct blk_trace *bt = q->blk_trace;
998+
struct blk_trace *bt;
972999

1000+
rcu_read_lock();
1001+
bt = rcu_dereference(q->blk_trace);
9731002
if (bt) {
9741003
__be64 rpdu = cpu_to_be64(pdu);
9751004

@@ -978,6 +1007,7 @@ static void blk_add_trace_split(void *ignore,
9781007
BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
9791008
&rpdu, blk_trace_bio_get_cgid(q, bio));
9801009
}
1010+
rcu_read_unlock();
9811011
}
9821012

9831013
/**
@@ -997,11 +1027,15 @@ static void blk_add_trace_bio_remap(void *ignore,
9971027
struct request_queue *q, struct bio *bio,
9981028
dev_t dev, sector_t from)
9991029
{
1000-
struct blk_trace *bt = q->blk_trace;
1030+
struct blk_trace *bt;
10011031
struct blk_io_trace_remap r;
10021032

1003-
if (likely(!bt))
1033+
rcu_read_lock();
1034+
bt = rcu_dereference(q->blk_trace);
1035+
if (likely(!bt)) {
1036+
rcu_read_unlock();
10041037
return;
1038+
}
10051039

10061040
r.device_from = cpu_to_be32(dev);
10071041
r.device_to = cpu_to_be32(bio_dev(bio));
@@ -1010,6 +1044,7 @@ static void blk_add_trace_bio_remap(void *ignore,
10101044
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
10111045
bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
10121046
sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
1047+
rcu_read_unlock();
10131048
}
10141049

10151050
/**
@@ -1030,11 +1065,15 @@ static void blk_add_trace_rq_remap(void *ignore,
10301065
struct request *rq, dev_t dev,
10311066
sector_t from)
10321067
{
1033-
struct blk_trace *bt = q->blk_trace;
1068+
struct blk_trace *bt;
10341069
struct blk_io_trace_remap r;
10351070

1036-
if (likely(!bt))
1071+
rcu_read_lock();
1072+
bt = rcu_dereference(q->blk_trace);
1073+
if (likely(!bt)) {
1074+
rcu_read_unlock();
10371075
return;
1076+
}
10381077

10391078
r.device_from = cpu_to_be32(dev);
10401079
r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
@@ -1043,6 +1082,7 @@ static void blk_add_trace_rq_remap(void *ignore,
10431082
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
10441083
rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
10451084
sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
1085+
rcu_read_unlock();
10461086
}
10471087

10481088
/**
@@ -1060,14 +1100,19 @@ void blk_add_driver_data(struct request_queue *q,
10601100
struct request *rq,
10611101
void *data, size_t len)
10621102
{
1063-
struct blk_trace *bt = q->blk_trace;
1103+
struct blk_trace *bt;
10641104

1065-
if (likely(!bt))
1105+
rcu_read_lock();
1106+
bt = rcu_dereference(q->blk_trace);
1107+
if (likely(!bt)) {
1108+
rcu_read_unlock();
10661109
return;
1110+
}
10671111

10681112
__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
10691113
BLK_TA_DRV_DATA, 0, len, data,
10701114
blk_trace_request_get_cgid(q, rq));
1115+
rcu_read_unlock();
10711116
}
10721117
EXPORT_SYMBOL_GPL(blk_add_driver_data);
10731118

@@ -1594,6 +1639,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
15941639
return -EINVAL;
15951640

15961641
put_probe_ref();
1642+
synchronize_rcu();
15971643
blk_trace_free(bt);
15981644
return 0;
15991645
}
@@ -1755,6 +1801,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
17551801
struct hd_struct *p = dev_to_part(dev);
17561802
struct request_queue *q;
17571803
struct block_device *bdev;
1804+
struct blk_trace *bt;
17581805
ssize_t ret = -ENXIO;
17591806

17601807
bdev = bdget(part_devt(p));
@@ -1767,21 +1814,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
17671814

17681815
mutex_lock(&q->blk_trace_mutex);
17691816

1817+
bt = rcu_dereference_protected(q->blk_trace,
1818+
lockdep_is_held(&q->blk_trace_mutex));
17701819
if (attr == &dev_attr_enable) {
1771-
ret = sprintf(buf, "%u\n", !!q->blk_trace);
1820+
ret = sprintf(buf, "%u\n", !!bt);
17721821
goto out_unlock_bdev;
17731822
}
17741823

1775-
if (q->blk_trace == NULL)
1824+
if (bt == NULL)
17761825
ret = sprintf(buf, "disabled\n");
17771826
else if (attr == &dev_attr_act_mask)
1778-
ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
1827+
ret = blk_trace_mask2str(buf, bt->act_mask);
17791828
else if (attr == &dev_attr_pid)
1780-
ret = sprintf(buf, "%u\n", q->blk_trace->pid);
1829+
ret = sprintf(buf, "%u\n", bt->pid);
17811830
else if (attr == &dev_attr_start_lba)
1782-
ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
1831+
ret = sprintf(buf, "%llu\n", bt->start_lba);
17831832
else if (attr == &dev_attr_end_lba)
1784-
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1833+
ret = sprintf(buf, "%llu\n", bt->end_lba);
17851834

17861835
out_unlock_bdev:
17871836
mutex_unlock(&q->blk_trace_mutex);
@@ -1798,6 +1847,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
17981847
struct block_device *bdev;
17991848
struct request_queue *q;
18001849
struct hd_struct *p;
1850+
struct blk_trace *bt;
18011851
u64 value;
18021852
ssize_t ret = -EINVAL;
18031853

@@ -1828,8 +1878,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
18281878

18291879
mutex_lock(&q->blk_trace_mutex);
18301880

1881+
bt = rcu_dereference_protected(q->blk_trace,
1882+
lockdep_is_held(&q->blk_trace_mutex));
18311883
if (attr == &dev_attr_enable) {
1832-
if (!!value == !!q->blk_trace) {
1884+
if (!!value == !!bt) {
18331885
ret = 0;
18341886
goto out_unlock_bdev;
18351887
}
@@ -1841,18 +1893,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
18411893
}
18421894

18431895
ret = 0;
1844-
if (q->blk_trace == NULL)
1896+
if (bt == NULL)
18451897
ret = blk_trace_setup_queue(q, bdev);
18461898

18471899
if (ret == 0) {
18481900
if (attr == &dev_attr_act_mask)
1849-
q->blk_trace->act_mask = value;
1901+
bt->act_mask = value;
18501902
else if (attr == &dev_attr_pid)
1851-
q->blk_trace->pid = value;
1903+
bt->pid = value;
18521904
else if (attr == &dev_attr_start_lba)
1853-
q->blk_trace->start_lba = value;
1905+
bt->start_lba = value;
18541906
else if (attr == &dev_attr_end_lba)
1855-
q->blk_trace->end_lba = value;
1907+
bt->end_lba = value;
18561908
}
18571909

18581910
out_unlock_bdev:

0 commit comments

Comments
 (0)