midl-website-2021/papers.json at master · MIDL-Conference/midl-website-2021 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
{
    "A1": {
        "abstract": "In follow-up CT examinations of cancer patients, therapy success is evaluated by estimating the change in tumor size. This process is time-consuming and error-prone. We present a pipeline that automates the segmentation and measurement of matching lesions, given a point annotation in the baseline lesion. First, a region around the point annotation is extracted, in which a deep-learning-based segmentation of the lesion is performed. Afterward, a registration algorithm finds the corresponding image region in the follow-up scan and the convolutional neural network segments lesions inside this region. In the final step, the corresponding lesion is selected. We evaluate our pipeline on clinical follow-up data comprising 125 soft-tissue lesions from 43 patients with metastatic melanoma. Our pipeline succeeded for 96% of the baseline and 80% of the follow-up lesions, showing that we have laid the foundation for an efficient quantitative follow-up assessment in clinical routine.",
        "authors": "Alessa Hering, Felix Peisen, Teresa Amaral, Sergios Gatidis, Thomas Eigentler, Ahmed Othman, Jan Hendrik Moltz",
        "award": "",
        "chairs": "Minjeong Kim, Jelmer Wolterink",
        "cloudflare_video_id": "2d0c1ea43a7ac5fbcec5405753fb26dc",
        "id": "A1",
        "or_id": "hzbuHGhU02Z",
        "oral": "True",
        "pdf": "/proceedings/hering21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/hering21a.html",
        "schedule": " Wednesday 7th July\nA1-3 (long): Segmentation - 13:00 - 13:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_12_poster.pdf",
        "title": "Whole-Body Soft-Tissue Lesion Tracking and Segmentation in Longitudinal CT Imaging Studies",
        "url": "papers/A1.html",
        "video": "/videos/full_12_video.mp4",
        "youtube_video_id": "7E6UA7B49so"
    },
    "A10": {
        "abstract": "Accurate segmentation of volumetric scans like MRI and CT scans is highly demanded for surgery planning in clinical practice, quantitative analysis, and identification of disease. However, accurate segmentation is challenging because of the irregular shape of given organ and large variation in appearances across the slices. In such problems, 3D features are desired in nature which can be extracted using 3D convolutional neural network (CNN). However, 3D CNN is compute and memory intensive to implement due to large number of parameters and can easily overfit, especially in medical imaging where training data is limited. In order to address these problems, we propose a distillation-based depth shift module (Distill DSM). It is designed to enable 2D convolutions to make use of information from neighbouring frames more efficiently. Specifically, in each layer of the network, Distill DSM learns to extract information from a part of the channels and shares it with neighbouring slices, thus facilitating information exchange among neighbouring slices. This approach can be incorporated with any 2D CNN model to enable it to use information across the slices while introducing very few extra learnable parameters. We have evaluated our model on BRATS 2020, heart, hippocampus, pancreas and prostate datasets. Our model achieves better performance than 3D CNN for heart and prostate datasets and comparable performance on BRATS 2020, pancreas and hippocampus datasets with simply 28% of parameters compared to 3D CNN model.",
        "authors": "Harsh Maheshwari, Vidit Goel, Ramanathan Sethuraman, Debdoot Sheet",
        "award": "",
        "chairs": "Francesco Caliva, Christian Desrosiers",
        "cloudflare_video_id": "85e0e69196c99f3450f3ceecf31f067d",
        "id": "A10",
        "or_id": "_n48l6YKc6d",
        "oral": "False",
        "pdf": "/proceedings/maheshwari21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/maheshwari21a.html",
        "schedule": " Wednesday 7th July\nA4-12 (short): Segmentation - 13:45 - 14:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_106_poster.pdf",
        "title": "Distill DSM: Computationally efficient method for segmentation of medical imaging volumes",
        "url": "papers/A10.html",
        "video": "/videos/full_106_video.mp4",
        "youtube_video_id": "8cPCSlLsWzo"
    },
    "A11": {
        "abstract": "Acute ischemic stroke is caused by a blockage in the cerebral arteries, resulting in long-term disability and sometimes death. To determine the optimal treatment strategy, a patient-specific assessment is often based on advanced neuroimaging data, such as spatio-temporal (4D) CT Perfusion (CTP) imaging. To date, perfusion maps are typically calculated from 4D CTP data and then thresholded to localize and quantify the stroke lesion core and tissue-at-risk. A few studies have recently developed deep learning methods to predict stroke lesion outcomes from perfusion maps. The basic idea of these is to train a model, using perfusion maps acquired at baseline and their corresponding follow-up images acquired several days after treatment, to automatically estimate the final lesion location and volume in new patients. Nevertheless, model training based on the original 4D CTP scans might be desirable, as they could contain more valuable information not directly represented in perfusion maps. Therefore, we aimed to develop and evaluate a temporal convolutional neural network (TCN) to predict stroke lesion outcomes directly from 4D CTP datasets acquired at admission, without computing any perfusion maps. Using a total of 176 CTP scans, we investigated the impact of the time window size by training the proposed TCN on various numbers of CTP frames: 8, 16, and 32 time points. For comparison purposes, we also trained a convolutional neural network based on perfusion maps. The results show that the model trained on 32 time points yielded significantly higher Dice values (0.33 +/- 0.21) than the models trained on 8 time points (0.25 +/- 0.20; P<0.05), 16 time points (0.28 +/- 0.21; P<0.001), and perfusion maps (0.23 +/- 0.18; P<0.05). These experiments demonstrate that the proposed model effectively extracts spatio-temporal data from CTP scans to predict stroke lesion outcomes, which leads to better results than using perfusion maps.",
        "authors": "Kimberly Amador, Matthias Wilms, Anthony Winder, Jens Fiehler, Nils Forkert",
        "award": "",
        "chairs": "Francesco Caliva, Christian Desrosiers",
        "cloudflare_video_id": "f85cefe2af4e22cdfe7cf9314fc39d0d",
        "id": "A11",
        "or_id": "0YDEgvfwEW",
        "oral": "False",
        "pdf": "/proceedings/amador21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/amador21a.html",
        "schedule": " Wednesday 7th July\nA4-12 (short): Segmentation - 13:45 - 14:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_85_poster.pdf",
        "title": "Stroke Lesion Outcome Prediction Based on 4D CT Perfusion Data Using Temporal Convolutional Networks",
        "url": "papers/A11.html",
        "video": "/videos/full_85_video.mp4",
        "youtube_video_id": "neL6wuxa5gE"
    },
    "A12": {
        "abstract": "With in-line holography, it is possible to record biological cells over time in a three-dimensional hydrogel without the need for staining, providing the capability of observing cell behavior in a minimally invasive manner. However, this setup currently requires computationally intensive image-reconstruction algorithms to determine the required cell statistics. In this work, we directly extract cell positions from the holographic data by using deep neural networks and thus avoid several reconstruction steps. We show that our method is capable of substantially decreasing the time needed to extract information from the raw data without loss in quality.",
        "authors": "Philipp Gruening, Falk Nette, Noah Heldt, Ana Cristina Guerra de Souza, Erhardt Barth",
        "award": "",
        "chairs": "Francesco Caliva, Christian Desrosiers",
        "cloudflare_video_id": "6ba9e78a03b27ac6192ae28d9437af14",
        "id": "A12",
        "or_id": "2fpsTsvCgc0",
        "oral": "False",
        "pdf": "/proceedings/gruening21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/gruening21a.html",
        "schedule": " Wednesday 7th July\nA4-12 (short): Segmentation - 13:45 - 14:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_70_poster.pdf",
        "title": "Direct Inference of Cell Positions using Lens-Free Microscopy and Deep Learning",
        "url": "papers/A12.html",
        "video": "/videos/full_70_video.mp4",
        "youtube_video_id": "9JYnU0bRMAo"
    },
    "A2": {
        "abstract": "Automatic detection and segmentation of objects in 2D and 3D microscopy data is important for countless biomedical applications.\nIn the natural image domain, spatial embedding-based instance segmentation methods are known to yield high-quality results, but their utility for segmenting microscopy data is currently little researched. Here we introduce EmbedSeg, an embedding-based instance segmentation method which outperforms existing state-of-the-art baselines on 2D as well as 3D microscopy datasets.\nAdditionally, we show that EmbedSeg has a GPU memory footprint small enough to train even on laptop GPUs, making it accessible to virtually everyone. Finally, we introduce four new 3D microscopy datasets, which we make publicly available alongside ground truth training labels. Our open-source implementation is available at https://github.com/juglab/EmbedSeg.",
        "authors": "Manan Lalit, Pavel Tomancak, Florian Jug",
        "award": "",
        "chairs": "Minjeong Kim, Jelmer Wolterink",
        "cloudflare_video_id": "83eca510fc92b56ac2ad18b55d973b59",
        "id": "A2",
        "or_id": "JM6GuFGayL5",
        "oral": "True",
        "pdf": "/proceedings/lalit21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/lalit21a.html",
        "schedule": " Wednesday 7th July\nA1-3 (long): Segmentation - 13:00 - 13:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_129_poster.pdf",
        "title": "Embedding-based Instance Segmentation in Microscopy",
        "url": "papers/A2.html",
        "video": "/videos/full_129_video.mp4",
        "youtube_video_id": "trETG5zf3PI"
    },
    "A3": {
        "abstract": "Standard losses for training deep segmentation networks could be seen as individual classifications of pixels, instead of supervising the global shape of the predicted segmentations. While effective, they require exact knowledge of the label of each pixel in an image.\n\nThis study investigates how effective global geometric shape descriptors could be, when used on their own as segmentation losses for training deep networks. Not only interesting theoretically, there exist deeper motivations to posing segmentation problems as a reconstruction of shape descriptors: First, annotations to obtain approximations of low-order shape moments could be much less cumbersome than their full-mask counterparts, and anatomical priors could be readily encoded into invariant shape descriptions, which might alleviate the annotation burden. Also, some shape descriptors could be readily used to \"encode\" biomarkers, leading to better interpretability. Finally, and most importantly, we hypothesize that, given a task, certain shape descriptions might be invariant across image acquisition protocols/modalities and subject populations, which might open interesting research avenues for generalization in medical image segmentation.\n\nWe introduce and formulate a few shape descriptors in the context of deep segmentation, and evaluate their potential as stand-alone losses on two different, challenging tasks. Inspired by recent works in constrained optimization for deep networks, we propose a way to use those descriptors to supervise segmentation, without any pixel-level label. Very surprisingly, as little as 4 descriptors values per class can approach the performance of a segmentation mask with 65k individual discrete labels. We also found that shape descriptors can be a valid way to encode anatomical priors about the task, enabling to leverage expert knowledge without requiring additional annotations. Our implementation is publicly available and can be easily extended.",
        "authors": "Hoel Kervadec, Houda Bahig, Laurent Letourneau-Guillon, Jose Dolz, Ismail Ben Ayed",
        "award": "",
        "chairs": "Minjeong Kim, Jelmer Wolterink",
        "cloudflare_video_id": "24984e7d6bdd670b79d9df850906b41b",
        "id": "A3",
        "or_id": "nqe6e0oJ_fL",
        "oral": "True",
        "pdf": "/proceedings/kervadec21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/kervadec21a.html",
        "schedule": " Wednesday 7th July\nA1-3 (long): Segmentation - 13:00 - 13:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_139_poster.pdf",
        "title": "Beyond pixel-wise supervision: semantic segmentation with higher-order shape descriptors",
        "url": "papers/A3.html",
        "video": "/videos/full_139_video.mp4",
        "youtube_video_id": "ApKsoowlEGQ"
    },
    "A4": {
        "abstract": "While the importance of automatic biomedical image analysis is increasing at an enormous pace, recent meta-research revealed major flaws with respect to algorithm validation. Performance metrics are key for objective, transparent and comparative performance assessment, but little attention has been given to their pitfalls. Under the umbrella of the Helmholtz Imaging Platform (HIP), three international initiatives - the MICCAI Society's challenge working group, the Biomedical Image Analysis Challenges (BIAS) initiative, as well as the benchmarking working group of the MONAI framework - have now joined forces with the mission to generate best practice recommendations with respect to metrics in medical image analysis. Consensus building is achieved via a Delphi process, a popular tool for integrating opinions in large international consortia. The current document serves as a teaser for the results presentation and focuses on the pitfalls of the most commonly used metric in biomedical image analysis, the Dice Similarity Coefficient (DSC), in the categories of (1) mathematical properties/edge cases, (2) task/metric fit and (3) metric aggregation. Being compiled by a large group of experts from more than 30 institutes worldwide, we believe that our framework could be of general interest to the MIDL community and will improve the quality of biomedical image analysis algorithm validation.",
        "authors": "Annika Reinke, Matthias Eisenmann, Minu Dietlinde Tizabi, Carole H. Sudre, Tim R\u00e4dsch, Michela Antonelli, Tal Arbel, Spyridon Bakas, M. Jorge Cardoso, Veronika Cheplygina, Keyvan Farahani, Ben Glocker, Doreen Heckmann-N\u00f6tzel, Fabian Isensee, Pierre Jannin, Charles Kahn, Jens Kleesiek, Tahsin Kurc, Michal Kozubek, Bennett A. Landman, Geert Litjens, Klaus Maier-Hein, Anne Louise Martel, Bjoern Menze, Henning M\u00fcller, Jens Petersen, Mauricio Reyes, Nicola Rieke, Bram Stieltjes, Ronald M. Summers, Sotirios A. Tsaftaris, Bram van Ginneken, Annette Kopp-Schneider, Paul J\u00e4ger, Lena Maier-Hein",
        "award": "",
        "chairs": "Francesco Caliva, Christian Desrosiers",
        "cloudflare_video_id": "9c9ec2eaa6f8028398fd2767a7bfea44",
        "id": "A4",
        "or_id": "76X9Mthzv4X",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=76X9Mthzv4X",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nA4-12 (short): Segmentation - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_21_poster.pdf",
        "title": "Common limitations of performance metrics in biomedical image analysis",
        "url": "papers/A4.html",
        "video": "/videos/short_21_video.mp4",
        "youtube_video_id": "toehYwAvu6A"
    },
    "A5": {
        "abstract": "Segmenting and labeling correctly the individual ribs from chest radiograph (CXR) are of significant clinical value for several diagnostic tasks. Developing automatic deep learning (DL) algorithms for this task requires annotated images of the ribs at pixel-level. However, to the best of our knowledge, there exist no such public datasets as well as benchmark protocols for performance evaluation. To solve this problem, we establish a new CXR dataset, namely VinDr-RibCXR, for automatically segmenting and labeling of individual ribs. The VinDr-RibCXR contains 245 posteroanterior CXRs with corresponding segmentation annotations for each rib provided by human experts. Furthermore, we train the state-of-the-art DL-based segmentation models on 196 images from the RibCXR and report performance of those models on an independent test set of 49 images. Our best performing DL model (i.e., Nested U-Net with EfficientNet-B0) obtains a Dice score of 0.834 (95% CI, 0.810-0.853). The sensitivity, specificity and Hausdorff distance are 0.841 (95% CI, 0.812-0.858), 0.998 (95% CI, 0.997-0.998), and 15.453 (95% CI, 13.340-17.450), respectively. These results demonstrate a high-level of performance in labeling of the individual ribs from CXRs. Our study, therefore, serves as a proof of concept and baseline performance for future research. The dataset, codes, and trained DL models will be made publicly available to encourage new advances in this research direction.",
        "authors": "Hoang Canh Nguyen, Tung Thanh Le, Hieu Pham, Ha Quy Nguyen",
        "award": "",
        "chairs": "Francesco Caliva, Christian Desrosiers",
        "cloudflare_video_id": "1f065aacd8d978c38d4e4bea2395b19a",
        "id": "A5",
        "or_id": "oJi6xpSLdsj",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=oJi6xpSLdsj",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nA4-12 (short): Segmentation - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_36_poster.pdf",
        "title": "VinDr-RibCXR: A Benchmark Dataset for Automatic Segmentation and Labeling of Individual Ribs on Chest X-rays",
        "url": "papers/A5.html",
        "video": "/videos/short_36_video.mp4",
        "youtube_video_id": "DBUDZlOOOyw"
    },
    "A6": {
        "abstract": "Studying brain architecture at the cellular level requires histological image analysis of sectioned postmortem samples. We trained a deep neural network to estimate relative angles between the cutting plane and the local 3D brain surface from 2D cortical image patches sampled from microscopic scans of human brain tissue sections. The model allows to automatically identify obliquely cut tissue parts, which often confuse downstream texture classification tasks and typically require specific treatment in image analysis workflows. It has immediate applications for the automated analysis of brain structures, like cytoarchitectonic mapping of the highly convoluted human brain.",
        "authors": "Christian Schiffer, Luisa Schuhmacher, Katrin Amunts, Timo Dickscheid",
        "award": "",
        "chairs": "Francesco Caliva, Christian Desrosiers",
        "cloudflare_video_id": "c64424a08ae2312b45c8bfabbce66e64",
        "id": "A6",
        "or_id": "9CSM4yQmZiN",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=9CSM4yQmZiN",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nA4-12 (short): Segmentation - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_6_poster.pdf",
        "title": "Learning to predict cutting angles from histological human brain sections",
        "url": "papers/A6.html",
        "video": "/videos/short_6_video.mp4",
        "youtube_video_id": "4XyUF6dz5pk"
    },
    "A7": {
        "abstract": "Towards computer-assisted neurosurgery, robust methods for instrument localization on neurosurgical microscope video data are needed. Specifically for neurosurgical data, challenges arise from visual conditions such as strong blur and from an unknowingly large variety of instrument types. For neurosurgical domain, instrument localization methods must generalize across different sub-disciplines such as cranial tumor and aneurysm surgeries which exhibit different visual properties. We present and evaluate a methodology towards robust instrument tip localization for neurosurgical microscope data, formulated as coarse saliency prediction. For our analysis, we build a comprehensive dataset comprising in-the-wild data from several neurosurgical sub-disciplines as well as phantom surgeries. Comparing single stream networks using either image or optical flow information, we find complementary performance of both networks. Plain optical flow enables better cross-domain generalization, while the image-based network performs better on surgeries from the training domain. Based on these findings, we present a two-stream architecture that fuses image and optical flow information to utilize the complementary performance of both. Being trained on tumor surgeries, our architecture outperforms both single stream networks and shows improved robustness on data from different neurosurgical sub-disciplines. From our findings, future work must focus more on how to incorporate optical flow information into fusion architectures to further improve cross-domain generalization.",
        "authors": "Markus Philipp, Anna Alperovich, Marielena Gutt-Will, Andrea Mathis, Stefan Saur, Andreas Raabe, Franziska Mathis-Ullrich",
        "award": "",
        "chairs": "Francesco Caliva, Christian Desrosiers",
        "cloudflare_video_id": "d8c70a3da8f18ead5aaec0fbeb6b8ee1",
        "id": "A7",
        "or_id": "21m0dBCMdd",
        "oral": "False",
        "pdf": "/proceedings/philipp21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/philipp21a.html",
        "schedule": " Wednesday 7th July\nA4-12 (short): Segmentation - 13:45 - 14:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_114_poster.pdf",
        "title": "Localizing neurosurgical instruments across domains and in the wild",
        "url": "papers/A7.html",
        "video": "/videos/full_114_video.mp4",
        "youtube_video_id": "t7bm5NP_C0o"
    },
    "A8": {
        "abstract": "Weakly supervised segmentation is an important problem in medical image analysis due to the high cost of pixelwise annotation. Prior methods, while often focusing on weak labels of 2D images, exploit few structural cues of volumetric medical images. To address this, we propose a novel weakly-supervised segmentation strategy capable of better capturing 3D shape prior in both model prediction and learning. Our main idea is to extract a self-taught shape representation by leveraging weak labels, and then integrate this representation into segmentation prediction for shape refinement. To this end, we design a deep network consisting of a segmentation module and a shape denoising module, which are trained by an iterative learning strategy. Moreover, we introduce a weak annotation scheme with a hybrid label design for volumetric images, which improves model learning without increasing the overall annotation cost. The empirical experiments show that our approach outperforms existing SOTA strategies on three organ segmentation benchmarks with distinctive shape properties. Notably, we can achieve strong performance with even 10% labeled slices, which is significantly superior to other methods.",
        "authors": "Qian He, Shuailin Li, Xuming He",
        "award": "",
        "chairs": "Francesco Caliva, Christian Desrosiers",
        "cloudflare_video_id": "c8b67f371835df4a3586902ebf5dd39a",
        "id": "A8",
        "or_id": "Koyg3kvH-Mq",
        "oral": "False",
        "pdf": "/proceedings/he21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/he21a.html",
        "schedule": " Wednesday 7th July\nA4-12 (short): Segmentation - 13:45 - 14:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_69_poster.pdf",
        "title": "Weakly Supervised Volumetric Segmentation via Self-taught Shape Denoising Model",
        "url": "papers/A8.html",
        "video": "/videos/full_69_video.mp4",
        "youtube_video_id": "jRgU2GIhwUI"
    },
    "A9": {
        "abstract": "Medical images are often accompanied by metadata describing the image (vendor, acquisition parameters) and the patient (disease type or severity, demographics, genomics). This metadata is usually disregarded by image segmentation methods. In this work, we adapt a linear conditioning method called FiLM (Feature-wise Linear Modulation) for image segmentation tasks. This FiLM adaptation enables integrating metadata into segmentation models for better performance. We observed an average Dice score increase of 5.1% on spinal cord tumor segmentation when incorporating the tumor type with FiLM. The metadata modulates the segmentation process through low-cost affine transformations applied on feature maps which can be included in any neural network's architecture. Additionally, we assess the relevance of segmentation FiLM layers for tackling common challenges in medical imaging: training with limited or unbalanced number of annotated data, multi-class training with missing segmentations, and model adaptation to multiple tasks. Our results demonstrated the following benefits of FiLM for segmentation: FiLMed U-Net was robust to missing labels and reached higher Dice scores with few labels (up to 16.7%) compared to single-task U-Net. The code is open-source and available at www.ivadomed.org.",
        "authors": "Andreanne Lemay, Charley Gros, Olivier Vincent, Yaou Liu, Joseph Paul Cohen, Julien Cohen-Adad",
        "award": "",
        "chairs": "Francesco Caliva, Christian Desrosiers",
        "cloudflare_video_id": "255e693a3eb646e15ed8c51612d8e5c3",
        "id": "A9",
        "or_id": "fa176bQAbr",
        "oral": "False",
        "pdf": "/proceedings/lemay21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/lemay21a.html",
        "schedule": " Wednesday 7th July\nA4-12 (short): Segmentation - 13:45 - 14:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_80_poster.pdf",
        "title": "Benefits of Linear Conditioning for Segmentation using Metadata",
        "url": "papers/A9.html",
        "video": "/videos/full_80_video.mp4",
        "youtube_video_id": "WKFtlnlSf0I"
    },
    "B1": {
        "abstract": "In this paper, we take the recently presented PseudoEdgeNet model to the level of multi-class cell segmentation in histopathology images solely trained with point annotations. We tailor its loss function to the challenges of multi-class segmentation and equip it with an additional false positive loss term. We evaluate it on the assessment of tumor and immune cells in PD-L1 stained lung cancer histopathology images, and compare it with YOLOv5.",
        "authors": "Jeroen Vermazeren, Leander van Eekelen, Luca Dulce Meesters, Monika Looijen-Salamon, Shoko Vos, Enrico Munari, Caner Mercan, Francesco Ciompi",
        "award": "",
        "chairs": "Mitko Veta, Jianhua Yao",
        "cloudflare_video_id": "f511a04eee916c880625b7755ed11ba4",
        "id": "B1",
        "or_id": "rHAiz2pnxkB",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=rHAiz2pnxkB",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nB1-9 (short): Application: Histopathology - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_53_poster.pdf",
        "title": "\u03bcPEN: Multi-class PseudoEdgeNet for PD-L1 assessment",
        "url": "papers/B1.html",
        "video": "/videos/short_53_video.mp4",
        "youtube_video_id": "i2bUr6INHPI"
    },
    "B2": {
        "abstract": "One of the most important challenges for computer-aided analysis in digital pathology is the development of robust deep neural networks, which can cope with variations in color and resolution of digitized whole-slide images (WSIs). It has been shown that color augmentation during training is a useful method to aid a model generalize better to heterogeneous data. In this work, we compare state of the art models EfficientNet, Xception, Inception, ResNet, DenseNet, MobileNet and QuickNet on a multi-scanner database comprising slides each digitized with six different scanners. All of the networks are trained with data of only one scanner applying a combination of color and blur augmentation techniques. All models show similar tendencies across the different scanner databases but differ in the overall classification accuracy. Differences in training and inference time, however, are more pronounced: on a mid-range GPU, the inference time of the fastest model (QuickNet) is 13 times faster than the slowest one (EfficientNet B4). There is also a trade-off between speed and accuracy, the slower networks are more stable across different scanners and show the overall best performance. A good compromise between quality and inference time is achieved by EfficientNet B0.",
        "authors": "Petr Kuritcyn, Michaela Benz, Jakob Dexl, Volker Bruns, Arndt Hartmann, Carol Geppert",
        "award": "",
        "chairs": "Mitko Veta, Jianhua Yao",
        "cloudflare_video_id": "c49be9c6cc5f7440b2cd4d4560c1cbdd",
        "id": "B2",
        "or_id": "CdQn5goh0E4",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=CdQn5goh0E4",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nB1-9 (short): Application: Histopathology - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_70_poster.pdf",
        "title": "Comparison of CNN models on a multi-scanner database in colon cancer histology",
        "url": "papers/B2.html",
        "video": "/videos/short_70_video.mp4",
        "youtube_video_id": "ZZ-4GOixVTc"
    },
    "B3": {
        "abstract": "Stochastic gradient descent (SGD) is the main approach for training deep networks: it moves towards the optimum of the cost function by iteratively updating the parameters of a model in the direction of the gradient of the loss evaluated on a minibatch. Several variants of SGD have been proposed to make adaptive step sizes for each parameter (adaptive gradient) and take into account the previous updates (momentum). Among the several alternatives to SGD, the most popular are AdaGrad, AdaDelta, RMSProp and Adam, which scale coordinates of the gradient by square roots of some form of averaging of the squared coordinates in the past gradients and automatically adjust the learning rate on a parameter basis. In this work, we compare Adam-based variants in which the step size is adjusted for each parameter based on the difference between the present and the past gradients. We run several tests benchmarking proposed methods using medical image data. The experiments are performed using ResNet50 architecture neural network. Moreover, we have tested ensemble of networks and the fusion with ResNet50 trained with stochastic gradient descent. To combine the set of ResNet50 the simple sum rule has been applied. The proposed ensemble obtains very high performance: its accuracy is comparable to or better than the current state of the art. To improve reproducibility and research efficiency the MATLAB source code used for this research is available at GitHub: https://github.com/LorisNanni.",
        "authors": "Loris Nanni, Gianluca Maguolo, Alessandra Lumini",
        "award": "",
        "chairs": "Mitko Veta, Jianhua Yao",
        "cloudflare_video_id": "26b0f5d7391b05124b2478d43cb407b1",
        "id": "B3",
        "or_id": "RFwhfWEMyzm",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=RFwhfWEMyzm",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nB1-9 (short): Application: Histopathology - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_19_poster.pdf",
        "title": "Exploiting Adam-like Optimization Algorithms to Improve the Performance of Convolutional Neural Networks",
        "url": "papers/B3.html",
        "video": "/videos/short_19_video.mp4",
        "youtube_video_id": "QUs-PfOT7ug"
    },
    "B4": {
        "abstract": "Nuclei segmentation using deep learning has been achieving high accuracy using U-Net and variants, but a remaining challenge is distinguishing touching and overlapping cells. In this work, we propose using gated CNN (GCNN) networks to obtain sharper predictions around object boundaries and improve nuclei segmentation performance. The method is evaluated in over 1000 multicentre diverse H&E breast cancer images from three databases and compared to baseline U-Net and R2U-Net.",
        "authors": "Shana Beniamin, April Khademi, Dimitri Androutsos",
        "award": "",
        "chairs": "Mitko Veta, Jianhua Yao",
        "cloudflare_video_id": "185d25edfe0e315546e9b0baa0149520",
        "id": "B4",
        "or_id": "fQDGt0RJkMu",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=fQDGt0RJkMu",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nB1-9 (short): Application: Histopathology - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_144_poster.pdf",
        "title": "Gated CNNs for Nuclei Segmentation in H&E Breast Images",
        "url": "papers/B4.html",
        "video": "/videos/short_144_video.mp4",
        "youtube_video_id": "ZhjuDbZ5R8A"
    },
    "B5": {
        "abstract": "Self-supervised learning (SSL) has demonstrated success in computer vision tasks for natural images, and recently histopathological images, where there is limited availability of annotations. Despite this, there has been limited research into how the diversity of source data used for SSL tasks impacts performance. The current study quantifies changes to downstream classification of metastatic tissue in lymph node sections of the PatchCamelyon dataset when datasets from different domains (natural images, textures, histology) are used for SSL pre-training. We show that for cases with limited training data, using diverse datasets from different domains for SSL pre-training can achieve comparable performance when compared with SSL pre-training on the target dataset.",
        "authors": "Kristina Lynn Kupferschmidt, Eu Wern Teh, Graham W. Taylor",
        "award": "",
        "chairs": "Mitko Veta, Jianhua Yao",
        "cloudflare_video_id": "76f08a10bb0f8f35cd429b3fa4b36e58",
        "id": "B5",
        "or_id": "cA4VVWbNO-",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=cA4VVWbNO-",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nB1-9 (short): Application: Histopathology - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_99_poster.pdf",
        "title": "Strength in Diversity: Understanding the impacts of diverse training sets in self-supervised pre-training for histology images",
        "url": "papers/B5.html",
        "video": "/videos/short_99_video.mp4",
        "youtube_video_id": "PjIHdRWSsrU"
    },
    "B6": {
        "abstract": "Diagnosis of cancer often relies on the time-consuming examination of histopathology slides by expert pathologists. Automation via supervised deep learning methods requires large amounts of pixel-wise annotated data that is costly to acquire. Unsupervised density estimation methods that rely only on the availability of healthy examples could cut down the cost of annotation. We propose to use residual flows as density estimator and compare different tests for out-of-distribution (OOD) detection. Our results suggest that unsupervised OOD detection is a viable approach for detecting suspicious regions in histopathology slides.",
        "authors": "Nick Pawlowski, Ben Glocker",
        "award": "",
        "chairs": "Mitko Veta, Jianhua Yao",
        "cloudflare_video_id": "b8c44bb1b28825e043021e5126142a51",
        "id": "B6",
        "or_id": "-j7vnPsPWys",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=-j7vnPsPWys",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nB1-9 (short): Application: Histopathology - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_94_poster.pdf",
        "title": "Abnormality Detection in Histopathology via Density Estimation with Normalising Flows",
        "url": "papers/B6.html",
        "video": "/videos/short_94_video.mp4",
        "youtube_video_id": "Uz68iNSsfzU"
    },
    "B7": {
        "abstract": "Advances in multiplex biomarker imaging systems have enabled the study of complex spatial biology within the tumor microenvironment. However, the high-resolution multiplexed images are often only available for a subset of regions of interest (RoIs), clinical data is not easily accessible and the datasets are generally too small to apply off-the-shelf deep learning methods commonly used in histopathology. In this paper, we focus on datasets with few images and without labels, and aim to learn representations for slides. We choose a task of patient identification that leads our new model to select RoIs with discriminative properties and infer patient-specific features that can be used later for any task via transfer learning. The experimental results on the synthetic data generated by taking the tumor microenvironment into account indicate that the proposed method is a promising step towards computer-aided analysis in unlabeled datasets of high-resolution images.",
        "authors": "Yinan Zhang, Beril Besbinar, Pascal Frossard",
        "award": "",
        "chairs": "Mitko Veta, Jianhua Yao",
        "cloudflare_video_id": "55e42f9bc4ad9aba3bf9288fa0b21de2",
        "id": "B7",
        "or_id": "hPUnpHJHuy",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=hPUnpHJHuy",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nB1-9 (short): Application: Histopathology - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_73_poster.pdf",
        "title": "Learning to Represent Whole Slide Images by Selecting Cell Graphs of Patches",
        "url": "papers/B7.html",
        "video": "/videos/short_73_video.mp4",
        "youtube_video_id": "j6EA8doAI9c"
    },
    "B8": {
        "abstract": "Quality control is an integral part in the digitization process of whole-slide histopathology images due to artifacts that arise during various stages of slide preparation. Manual control and supervision of these gigapixel images are labor-intensive. Therefore, we report the first multi-class deep learning model trained on whole-slide images covering multiple tissue and stain types for semantic segmentation of artifacts. Our approach reaches a Dice score of 0.91, on average, across six artifact types, and outperforms the competition on an external test set. Finally, we extend the artifact segmentation network to a multi-decision quality control system that can be deployed in routine clinical practice.",
        "authors": "Gijs Smit, Francesco Ciompi, Maria Cig\u00e9hn, Anna Bod\u00e9n, Jeroen van der Laak, Caner Mercan",
        "award": "",
        "chairs": "Mitko Veta, Jianhua Yao",
        "cloudflare_video_id": "1ba05fb327b8d2b665c3a4f658c8802e",
        "id": "B8",
        "or_id": "7EZ4JOtlRl",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=7EZ4JOtlRl",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nB1-9 (short): Application: Histopathology - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_64_poster.pdf",
        "title": "Quality control of whole-slide images through multi-class semantic segmentation of artifacts",
        "url": "papers/B8.html",
        "video": "/videos/short_64_video.mp4",
        "youtube_video_id": "Ec9kWeKk-3Q"
    },
    "B9": {
        "abstract": "In recent years, the availability of digitized Whole Slide Images (WSIs) has enabled the use of deep learning-based computer vision techniques for automated disease diagnosis. However, WSIs present unique computational and algorithmic challenges. WSIs are gigapixel-sized (approx. 100K pixels), making them infeasible to be used directly for training deep neural networks. Also, often only slide-level labels are available for training as detailed annotations are tedious and can be time-consuming for experts. Approaches using multiple-instance learning (MIL) frameworks have been shown to overcome these challenges. Current state-of-the-art approaches divide the learning framework into two decoupled parts: a convolutional neural network (CNN) for encoding the patches followed by an independent aggregation approach for slide-level prediction. In this approach, the aggregation step has no bearing on the representations learned by the CNN encoder. We have proposed an end-to-end framework that clusters the patches from a WSI into k-groups, samples k' patches from each group for training, and uses an adaptive attention mechanism for slide level prediction; Cluster-to-Conquer (C2C). We have demonstrated that dividing a WSI into clusters can improve the model training by exposing it to diverse discriminative features extracted from the patches. We regularized the clustering mechanism by introducing a KL-divergence loss between the attention weights of patches in a cluster and the uniform distribution. The framework is optimized end-to-end on slide-level cross-entropy, patch-level cross-entropy, and KL-divergence loss.",
        "authors": "Yash Sharma, Aman Shrivastava, Lubaina Ehsan, Christopher A. Moskaluk, Sana Syed, Donald Brown",
        "award": "",
        "chairs": "Mitko Veta, Jianhua Yao",
        "cloudflare_video_id": "ae868e31e5222b59c0f40d42ec200d74",
        "id": "B9",
        "or_id": "7i1-2oKIELU",
        "oral": "False",
        "pdf": "/proceedings/sharma21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/sharma21a.html",
        "schedule": " Wednesday 7th July\nB1-9 (short): Application: Histopathology - 13:45 - 14:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_161_poster.pdf",
        "title": "Cluster-to-Conquer: A Framework for End-to-End Multi-Instance Learning for Whole Slide Image Classification",
        "url": "papers/B9.html",
        "video": "/videos/full_161_video.mp4",
        "youtube_video_id": "G-28G5nr8dg"
    },
    "C1": {
        "abstract": "Biomedical semantic segmentation is typically performed on dedicated, costly hardware. In a recent study, we suggested an optimized, tiny-weight U-Net for an inexpensive hardware accelerator, the Google Edge TPU. Using an open biomedical dataset for high-speed laryngeal videoendoscopy, we exemplarily show that we can dramatically reduce the parameter space and computations while keeping a high segmentation quality. Using a custom upsampling routine, we fully deployed optimized architectures to the Edge TPU. Combining the optimized architecture and the Edge TPU, we gain a total speedup of >79x compared to our initial baseline while keeping a high accuracy. This combination makes it possible to provide immediate results at the point of care, especially in constrained computational environments.",
        "authors": "Andreas M Kist, Michael D\u00f6llinger",
        "award": "",
        "chairs": "Sandy Engelhardt, Lena Maier-Hein",
        "cloudflare_video_id": "8f2cdc3741b1a9da54d0ad597a4ebe48",
        "id": "C1",
        "or_id": "HajxTQpPniD",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=HajxTQpPniD",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nC1-9 (short): Endoscopy and Validation Studies - 16:45 - 17:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_55_poster.pdf",
        "title": "Efficient biomedical image segmentation on Edge TPUs",
        "url": "papers/C1.html",
        "video": "/videos/short_55_video.mp4",
        "youtube_video_id": "jHvZqGwqk14"
    },
    "C2": {
        "abstract": "Semantic segmentation has a wide array of applications ranging from medical-image analysis, scene understanding, autonomous driving and robotic navigation. This work deals with medical image segmentation and in particular with accurate polyp detection and segmentation during colonoscopy examinations. Several convolutional neural network architectures have been proposed to effectively deal with this task and with the problem of segmenting objects at different scale input. The basic architecture in image segmentation consists of an encoder and a decoder: the first uses convolutional filters to extract features from the image, the second is responsible for generating the final output. In this work, we compare some variants of the DeepLab architecture obtained by varying the decoder backbone. We compare several decoder architectures, including ResNet, Xception, EfficientNet, MobileNet and we perturb their layers by substituting ReLU activation layers with other functions. The resulting methods are used to create deep ensembles which are shown to be very effective. Our experimental evaluations show that our best ensemble produces good segmentation results by achieving high evaluation scores with a dice coefficient of 0.884, and a mean Intersection over Union (mIoU) of 0.818 for the Kvasir-SEG dataset. To improve reproducibility and research efficiency the MATLAB source code used for this research is available at GitHub: https://github.com/LorisNanni.",
        "authors": "Alessandra Lumini, Loris Nanni, Gianluca Maguolo",
        "award": "",
        "chairs": "Sandy Engelhardt, Lena Maier-Hein",
        "cloudflare_video_id": "143be009291dc94dc5c41d6f3f96849b",
        "id": "C2",
        "or_id": "NJcszyl19PN",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=NJcszyl19PN",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nC1-9 (short): Endoscopy and Validation Studies - 16:45 - 17:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_24_poster.pdf",
        "title": "Deep ensembles based on Stochastic Activation Selection for Polyp Segmentation",
        "url": "papers/C2.html",
        "video": "/videos/short_24_video.mp4",
        "youtube_video_id": "PPtIyzFunWc"
    },
    "C3": {
        "abstract": "We present the first place recognition system trained specifically for colonoscopy sequences. We use the convolutional neural network for image retrieval proposed by Radenovic et al. and we fine-tune it using image pairs from real human colonoscopies. The colonoscopy frames are clustered automatically by a Structure-from-Motion (SfM) algorithm, which has proven to cope with scene deformation and illumination changes. The experiments show that the system is able to generalize by testing in a different human colonoscopy, retrieving frames observing the same place despite the different viewpoint and illumination changes. The proposed place recognition would be a key component of Simultaneous Localization and Mapping (SLAM) systems operating in colonoscopy to assist doctors during the explorations or to support robotization.",
        "authors": "Javier Morlana, Pablo Azagra Mill\u00e1n, Javier Civera, Jos\u00e9 M. M. Montiel",
        "award": "",
        "chairs": "Sandy Engelhardt, Lena Maier-Hein",
        "cloudflare_video_id": "f3d56c5b1438ffa7e02cb8c6e6ea9506",
        "id": "C3",
        "or_id": "tgkEqYyA12p",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=tgkEqYyA12p",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nC1-9 (short): Endoscopy and Validation Studies - 16:45 - 17:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_120_poster.pdf",
        "title": "Self-supervised Visual Place Recognition for Colonoscopy Sequences",
        "url": "papers/C3.html",
        "video": "/videos/short_120_video.mp4",
        "youtube_video_id": "dfZqJF4UZC8"
    },
    "C4": {
        "abstract": "Selecting task appropriate deep learning models is a resource intensive process; more so when working with large quantities of high dimensional data that are encountered in medical imaging. Model selection procedures that are primarily aimed at improving performance measures such as accuracy could become biased towards resource intensive models. In this work, we propose to inform and drive the model selection procedure using the carbon footprint of training deep learning models as a complementary measure along with other standard performance metrics. We experimentally demonstrate that increasing carbon footprint of large models might not necessarily translate into proportional performance gains, and suggest useful trade-offs to obtain resource efficient models.",
        "authors": "Raghavendra Selvan",
        "award": "",
        "chairs": "Sandy Engelhardt, Lena Maier-Hein",
        "cloudflare_video_id": "a5ccb8108ec5c6e27b42b857441cef99",
        "id": "C4",
        "or_id": "1TPRpNyyj2L",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=1TPRpNyyj2L",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nC1-9 (short): Endoscopy and Validation Studies - 16:45 - 17:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_15_poster.pdf",
        "title": "Carbon footprint driven deep learning model selection for medical imaging",
        "url": "papers/C4.html",
        "video": "/videos/short_15_video.mp4",
        "youtube_video_id": "Tm8vOWJHnso"
    },
    "C5": {
        "abstract": "Surgical workflow recognition has been playing an essential role in computer-assisted interventional systems for modern operating rooms. In this paper, we present a computer vision-based method named SWNet that focuses on utilizing spatial information and temporal information from the surgical video to achieve surgical workflow recognition. As the first step, we utilize Interaction-Preserved Channel-Separated Convolutional Network (IP-CSN) to extract features that contain spatial information and local temporal information from the surgical video through segments. Secondly, we train a Multi-Stage Temporal Convolutional Network (MS-TCN) with those extracted features to capture global temporal information from the full surgical video. Finally, by utilizing Prior Knowledge Noise Filtering (PKNF), prediction noise from the output of MS-TCN is filtered. We evaluate SWNet for Sleeve Gastrectomy surgical workflow recognition. SWNet achieves 90% frame-level accuracy and reaches a weighted Jaccard Score of 0.8256. This demonstrates that SWNet has considerable potential to solve the surgical workflow recognition problem.",
        "authors": "Bokai Zhang, Amer Ghanem, Alexander Simes, Henry Choi, Andrew Yoo, Andrew Min",
        "award": "",
        "chairs": "Sandy Engelhardt, Lena Maier-Hein",
        "cloudflare_video_id": "246a6e1fc8a936c0de617fdf47ff45e8",
        "id": "C5",
        "or_id": "g1sESqlP214",
        "oral": "False",
        "pdf": "/proceedings/zhang21c.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/zhang21b.html",
        "schedule": " Wednesday 7th July\nC1-9 (short): Endoscopy and Validation Studies - 16:45 - 17:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_27_poster.pdf",
        "title": "SWNet: Surgical Workflow Recognition with Deep Convolutional Network",
        "url": "papers/C5.html",
        "video": "/videos/full_27_video.mp4",
        "youtube_video_id": "DxyJ0UKPqog"
    },
    "C6": {
        "abstract": "Prior work demonstrated the ability of machine learning to automatically recognize surgical workflow steps from videos. However, these studies focused on only a single type of procedure. In this work, we analyze, for the first time, surgical step recognition on four different laparoscopic surgeries: Cholecystectomy, Right Hemicolectomy, Sleeve Gastrectomy, and Appendectomy. Inspired by the traditional apprenticeship model, in which surgical training is based on the Halstedian method, we paraphrase the \u201csee one, do one, teach one\u201d approach for the surgical intelligence domain as \u201ctrain one, classify one, teach one\u201d. In machine learning, this approach is often referred to as transfer learning. To analyze the impact of transfer learning across different laparoscopic procedures, we explore various time-series architectures and examine their performance on each target domain. We introduce a new architecture, the Time-Series Adaptation Network (TSAN), an architecture optimized for transfer learning of surgical step recognition, and we show how TSAN can be pre-trained using self-supervised learning on a Sequence Sorting task. Such pre-training enables TSAN to learn workflow steps of a new laparoscopic procedure type from only a small number of labeled samples from the target procedure. Our proposed architecture leads to better performance compared to other possible architectures, reaching over 90% accuracy when transferring from laparoscopic Cholecystectomy to the other three procedure types.",
        "authors": "Daniel Neimark, Omri Bar, Maya Zohar, Gregory D. Hager, Dotan Asselmann",
        "award": "",
        "chairs": "Sandy Engelhardt, Lena Maier-Hein",
        "cloudflare_video_id": "33cb10acca0a612c6d474b2795cb88a9",
        "id": "C6",
        "or_id": "cTB4Qz3RzCl",
        "oral": "False",
        "pdf": "/proceedings/neimark21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/neimark21a.html",
        "schedule": " Wednesday 7th July\nC1-9 (short): Endoscopy and Validation Studies - 16:45 - 17:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_44_poster.pdf",
        "title": "\u201cTrain one, Classify one, Teach one\u201d - Cross-surgery transfer learning for surgical step recognition",
        "url": "papers/C6.html",
        "video": "/videos/full_44_video.mp4",
        "youtube_video_id": "FTi-RYaQbaM"
    },
    "C7": {
        "abstract": "Automated analyses of chest imaging in Coronavirus Disease 2019 (COVID-19) have largely focused on a single timepoint, usually at disease presentation, and have not explicitly taken into account temporal disease manifestations. We present a deep learning-based approach for prediction of imaging progression from serial chest radiographs (CXRs) of COVID-19 patients. Our method first utilizes convolutional neural networks (CNNs) for feature extraction from patches within the concerned lung zone, and also from neighboring areas to enhance the contextual phenotypic information. The framework further incorporates two distinct spatio-temporal Long Short Term Memory (LSTM) modules for effective predictions. The first LSTM module captures spatial dependencies between patches and the second exploits the temporal context of sequential CXR scans. The resulting network focuses on critical image regions that provide relevant information for learning the progression of lung infiltrates without the explicit need for infiltrate segmentation. The second LSTM provides an encoded context vector used as an input to a decoder module to predict future severity grades. Our novel multi-institutional dataset comprises sequential CXR scans from N=100 patients. Specifically, our framework predicts zone-wise disease severity for a patient on the last day by learning representations from the previous temporal CXRs. We design two baseline approaches - one using fine-tuned VGG-16 features and the other using radiomic descriptors. Experimental results demonstrate that our proposed approach outperforms both baselines in average accuracy by 10.33% and 12.16%, respectively, in predicting COVID-19 progression severity.",
        "authors": "Aishik Konwer, Joseph Bae, Gagandeep Singh, Rishabh Gattu, Syed Ali, Jeremy Green, Tej Phatak, Amit Gupta, Chao Chen, Joel Saltz, Prateek Prasanna",
        "award": "",
        "chairs": "Sandy Engelhardt, Lena Maier-Hein",
        "cloudflare_video_id": "c3fc1ae73541ca0cb63e802885feb952",
        "id": "C7",
        "or_id": "96BhL_MERil",
        "oral": "False",
        "pdf": "/proceedings/konwer21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/konwer21a.html",
        "schedule": " Wednesday 7th July\nC1-9 (short): Endoscopy and Validation Studies - 16:45 - 17:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_123_poster.pdf",
        "title": "Predicting COVID-19 Lung Infiltrate Progression on Chest Radiographs Using Spatio-temporal LSTM based Encoder-Decoder Network",
        "url": "papers/C7.html",
        "video": "/videos/full_123_video.mp4",
        "youtube_video_id": "-1AOXy5GJ7E"
    },
    "C8": {
        "abstract": "Images offer a two-dimensional (2D) representation of a three-dimensional (3D) environment. However, in many biomedical tasks, a 3D view is crucial for diagnosis. Projecting structured light, such as a regular laser grid, onto the surface of interest makes it possible to reconstruct its 3D structure. For reconstruction, it is crucial to correctly identify and assign each laser ray to its respective position in the laser grid. Current methods for this task use semi-automatic, yet highly manual annotations. Hence, a fully automatic, reliable method is desired. Here, we show that this assignment can be approached as an image registration. We first separate the laser rays from the background using semantic segmentation. We found that registration of the extracted laser rays directly to the fixed laser grid image fails, when we use state-of-the-art intensity-based image registration techniques, such as ANTs. Using our feature-based custom loss and a deep neural network, we are able to use a U-Net-like architecture to compute deformation fields to successfully register the laser rays onto the fixed image accompanied with a custom post-processing sorting step. Using synthetic data, we show that the network is in general able to learn affine and non-linear transformations. Our method is also robust to missing or occluded rays. Using an ex vivo dataset, we achieved a registration accuracy of 91%. In summary, we provide a new platform to perform feature-based registration and showcase this on a biomedical dataset. In the future, we will evaluate different architectural designs and more complex datasets.",
        "authors": "Andreas M Kist, Julian Zilker, Michael D\u00f6llinger, Marion Semmler",
        "award": "",
        "chairs": "Sandy Engelhardt, Lena Maier-Hein",
        "cloudflare_video_id": "18b3f989af37dbfb3adf7ae46ed72adc",
        "id": "C8",
        "or_id": "MzC8X6cMF2r",
        "oral": "False",
        "pdf": "/proceedings/kist21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/kist21a.html",
        "schedule": " Wednesday 7th July\nC1-9 (short): Endoscopy and Validation Studies - 16:45 - 17:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_110_poster.pdf",
        "title": "Feature-based image registration in structured light endoscopy",
        "url": "papers/C8.html",
        "video": "/videos/full_110_video.mp4",
        "youtube_video_id": "PIWjjUsIOPk"
    },
    "C9": {
        "abstract": "Intensity of acquired electron microscopy data is subjected to large variability due to the interplay of many different factors, such as microscope and camera settings used for data acquisition, sample thickness, specimen staining protocol and more. In this work, we developed an efficient method for performing intensity inhomogeneity correction on a single set of combined transmission electron microscopy (TEM) images and demonstrated its positive impact on training a neural network on these data. In addition, we investigated what impact different intensity standardization methods have on the training performance, both for data originating from a single source as well as from several different sources. As a concrete example, we considered the problem of segmenting mitochondria from EM data and demonstrated that we were able to obtain promising results when training our network on a large array of highly-variable in-house TEM data.",
        "authors": "Oleh Dzyubachyk, Roman I Koning, Aat A Mulder, M. Christina Avramut, Frank GA Faas, Abraham J Koster",
        "award": "",
        "chairs": "Sandy Engelhardt, Lena Maier-Hein",
        "cloudflare_video_id": "bd73a60a985e65dadd32a5e6294367ac",
        "id": "C9",
        "or_id": "MAUkVcDzDPA",
        "oral": "False",
        "pdf": "/proceedings/dzyubachyk21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/dzyubachyk21a.html",
        "schedule": " Wednesday 7th July\nC1-9 (short): Endoscopy and Validation Studies - 16:45 - 17:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_86_poster.pdf",
        "title": "Intensity Correction and Standardization for Electron Microscopy Data",
        "url": "papers/C9.html",
        "video": "/videos/full_86_video.mp4",
        "youtube_video_id": "ea1RWfGWc64"
    },
    "D1": {
        "abstract": "Supervised learning is conditioned by the availability of labeled data, which are especially expensive to acquire in the field of medical image analysis. Making use of open-source data for pre-training or using domain adaptation can be a way to overcome this issue. However, pre-trained networks often fail to generalize to new test domains that are not distributed identically due to variations in tissue stainings, types, and textures. Additionally, current domain adaptation methods mainly rely on fully-labeled source datasets. In this work, we propose Self-Rule to Adapt (SRA) which takes advantage of self-supervised learning to perform domain adaptation and removes the burden of fully-labeled source datasets. SRA can effectively transfer the discriminative knowledge obtained from a few labeled source domain to a new target domain without requiring additional tissue annotations. Our method harnesses both domains\u2019 structures by capturing visual similarity with intra-domain and cross-domain self-supervision. We show that our proposed method outperforms baselines across diverse domain adaptation settings and further validate our approach to our in-house clinical cohort.",
        "authors": "Christian Abbet, Linda Studer, Andreas Fischer, Heather Dawson, Inti Zlobec, Behzad Bozorgtabar, Jean-Philippe Thiran",
        "award": "",
        "chairs": "Jannis Hagenah, Caroline Petitjean",
        "cloudflare_video_id": "0c33305de7e9ac6555c22c10f705da21",
        "id": "D1",
        "or_id": "VO7asaS5GUk",
        "oral": "True",
        "pdf": "/proceedings/abbet21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/abbet21a.html",
        "schedule": " Wednesday 7th July\nD1-3 (long): Unsupervised and Representation Learning - 16:00 - 16:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_16_poster.pdf",
        "title": "Self-Rule to Adapt: Learning Generalized Features from Sparsely-Labeled Data Using Unsupervised Domain Adaptation for Colorectal Cancer Tissue Phenotyping",
        "url": "papers/D1.html",
        "video": "/videos/full_16_video.mp4",
        "youtube_video_id": "Hp3DW8ib3j8"
    },
    "D10": {
        "abstract": "The quality of radiographs is of major importance for diagnosis and treatment planning. While most research regarding automated radiograph quality assessment uses technical features such as noise or contrast, we propose to use anatomical structures as more appropriate features. We show that based on such anatomical features, a modular deep-learning framework can serve as a quality control mechanism for the diagnostic quality of ankle radiographs. For evaluation, a dataset consisting of 950 ankle radiographs was collected and their quality was labeled by radiologists. We obtain an average accuracy of 94.1%, which is better than the expert radiologists are on average.",
        "authors": "Dominik Mairh\u00f6fer, Manuel Laufer, Paul Martin Simon, Malte Sieren, Arpad Bischof, Thomas K\u00e4ster, Erhardt Barth, J\u00f6rg Barkhausen, Thomas Martinetz",
        "award": "",
        "chairs": "Tal Arbel, Hans Meine",
        "cloudflare_video_id": "9634a968ce994c5ca0a1d02a327e04e2",
        "id": "D10",
        "or_id": "bj04hJss_xZ",
        "oral": "False",
        "pdf": "/proceedings/mairhoefer21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/mairhofer21a.html",
        "schedule": " Wednesday 7th July\nD4-12 (short): Detection and Diagnosis 1 - 16:45 - 17:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_171_poster.pdf",
        "title": "An AI-based Framework for Diagnostic Quality Assessment of Ankle Radiographs",
        "url": "papers/D10.html",
        "video": "/videos/full_171_video.mp4",
        "youtube_video_id": "8gcvB4gB9Ms"
    },
    "D11": {
        "abstract": "Chlamydia trachomatous is an infectious ocular condition that can cause the eyelid to turn inward so that one or more eyelashes touch the eyeball, a condition call trachomatous trichiasis (TT), which can lead to blindness. Community-based screeners are used in rural areas to identify patients with TT, who can then be referred for proper medical care. Having automatic methods to detect TT will reduce the amount of time required to train screeners and improve accuracy of detection. This paper proposes a method to automatically identify regions of an eye and identify TT, using photographs taken with smartphones in the field. The attention-based gated deep learning networks in combination with a regionidentification network can identify TT with an accuracy of 91%, sensitivity of 92% and specificity of 87%, showing that these methods have the potential to be deployed in the field.",
        "authors": "Juan Carlos Prieto, Hina Shah, Kasey Jones, Robert F Chew, Hashiya M. Kana, Jerusha Weaver, Rebecca M. Flueckiger, Scott McPherson, Emily W. Gower",
        "award": "",
        "chairs": "Tal Arbel, Hans Meine",
        "cloudflare_video_id": "abbe5f8ef7790d4e339374be2a2b86d1",
        "id": "D11",
        "or_id": "umb5xsy1-zS",
        "oral": "False",
        "pdf": "/proceedings/prieto21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/prieto21a.html",
        "schedule": " Wednesday 7th July\nD4-12 (short): Detection and Diagnosis 1 - 16:45 - 17:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_144_poster.pdf",
        "title": "Image Sequence Generation and Analysis via GRU and Attention for Trachomatous Trichiasis Classification",
        "url": "papers/D11.html",
        "video": "/videos/full_144_video.mp4",
        "youtube_video_id": "l_MyuuD_HXg"
    },
    "D12": {
        "abstract": "Contrastive learning is a form of self-supervision that can leverage unlabeled data to produce pretrained models. While contrastive learning has demonstrated promising results on natural image classification tasks, its application to medical imaging tasks like chest X-ray interpretation has been limited. In this work, we propose MoCo-CXR, which is an adaptation of the contrastive learning method Momentum Contrast (MoCo), to produce models with better representations and initializations for the detection of pathologies in chest X-rays. In detecting pleural effusion, we find that linear models trained on MoCo-CXR-pretrained representations outperform those without MoCo-CXR-pretrained representations, indicating that MoCo-CXR-pretrained representations are of higher-quality. End-to-end fine-tuning experiments reveal that a model initialized via MoCo-CXR-pretraining outperforms its non-MoCo-CXR-pretrained counterpart. We find that MoCo-CXR-pretraining provides the most benefit with limited labeled training data. Finally, we demonstrate similar results on a target Tuberculosis dataset unseen during pretraining, indicating that MoCo-CXR-pretraining endows models with representations and transferability that can be applied across chest X-ray datasets and tasks.",
        "authors": "Hari Sowrirajan, Jingbo Yang, Andrew Y. Ng, Pranav Rajpurkar",
        "award": "",
        "chairs": "Tal Arbel, Hans Meine",
        "cloudflare_video_id": "8d1f06d3ec92570032960f0fab4eb8cd",
        "id": "D12",
        "or_id": "LO7Su0-dPJl",
        "oral": "False",
        "pdf": "/proceedings/sowrirajan21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/sowrirajan21a.html",
        "schedule": " Wednesday 7th July\nD4-12 (short): Detection and Diagnosis 1 - 16:45 - 17:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_56_poster.pdf",
        "title": "MoCo Pretraining Improves Representation and Transferability of Chest X-ray Models",
        "url": "papers/D12.html",
        "video": "/videos/full_56_video.mp4",
        "youtube_video_id": "00TxDtbEka0"
    },
    "D2": {
        "abstract": "We propose a semantic similarity metric for image registration. Existing metrics like euclidean distance or normalized cross-correlation focus on aligning intensity values, giving difficulties with low intensity contrast or noise. Our approach learns dataset-specific features that drive the optimization of a learning-based registration model. We train both an unsupervised approach using an auto-encoder, and a semi-supervised approach using supplemental segmentation data to extract semantic features for image registration. Comparing to existing methods across multiple image modalities and applications, we achieve consistently high registration accuracy. A learned invariance to noise gives smoother transformations on low-quality images.",
        "authors": "Steffen Czolbe, Oswin Krause, Aasa Feragen",
        "award": "",
        "chairs": "Jannis Hagenah, Caroline Petitjean",
        "cloudflare_video_id": "8f137e99a048ba808b8a9ef322816698",
        "id": "D2",
        "or_id": "9M5cH--UdcC",
        "oral": "True",
        "pdf": "/proceedings/czolbe21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/czolbe21a.html",
        "schedule": " Wednesday 7th July\nD1-3 (long): Unsupervised and Representation Learning - 16:00 - 16:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_28_poster.pdf",
        "title": "Semantic similarity metrics for learned image registration",
        "url": "papers/D2.html",
        "video": "/videos/full_28_video.mp4",
        "youtube_video_id": "qHADp8I2iR4"
    },
    "D3": {
        "abstract": "The tumor microenvironment is an area of intense interest in cancer research and may be a clinically actionable aspect of cancer care. One way to study the tumor microenvironment is to characterize the spatial interactions between various types of nuclei in cancer tissue from H&E whole slide images, which require nucleus segmentation and classification. Current methods of nucleus classification rely on extensive labeling from pathologists and are limited by the number of categories a nucleus can be classified into. In this work, leveraging existing nucleus segmentation and contrastive representation learning methods, we developed a method that learns vector embeddings of nuclei based on their morphology in histopathology images. We show that the embeddings learned by this model capture distinctive morphological features of nuclei and can be used to group them into meaningful subtypes. These embeddings can provide a much richer characterization of the statistics of the spatial distribution of nuclei in cancer and open new possibilities in the quantitative study of the tumor microenvironment.",
        "authors": "Chao Feng, Chad Vanderbilt, Thomas Fuchs",
        "award": "",
        "chairs": "Jannis Hagenah, Caroline Petitjean",
        "cloudflare_video_id": "63d8f8b7ef0d08989e95c84f426d8d67",
        "id": "D3",
        "or_id": "uLtYvtWw8PH",
        "oral": "True",
        "pdf": "/proceedings/feng21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/feng21a.html",
        "schedule": " Wednesday 7th July\nD1-3 (long): Unsupervised and Representation Learning - 16:00 - 16:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_125_poster.pdf",
        "title": "Nuc2Vec: Learning Representations of Nuclei in Histopathology Images with Contrastive Loss",
        "url": "papers/D3.html",
        "video": "/videos/full_125_video.mp4",
        "youtube_video_id": "N8TiiujVJMc"
    },
    "D4": {
        "abstract": "Accurately determining the molecular subtype of breast cancer is an important factor for the prognosis of breast cancer patients, and can guide treatment selection. In this study, we report a multimodal deep learning with attention mechanism (MDLA) for predicting the molecular subtypes of breast cancer from mammography and ultrasound images. Incorporation of the attention mechanism improved diagnostic performance for predicting 4-class molecular subtypes with Matthews correlation coefficient (MCC) of 0.794. The MDLA can also discriminate between Luminal disease and non-luminal disease with areas under the receiver operating characteristic curve (AUC) of 0.855. This work thus provides a noninvasive imaging biomarker to predict the molecular subtypes of breast cancer.",
        "authors": "Tianyu Zhang, Luyi Han, Yuan Gao, Xin Wang, Regina Beets-Tan, Ritse Mann",
        "award": "",
        "chairs": "Tal Arbel, Hans Meine",
        "cloudflare_video_id": "9ccffa963bff5d552c2d394f6ab1622f",
        "id": "D4",
        "or_id": "GHNGMR1EAtN",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=GHNGMR1EAtN",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nD4-12 (short): Detection and Diagnosis 1 - 16:45 - 17:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_33_poster.pdf",
        "title": "Predicting molecular subtypes of breast cancer using multimodal deep learning and incorporation of the attention mechanism",
        "url": "papers/D4.html",
        "video": "/videos/short_33_video.mp4",
        "youtube_video_id": "MDrVL1GAj34"
    },
    "D5": {
        "abstract": "Image classification on whole-slide-image (WSI) is a challenging task. A previous work based on Fisher vector encoding provided a novel end-to-end pipeline with promising accuracy and computational efficiency.\nHowever, this pipeline suffers from an accuracy drop when deployed to another dataset to perform the same task.\nThis poses a limitation on the practical use of the pipeline especially when the diagnoses of WSIs are hard to obtain.\nThis paper aims at providing a solution to mitigate the accuracy drop by using an unsupervised domain adaptation approach.\nWe propose to insert the domain classifiers into the pipeline in two stages to align the features during training. \nWe evaluate accuracy by calculating the confusion matrices before and after the adaptation on two datasets. We demonstrate that placing domain classifiers in different stages will boost accuracy.",
        "authors": "Yuchen Yang, Amir Akbarnejad, Nilanjan Ray, Gilbert Bigras",
        "award": "",
        "chairs": "Tal Arbel, Hans Meine",
        "cloudflare_video_id": "07aa0cc4b07030f7692c9fdfc527818e",
        "id": "D5",
        "or_id": "70gFxx5ytwh",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=70gFxx5ytwh",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nD4-12 (short): Detection and Diagnosis 1 - 16:45 - 17:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_47_poster.pdf",
        "title": "Double adversarial domain adaptation for whole-slide-imageclassification",
        "url": "papers/D5.html",
        "video": "/videos/short_47_video.mp4",
        "youtube_video_id": "HBjJZM_X7k0"
    },
    "D6": {
        "abstract": "We use deep learning to age knee bone surfaces four years. We propose to encode an MRI-based bone surface in a spherical coordinate format, and use these spherical maps to predict shape changes in a 48 months time frame, in subjects with and without osteoarthritis. The experiments show that a 2D V-Net can predict bone surface shape with a mean absolute error of about 1 mm. Our code is available  at https://github.com/fcaliva/Bone_Shape_Virtual_Aging.",
        "authors": "Francesco Caliva, Alejandro Morales Martinez, Sharmila Majumdar, Valentina Pedoia",
        "award": "",
        "chairs": "Tal Arbel, Hans Meine",
        "cloudflare_video_id": "75e12b6e67a67b2a4eb2fea2d326188f",
        "id": "D6",
        "or_id": "1JP1g5htY6K",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=1JP1g5htY6K",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nD4-12 (short): Detection and Diagnosis 1 - 16:45 - 17:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_133_poster.pdf",
        "title": "Virtual Bone Shape Aging",
        "url": "papers/D6.html",
        "video": "/videos/short_133_video.mp4",
        "youtube_video_id": "JgPPlzT1WoY"
    },
    "D7": {
        "abstract": "We extend the CycleGAN architecture with a style-based generator and show the efficacy of the proposed domain adaptation-based method between two histopathology image domains - Hematoxylin and Eosin (H&E) and HER2 immunohistochemically (IHC) images. Using the proposed method, we re-used large set of pre-existing annotations for detection of tumor infiltrating lymphocytes (TILs), which were originally done on H&E, towards a TIL detector applicable on HER2 IHC images. We provide analytical validation of the resulting TIL detector. Furthermore, we show that the detected stromal TIL densities are significantly prognostic as a biomarker for patient stratification on a triple-negative breast cancer (TNBC) cohort.",
        "authors": "Ansh Kapil, Armin Meier, Anatoliy Shumilov, Susanne Haneder, Helen Angell, G\u00fcnter Schmidt",
        "award": "",
        "chairs": "Tal Arbel, Hans Meine",
        "cloudflare_video_id": "536c4ab6f20f9b213f38ec88973b18d2",
        "id": "D7",
        "or_id": "IAuBCvaTKHr",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=IAuBCvaTKHr",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nD4-12 (short): Detection and Diagnosis 1 - 16:45 - 17:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_138_poster.pdf",
        "title": "Breast cancer patient stratification using domain adaptation based lymphocyte detection in HER2 stained tissue sections",
        "url": "papers/D7.html",
        "video": "/videos/short_138_video.mp4",
        "youtube_video_id": "uPQITIqNJVk"
    },
    "D8": {
        "abstract": "Adhesions are an important cause of chronic pain following abdominal surgery. Recent developments in abdominal cine-MRI have enabled the non-invasive diagnosis of adhesions. Adhesions are identified on cine-MRI by the absence of sliding motion during movement. Diagnosis and mapping of adhesions  improves the management of patients with pain. Detection of abdominal adhesions on cine-MRI is challenging from both a radiological and deep learning perspective. We focus on classifying presence or absence of adhesions in sagittal abdominal cine-MRI series. We experimented with spatio-temporal deep learning architectures centered around a ConvGRU architecture. A hybrid architecture comprising a ResNet followed by a ConvGRU model allows to classify a whole time-series. Compared to a stand-alone ResNet with a two time-point (inspiration/expiration) input, we show an increase in classification performance (AUROC) from 0.74 to 0.83 (p<0.05). Our full temporal classification approach adds only a small amount (5%) of parameters to the entire architecture, which may be useful for other  medical imaging problems with a temporal dimension.",
        "authors": "Bram de Wilde, Richard P. G. ten Broek, Henkjan Huisman",
        "award": "",
        "chairs": "Tal Arbel, Hans Meine",
        "cloudflare_video_id": "cc47bccf82bba2f29eb434d997db1d85",
        "id": "D8",
        "or_id": "-KI5qmKvhKQ",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=-KI5qmKvhKQ",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nD4-12 (short): Detection and Diagnosis 1 - 16:45 - 17:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_89_poster.pdf",
        "title": "Cine-MRI detection of abdominal adhesions with spatio-temporal deep learning",
        "url": "papers/D8.html",
        "video": "/videos/short_89_video.mp4",
        "youtube_video_id": "QKIFWr_qRK8"
    },
    "D9": {
        "abstract": "We investigate video-based deep learning approaches for detecting needle insertions in ultrasound videos. We introduce two efficient and conceptually simple extensions to convert standard 2D object detectors into video object detectors that make use of temporal information from a history of frames. We compare our approaches to a 2D baseline method that makes independent predictions per frame. Given the need to run in real-time on computationally restricted environments, emphasis is placed on low computational complexity.",
        "authors": "Jonathan Rubin, Alvin Chen, Anumod Odungattu Thodiyil, Raghavendra Srinivasa Naidu, Ramon Erkamp, Jon Fincke, Balasundar Raju",
        "award": "",
        "chairs": "Tal Arbel, Hans Meine",
        "cloudflare_video_id": "cdb30231447259480c93eb9dfb96ee32",
        "id": "D9",
        "or_id": "dVUHL5QhDhL",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=dVUHL5QhDhL",
        "pmlr_url": "",
        "schedule": " Wednesday 7th July\nD4-12 (short): Detection and Diagnosis 1 - 16:45 - 17:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_34_poster.pdf",
        "title": "Efficient Video-Based Deep Learning for Ultrasound Guided Needle Insertion",
        "url": "papers/D9.html",
        "video": "/videos/short_34_video.mp4",
        "youtube_video_id": "pBOrG8uRW6o"
    },
    "E1": {
        "abstract": "The growing demand for head magnetic resonance imaging (MRI) examinations, along with a global shortage of radiologists, has led to an increase in the time taken to report head MRI scans around the world.  For many neurological conditions, this delay can result in increased morbidity and mortality.  An automated triaging tool could reduce reporting times for abnormal examinations by identifying abnormalities at the time of imaging and prioritizing the reporting of these scans.  In this work, we present a convolutional neural network (CNN) for detecting clinically-relevant  abnormalities  in T2-weighted  head  MRI scans. Using a validated neuroradiology report classifier, we generated a labelled dataset of 43,754 scans from two large UK hospitals for model training, and demonstrate accurate classification (area under the receiver operating curve (AUC) = 0.943) on a test set of 800 scans labelled by a team of neuroradiologists.  Importantly,  when trained on scans from only a single hospital the model generalized to scans from the other hospital (\u2206AUC\u22640.02).  A simulation study demonstrated that our model would reduce the mean reporting time for abnormal scans from 28 days to 14 days and from 9 days to 5 days at the two hospitals, demonstrating feasibility for use in a clinical triage environment.",
        "authors": "David A Wood, Sina Kafiabadi, Ayisha Al Busaidi, Emily Guilhem, Antanas Montvila, Siddharth Agarwal, Jeremy Lynch, Matthew Townend, Gareth Barker, Sebastien Ourselin, James H Cole, Thomas C Booth",
        "award": "",
        "chairs": "Ivana Isgum, Carole Sudre",
        "cloudflare_video_id": "1bc2008505c69b87326dfcbd4ee2b267",
        "id": "E1",
        "or_id": "gh8qD_lAADe",
        "oral": "True",
        "pdf": "/proceedings/wood21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/wood21a.html",
        "schedule": " Thursday 8th July\nE1-3 (long): Detection and Diagnosis - 13:00 - 13:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_17_poster.pdf",
        "title": "Automated triaging of head MRI examinations using convolutional neural networks",
        "url": "papers/E1.html",
        "video": "/videos/full_17_video.mp4",
        "youtube_video_id": "iwDREZJEl_g"
    },
    "E10": {
        "abstract": "Modern generative adversarial networks (GANs) have been enabling the realistic generation of full 3D brain images by sampling from a latent space prior Z (i.e., random vectors) and mapping it to realistic images in X (e.g., 3D MRIs). To address the ubiquitous mode collapse issue, recent works have strongly imposed certain characteristics such as Gaussianness to the prior by also explicitly mapping X to Z via encoder. These efforts, however, fail to accurately map 3D brain images to the desirable prior, which the generator assumes to be sampling the random vectors from. On the other hand, Variational Auto-Encoding GAN (VAE-GAN) solves mode collapse by enforcing Gaussianness by two learned parameter, yet causes blurriness in images. In this work, we show how our cycle consistent embedding} GAN (CCE-GAN) both accurately encodes 3D MRIs to the standard normal prior, and maintains the quality of the generated images. We achieve this without a network-based code discriminator via the Wasserstein measure. We quantitatively and qualitatively assess the embeddings and the generated 3D MRIs using healthy T1-weighted MRIs from ADNI.",
        "authors": "Shibo Xing, Harsh Sinha, Seong Jae Hwang",
        "award": "",
        "chairs": "Alessa Hering, Herv\u00e9 Lombaert",
        "cloudflare_video_id": "fe998757d99d3f9914cf5614db0b019d",
        "id": "E10",
        "or_id": "jgBzGIG-kB",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=jgBzGIG-kB",
        "pmlr_url": "",
        "schedule": " Thursday 8th July\nE4-12 (short): Image Registration / Synthesis - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_117_poster.pdf",
        "title": "Cycle Consistent Embedding of 3D Brains with Auto-Encoding Generative Adversarial Networks",
        "url": "papers/E10.html",
        "video": "/videos/short_117_video.mp4",
        "youtube_video_id": "zoOcsdue9oI"
    },
    "E11": {
        "abstract": "While deep-learning-based imaging denoising techniques can improve the quality of low-dose computed tomography (CT) scans, repetitive 3D convolution operations cost significant computation resources and time. We present an efficient and accurate spatial-temporal convolution method to accelerate an existing denoising network based on the SRResNet. We trained and evaluated our model on our dataset containing 184 low-dose chest CT scans. We compared the performance of the proposed spatial-temporal convolution network to the SRResNet with full 3D convolutional layers. Using 8-bit quantization, we demonstrated a 7-fold speed-up during inference. Using lung nodule characterization as a driving task, we analyzed the impact on image quality and radiomic features. Our results show that our method achieves better perceptual quality, and the outputs are consistent with the SRResNet baseline outputs for some radiomics features (31 out of 57 total features). These observations together demonstrate that the proposed spatial-temporal method can be potentially useful for clinical applications where the computational resource is limited.",
        "authors": "Leihao Wei, William Hsu",
        "award": "",
        "chairs": "Alessa Hering, Herv\u00e9 Lombaert",
        "cloudflare_video_id": "c70c5f00d07212d92d5f969072da6ff0",
        "id": "E11",
        "or_id": "XHWqF4DlRr0",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=XHWqF4DlRr0",
        "pmlr_url": "",
        "schedule": " Thursday 8th July\nE4-12 (short): Image Registration / Synthesis - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_108_poster.pdf",
        "title": "Efficient and Accurate Spatial-Temporal Denoising Network for Low-dose CT Scans",
        "url": "papers/E11.html",
        "video": "/videos/short_108_video.mp4",
        "youtube_video_id": "qgfRGJ-4AR0"
    },
    "E12": {
        "abstract": "Automatic segmentation of retina lesions have been a long standing and challenging task for learning based models, mostly due to the lack of available and accurate lesion segmentation datasets. In this paper, we propose a two-step process for generating photo-realistic fundus images conditioned on synthetic \"ground truth\" semantic labels, and demonstrate its potential for further downstream tasks, such as, but not limited to; automated grading of diabetic retinopathy, dataset balancing, creating image examples for trainee ophthalmologists, etc.",
        "authors": "Joon-Ho Son, Amir Alansary, Daniel Rueckert, Bernhard Kainz, Benjamin Hou",
        "award": "",
        "chairs": "Alessa Hering, Herv\u00e9 Lombaert",
        "cloudflare_video_id": "b85756ed0dc46c3bf03b2d688768ab1b",
        "id": "E12",
        "or_id": "wiKDehhdnz",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=wiKDehhdnz",
        "pmlr_url": "",
        "schedule": " Thursday 8th July\nE4-12 (short): Image Registration / Synthesis - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_93_poster.pdf",
        "title": "Synthesis of Diabetic Retina Fundus Images Using Semantic Label Generation",
        "url": "papers/E12.html",
        "video": "/videos/short_93_video.mp4",
        "youtube_video_id": "O240rY_gnmg"
    },
    "E2": {
        "abstract": "In this paper, we propose a novel approach to overcome the problem of imbalanced datasets for object detection tasks, when the distribution is not uniform over all classes. The general idea is to compute a probability vector, encoding the probability for each image to be fed to the network during the training phase. This probability vector is computed by solving some quadratic optimization problem and ensures that all classes are seen with similar frequency. We apply this method to a fetal anatomies detection problem, and conduct a thorough statistical analysis of the resulting performance to show that it performs significantly better than two baseline models: one with images sampled uniformly and one implementing classical oversampling.",
        "authors": "Antoine Olivier, Caroline Raynaud",
        "award": "",
        "chairs": "Ivana Isgum, Carole Sudre",
        "cloudflare_video_id": "c4a1b5e8bbd6fd719373cd95c36716bc",
        "id": "E2",
        "or_id": "ZGvtypAfHiA",
        "oral": "True",
        "pdf": "/proceedings/olivier21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/olivier21a.html",
        "schedule": " Thursday 8th July\nE1-3 (long): Detection and Diagnosis - 13:00 - 13:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_29_poster.pdf",
        "title": "Balanced sampling for an object detection problem - application to fetal anatomies detection",
        "url": "papers/E2.html",
        "video": "/videos/full_29_video.mp4",
        "youtube_video_id": "lmAVPi98xW4"
    },
    "E3": {
        "abstract": "Pathological brain appearances may be so heterogeneous as to be intelligible only as anomalies, defined by their deviation from normality rather than any specific pathological characteristic. Amongst the hardest tasks in medical imaging, detecting such anomalies requires models of the normal brain that combine compactness with the expressivity of the complex, long-range interactions that characterise its structural organisation. These are requirements transformers have arguably greater potential to satisfy than other current candidate architectures, but their application has been inhibited by their demands on data and computational resource. Here we combine the latent representation of vector quantised variational autoencoders with an ensemble of autoregressive transformers to enable unsupervised anomaly detection and segmentation defined by deviation from healthy brain imaging data, achievable at low computational cost, within relative modest data regimes. We compare our method to current state-of-the-art approaches across a series of experiments involving synthetic and real pathological lesions. On real lesions, we train our models on 15,000 radiologically normal participants from UK Biobank, and evaluate performance on four different brain MR datasets with small vessel disease, demyelinating lesions, and tumours. We demonstrate superior anomaly detection performance both image-wise and pixel-wise, achievable without post-processing. These results draw attention to the potential of transformers in this most challenging of imaging tasks.",
        "authors": "Walter Hugo Lopez Pinaya, Petru-Daniel Tudosiu, Robert Gray, Geraint Rees, Parashkev Nachev, S\u00e9bastien Ourselin, M. Jorge Cardoso",
        "award": "",
        "chairs": "Ivana Isgum, Carole Sudre",
        "cloudflare_video_id": "526c672bc0228c1cd0603889d9ca5230",
        "id": "E3",
        "or_id": "Z1tlNqbCpp_",
        "oral": "True",
        "pdf": "/proceedings/pinaya21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/pinaya21a.html",
        "schedule": " Thursday 8th July\nE1-3 (long): Detection and Diagnosis - 13:00 - 13:30 (UTC+2)",
        "short": "False",
        "slides": "/slides/full_42_poster.pdf",
        "title": "Unsupervised Brain Anomaly Detection and Segmentation with Transformers",
        "url": "papers/E3.html",
        "video": "/videos/full_42_video.mp4",
        "youtube_video_id": "cSGtcb8nu9w"
    },
    "E4": {
        "abstract": "Deep learning has the potential to substantially improve inter-subject alignment for shape and atlas analysis. So far most highly accurate supervised approaches require dense manual annotations and complex multi-level architectures but may still be susceptible to label bias. We present a radically different approach for learning to estimate large deformations without expert supervision. Instead of regressing displacements, we train a 3D DeepLab network to predict automatic supervoxel segmentations. To enable consistent supervoxel labels, we use the warping field of a conventional approach and increase the accuracy by sampling multiple complementary over-segmentations. We experimentally demonstrate that 1) our deformable supervoxels are less sensitive to large initial misalignment and can combine linear and nonlinear registration and 2) using this self-supervised classification loss is more robust to noisy ground truth and leads to better convergence than direct regression as supervision.",
        "authors": "Mattias P Heinrich",
        "award": "",
        "chairs": "Alessa Hering, Herv\u00e9 Lombaert",
        "cloudflare_video_id": "36c70776fc40277790fd3297bc260106",
        "id": "E4",
        "or_id": "zZA5TpNdC4Z",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=zZA5TpNdC4Z",
        "pmlr_url": "",
        "schedule": " Thursday 8th July\nE4-12 (short): Image Registration / Synthesis - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_122_poster.pdf",
        "title": "Rethinking the Design of Learning based Inter-Patient Registration using Deformable Supervoxels",
        "url": "papers/E4.html",
        "video": "/videos/short_122_video.mp4",
        "youtube_video_id": "Uhe3uB9NhBQ"
    },
    "E5": {
        "abstract": "The Japan Brain/MINDS Project aims at studying the neural networks controlling higher brain functions in the marmoset. As part of it, we develop an image processing pipeline for marmoset brain imaging data, where various microscopy images of different modalities need to be co-registered. In initial experiments, multi-modal image registration frequently failed due to an erroneous initialization. Our data set includes images of Nissl stained brain sections, backlit images as well as images of neural tracer injections using two-photon microscopy. More than 10000 high-resolution 2D images required co-registration, a large amount that demands a reliable automation process. We implemented a semi-supervised image-to-image translation which allowed a robust image alignment initialization. With such an initial alignment, all images can be successfully registered using a state-of-the-art multi-modal image registration algorithm.",
        "authors": "Henrik Skibbe, Akiya Watakabe, Febrian Rachmadi, Carlos Enrique Gutierrez, Ken Nakae, Tetsuo Yamamori",
        "award": "",
        "chairs": "Alessa Hering, Herv\u00e9 Lombaert",
        "cloudflare_video_id": "12456ac038e423cbdd9a51867ea7103b",
        "id": "E5",
        "or_id": "GOhAojdaLg",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=GOhAojdaLg",
        "pmlr_url": "",
        "schedule": " Thursday 8th July\nE4-12 (short): Image Registration / Synthesis - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_125_poster.pdf",
        "title": "Semi-supervised Image-to-Image translation for robust image registration",
        "url": "papers/E5.html",
        "video": "/videos/short_125_video.mp4",
        "youtube_video_id": "QNdFADBzJ2Q"
    },
    "E6": {
        "abstract": "In the last decade, convolutional neural networks (ConvNets) have dominated and achieved state-of-the-art performances in a variety of medical imaging applications. However, the performances of ConvNets are still limited by lacking the understanding of long-range spatial relations in an image. The recently proposed Vision Transformer (ViT) for image classification uses a purely self-attention-based model that learns long-range spatial relations to focus on the relevant parts of an image. Nevertheless, ViT emphasizes the low-resolution features because of the consecutive downsamplings, resulting in a lack of detailed localization information, making it unsuitable for image registration. Recently, several ViT-based image segmentation methods have been combined with ConvNets to improve the recovery of detailed localization information. Inspired by them, we present ViT-V-Net, which bridges ViT and ConvNet to provide volumetric medical image registration. The experimental results presented here demonstrate that the proposed architecture achieves superior performance to several top-performing registration methods.",
        "authors": "Junyu Chen, Yufan He, Eric Frey, Ye Li, Yong Du",
        "award": "",
        "chairs": "Alessa Hering, Herv\u00e9 Lombaert",
        "cloudflare_video_id": "1dee9a737a9d785259de7c0f7fe7e78f",
        "id": "E6",
        "or_id": "h3HC1EU7AEz",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=h3HC1EU7AEz",
        "pmlr_url": "",
        "schedule": " Thursday 8th July\nE4-12 (short): Image Registration / Synthesis - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_4_poster.pdf",
        "title": "ViT-V-Net: Vision Transformer for Unsupervised Volumetric Medical Image Registration",
        "url": "papers/E6.html",
        "video": "/videos/short_4_video.mp4",
        "youtube_video_id": "pxKCbsEcm2c"
    },
    "E7": {
        "abstract": "Training deep learning based medical image registration methods involves the challenge of finding a suitable metric. To avoid the difficulty of choosing a metric for multimodal image registration, we propose a completely new concept relying on geometric instead of metric supervision with three-way registration cycles. Therefore, we create a synthetic image by applying a synthetic transformation on one of the input images. This leads to cycles that for each pair of input images comprise two multimodal transformations to be estimated and one known synthetic monomodal transformation. We minimise the discrepancy between the combined multimodal transformations and the synthetic monomodal transformation. By minimising this cycle discrepancy, we are able to learn multimodal registration between CT and MRI without metric supervision. Our method outperforms state-of-the-art metric supervision and comes very close to fully-supervised learning with ground truth labels.",
        "authors": "Hanna Siebert, Lasse Hansen, Mattias P Heinrich",
        "award": "",
        "chairs": "Alessa Hering, Herv\u00e9 Lombaert",
        "cloudflare_video_id": "bfd9ba4d5873e60273a645350bc8154e",
        "id": "E7",
        "or_id": "sua3vlnkmEv",
        "oral": "False",
        "pdf": "https://openreview.net/pdf?id=sua3vlnkmEv",
        "pmlr_url": "",
        "schedule": " Thursday 8th July\nE4-12 (short): Image Registration / Synthesis - 13:45 - 14:30 (UTC+2)",
        "short": "True",
        "slides": "/slides/short_128_poster.pdf",
        "title": "Learning a Metric without Supervision: Multimodal Registration using Synthetic Cycle Discrepancy",
        "url": "papers/E7.html",
        "video": "/videos/short_128_video.mp4",
        "youtube_video_id": "s1CT-1Qc3jw"
    },
    "E8": {
        "abstract": "We present a deep learning (DL) registration framework for fast mono-modal and multi-modal image registration using differentiable mutual information and diffeomorphic B-spline free-form deformation (FFD). Deep learning registration has been shown to achieve competitive accuracy and significant speedups from traditional iterative registration methods. In this paper, we propose to use a B-spline FFD parameterisation of Stationary Velocity Field (SVF) in DL registration in order to achieve smooth diffeomorphic deformation while being computationally-efficient. In contrast to most DL registration methods which use intensity similarity metrics that assume linear intensity relationship, we apply a differentiable variant of a classic similarity metric, mutual information, to achieve robust mono-modal and multi-modal registration. We carefully evaluated our proposed framework on mono- and multi-modal registration using 3D brain MR images and 2D cardiac MR images.",
        "authors": "Huaqi Qiu, Chen Qin, Andreas Schuh, Kerstin Hammernik, Daniel Rueckert",
        "award": "",
        "chairs": "Alessa Hering, Herv\u00e9 Lombaert",
        "cloudflare_video_id": "b2338cd525b89da77466dd1b12a5a5d1",
        "id": "E8",
        "or_id": "eSI9Qh2DJhN",
        "oral": "False",
        "pdf": "/proceedings/qiu21.pdf",
        "pmlr_url": "https://proceedings.mlr.press/v143/qiu21a.html",