Skip to content

Commit 2ca3179

Browse files
pancetta, github-actions[bot]
authored and committed
updated pint.bib using bibbot
1 parent 91bdf7b commit 2ca3179

1 file changed

Lines changed: 9 additions & 0 deletions

File tree

_bibliography/pint.bib

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8641,6 +8641,15 @@ @article{HonEtAl2026
86418641
year = {2026},
86428642
}
86438643

8644+
@unpublished{HuangEtAl2026,
8645+
abstract = {Simulating large-scale microswimmer dynamics in viscous fluid poses significant challenges due to the coupled high spatial and temporal complexity. Conventional high-performance computing (HPC) methods often address these two dimensions in isolation, leaving a critical gap for synergistic acceleration. This paper introduces a heterogeneous CPU--GPU computing framework specifically optimized for the long-time simulation of filamentous microswimmers in viscous fluid. We propose a two-level parallelization strategy: (1) high-intensity GPU kernels to resolve the quadratic spatial interactions given by the Method of Regularized Stokeslets (MRS), and (2) a distributed MPI-GPU pipelined Parareal architecture to exploit temporal concurrency. By mapping the asynchronous pipeline onto multiple GPU devices, our framework effectively overlaps coarse and fine propagators, overcoming the serial bottlenecks of traditional Parareal method. Furthermore, we employ a GPU-optimized numerical routine for computing the matrix square root arising in the numerical scheme of the filamentous microswimmer simulations. Theoretical analysis of the efficiency improvement of the pipelined Parareal is presented. Numerical experiments demonstrate that the proposed framework achieves order-of-magnitude speedups over CPU-only methods, providing a scalable pathway for simulating complex emergent behaviors in large-scale biology and physics systems.},
8646+
author = {Ruixiang Huang and Weifan Liu},
8647+
howpublished = {arXiv:2604.12083v1 [cs.DC]},
8648+
title = {Accelerating Microswimmer Simulations via a Heterogeneous Pipelined Parallel-in-Time Framework},
8649+
url = {https://arxiv.org/abs/2604.12083v1},
8650+
year = {2026},
8651+
}
8652+
86448653
@unpublished{JiangEtAl2026,
86458654
abstract = {We present a new training methodology for transformers using a multilevel, layer-parallel approach. Through a neural ODE formulation of transformers, our application of a multilevel parallel-in-time algorithm for the forward and backpropagation phases of training achieves parallel acceleration over the layer dimension. This dramatically enhances parallel scalability as the network depth increases, which is particularly useful for increasingly large foundational models. However, achieving this introduces errors that cause systematic bias in the gradients, which in turn reduces convergence when closer to the minima. We develop an algorithm to detect this critical transition and either switch to serial training or systematically increase the accuracy of layer-parallel training. Results, including BERT, GPT2, ViT, and machine translation architectures, demonstrate parallel-acceleration as well as accuracy commensurate with serial pre-training while fine-tuning is unaffected.},
86468655
author = {Shuai Jiang and Marc Salvado and Eric C. Cyr and Alena Kopaničáková and Rolf Krause and Jacob B. Schroder},

0 commit comments

Comments
 (0)