Skip to content

Commit 5c0e946

Browse files
authored
ggml-hexagon: cpy: add contiguous fast-path in reshape copy (ggml-org#23076)
1 parent 3e037f3 commit 5c0e946

1 file changed

Lines changed: 23 additions & 0 deletions

File tree

ggml/src/ggml-hexagon/htp/cpy-ops.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,29 @@ static void cpy_thread_sametype_reshape(struct htp_copy_context * ct, struct htp
8888
const uint32_t ir0 = dr * ith;
8989
const uint32_t ir1 = (ir0 + dr) < nr ? (ir0 + dr) : nr;
9090

91+
// Fast path: taken only when both src0 and dst are fully contiguous in memory (every stride equals the packed stride).
92+
// In that case, replace the element-by-element loop with one bulk HVX copy of rows [ir0, ir1) per (i03, i02) slice.
93+
const bool src0_contig = (nb00 == ct->src0_type_size) &&
94+
(nb01 == ne00 * nb00) &&
95+
(nb02 == ne01 * nb01) &&
96+
(nb03 == ne02 * nb02);
97+
const bool dst_contig = (nb0 == ct->dst_type_size) &&
98+
(nb1 == ne0 * nb0) &&
99+
(nb2 == ne1 * nb1) &&
100+
(nb3 == ne2 * nb2);
101+
102+
if (src0_contig && dst_contig) {
103+
for (int64_t i03 = 0; i03 < ne03; i03++) {
104+
for (int64_t i02 = 0; i02 < ne02; i02++) {
105+
uint8_t * src_ptr = (uint8_t *) src0->data + i03*nb03 + i02*nb02 + ir0*nb01;
106+
uint32_t flat = ((i03*ne02 + i02)*ne01 + ir0) * ne00;
107+
uint8_t * dst_ptr = (uint8_t *) dst->data + flat * ct->src0_type_size;
108+
hvx_copy_uu(dst_ptr, src_ptr, (ir1 - ir0) * ne00, ct->src0_type_size);
109+
}
110+
}
111+
return;
112+
}
113+
91114
// dst counters
92115
int64_t k10 = 0;
93116
int64_t i11 = 0;

0 commit comments

Comments
 (0)