Skip to content

Commit 41a63be

Browse files
hexagon: make vmem and buffer-size configurable (#22487)
* hexagon: allow host to set max vmem size

  We use a sane default but it's helpful to allow for an override if needed.

* hexagon: add support for measuring vmem space and move pinned mmapping management to host
* hexagon: update vmem checks to use uint64
* hexagon: bump op buffers to 16 (matches max mmaps)
* hexagon: bump default vmem to 3.2GB
* hexagon: add support for autodetecting vmem space and some logging cleanup in that area
* hexagon: fix whitespace warnings
* Update scripts/snapdragon/adb/run-cli.sh

  Co-authored-by: Pascal <admin@serveurperso.com>

* hex-adb: fix run-completion script

---------

Co-authored-by: Pascal <admin@serveurperso.com>
1 parent 098705a commit 41a63be

7 files changed

Lines changed: 180 additions & 121 deletions

File tree

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 143 additions & 95 deletions
Large diffs are not rendered by default.

ggml/src/ggml-hexagon/htp/htp-ctx.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ struct htp_mmap {
2020
uint64_t size;
2121
uint64_t base;
2222
uint32_t fd;
23-
uint32_t pinned;
23+
uint32_t reserved;
2424
};
2525

2626
// Scratchpad state
@@ -77,6 +77,8 @@ struct htp_context {
7777
atomic_bool vtcm_valid;
7878
atomic_bool vtcm_needs_release;
7979

80+
uint64_t max_vmem;
81+
8082
struct htp_ops_context octx;
8183

8284
#ifdef HTP_HAS_HMX

ggml/src/ggml-hexagon/htp/htp-ops.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,11 @@ enum htp_op_code {
9090
#define HTP_OP_MAX_INPUTS 6 // aka GGML_MAX_SRCS
9191
#define HTP_OP_MAX_PARAMS 16 // aka GGML_MAX_OP_PARAMS
9292

93-
#define HTP_OP_MAX_BUFS 8
93+
#define HTP_OP_MAX_BUFS 16
9494
#define HTP_OP_MAX_REQS 256
9595
#define HTP_OP_MAX_TENSORS (HTP_OP_MAX_REQS * HTP_OP_MAX_INPUTS + HTP_OP_MAX_REQS)
9696

97-
#if __HVX_ARCH__ < 75
98-
#define HTP_OP_MAX_VMEM (3167538380u)
99-
#else
100-
#define HTP_OP_MAX_VMEM (3221225472u)
101-
#endif
97+
#define HTP_OP_MAX_VMEM_DEFAULT (3355443200u)
10298

10399
#define HTP_MMAP_MAX_VMEM (2147483648u)
104100

ggml/src/ggml-hexagon/htp/htp_iface.idl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ struct htp_iface_pmu_conf {
1111
};
1212

1313
interface htp_iface : remote_handle64 {
14-
AEEResult start(in uint32 sess_id, in uint64 dsp_queue_id, in uint32 n_hvx, in uint32 use_hmx);
14+
AEEResult start(in uint32 sess_id, in uint64 dsp_queue_id, in uint32 n_hvx, in uint32 use_hmx, in uint64 max_vmem);
1515
AEEResult stop();
16-
AEEResult mmap(in uint32 fd, in uint32 size, in uint32 pinned);
16+
AEEResult mmap(in uint32 fd, in uint32 size);
1717
AEEResult munmap(in uint32 fd);
1818
AEEResult profiler(in uint32 mode, in htp_iface_pmu_conf pmu);
1919
AEEResult etm(in uint32 enable);

ggml/src/ggml-hexagon/htp/main.c

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ AEEResult htp_iface_close(remote_handle64 handle) {
210210
return AEE_SUCCESS;
211211
}
212212

213-
AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32 pinned) {
213+
AEEResult htp_iface_mmap(remote_handle64 handle, uint32_t fd, uint32_t size) {
214214
struct htp_context * ctx = (struct htp_context *) handle;
215215
if (!ctx) {
216216
return AEE_EBADPARM;
@@ -220,7 +220,6 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32
220220
for (uint32_t i=0; i<HTP_MAX_MMAPS; i++) {
221221
struct htp_mmap *m = &ctx->mmap[i];
222222
if (m->fd == fd) {
223-
m->pinned = pinned;
224223
return AEE_SUCCESS;
225224
}
226225
}
@@ -229,7 +228,7 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32
229228
for (uint32_t i=0; i<HTP_MAX_MMAPS; i++) {
230229
struct htp_mmap *m = &ctx->mmap[i];
231230
if (!m->size) {
232-
FARF(HIGH, "mmap : fd %u size %u pinned %u", fd, size, pinned);
231+
FARF(HIGH, "mmap : fd %u size %u", fd, size);
233232
#if __HVX_ARCH__ > 73
234233
void *va = HAP_mmap2(NULL, size, HAP_PROT_READ | HAP_PROT_WRITE, 0, fd, 0);
235234
#else
@@ -248,7 +247,6 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32
248247
m->base = (uint64_t) va;
249248
m->fd = fd;
250249
m->size = size;
251-
m->pinned = pinned;
252250

253251
return AEE_SUCCESS;
254252
}
@@ -275,7 +273,6 @@ AEEResult htp_iface_munmap(remote_handle64 handle, uint32 fd) {
275273
m->size = 0;
276274
m->base = NULL;
277275
m->fd = -1;
278-
m->pinned = 0;
279276
}
280277
}
281278

@@ -358,7 +355,7 @@ static void vtcm_free(struct htp_context * ctx) {
358355
static void htp_packet_callback(dspqueue_t queue, int error, void * context);
359356
static void htp_error_callback(dspqueue_t queue, int error, void * context);
360357

361-
AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_queue_id, uint32 n_hvx, uint32 use_hmx) {
358+
AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_queue_id, uint32 n_hvx, uint32 use_hmx, uint64_t max_vmem) {
362359
struct htp_context * ctx = (struct htp_context *) handle;
363360

364361
if (!ctx) {
@@ -376,12 +373,12 @@ AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_que
376373
htp_error_callback, // Error callback; no errors expected on the DSP
377374
(void *) ctx, // Callback context
378375
&ctx->queue);
379-
380376
if (err) {
381377
FARF(ERROR, "Queue import failed with 0x%08x", (unsigned) err);
382378
return err;
383379
}
384380

381+
ctx->max_vmem = max_vmem;
385382
ctx->thread_id = qurt_thread_get_id();
386383
ctx->thread_prio = qurt_thread_get_priority(ctx->thread_id);
387384

@@ -622,8 +619,8 @@ static inline bool reuse_buf(struct htp_context *ctx, uint32_t *m_reuse, struct
622619
}
623620

624621
static inline void drop_mmap(struct htp_context *ctx, struct htp_mmap *m) {
625-
if (m->size && !m->pinned) {
626-
FARF(HIGH, "unmap : fd %u base %p size %u pinned %u", m->fd, (void*) m->base, (uint32_t) m->size, m->pinned);
622+
if (m->size) {
623+
FARF(HIGH, "unmap : fd %u base %p size %u", m->fd, (void*) m->base, (uint32_t) m->size);
627624
#if __HVX_ARCH__ > 73
628625
HAP_munmap2((void *) m->base, m->size);
629626
#else
@@ -660,9 +657,8 @@ static inline void mmap_buf(struct htp_context *ctx, struct htp_buf_desc *b) {
660657
m->base = b->base = (uint64_t) va;
661658
m->fd = b->fd;
662659
m->size = b->size;
663-
m->pinned = 0;
664660

665-
FARF(HIGH, "mmap : fd %u base %p size %u pinned %u", m->fd, (void*) m->base, (uint32_t) m->size, m->pinned);
661+
FARF(HIGH, "mmap : fd %u base %p size %u", m->fd, (void*) m->base, (uint32_t) m->size);
666662
return;
667663
}
668664
}
@@ -672,8 +668,8 @@ static void prep_op_bufs(struct htp_context *ctx, struct htp_buf_desc *bufs, uin
672668
uint32_t m_reuse = 0; // mmap reuse mask (index from ctx->mmap array)
673669
uint32_t b_reuse = 0; // buf reuse count
674670

675-
size_t m_vmem = 0; // mapped vmem
676-
size_t e_vmem = 0; // extra vmem
671+
uint64_t m_vmem = 0; // mapped vmem
672+
uint64_t e_vmem = 0; // extra vmem
677673

678674
// See what we can reuse
679675
for (uint32_t i=0; i < n_bufs; i++) {
@@ -687,9 +683,10 @@ static void prep_op_bufs(struct htp_context *ctx, struct htp_buf_desc *bufs, uin
687683
// See how much vmem we have mmaped right now
688684
for (uint32_t i=0; i<HTP_MAX_MMAPS; i++) { m_vmem += ctx->mmap[i].size; }
689685

690-
FARF(HIGH, "prep-bufs : pass1 mmap-vmem %zu extra-vmem %zu n-bufs %u b-reuse %u", m_vmem, e_vmem, n_bufs, b_reuse);
686+
FARF(HIGH, "prep-bufs : pass1 mmap-vmem %zu extra-vmem %zu max-vmem %zu : n-bufs %u b-reuse %u",
687+
(size_t) m_vmem, (size_t) e_vmem, (size_t) ctx->max_vmem, n_bufs, b_reuse);
691688

692-
if ((m_vmem + e_vmem) > HTP_OP_MAX_VMEM) {
689+
if ((m_vmem + e_vmem) > ctx->max_vmem) {
693690
// Drop unused mappings
694691
for (uint32_t i=0; i < HTP_MAX_MMAPS; i++) {
695692
bool used = m_reuse & (1<<i);

scripts/snapdragon/adb/run-cli.sh

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,23 @@ opqueue=
5454
opflt=
5555
[ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"
5656

57+
vmem=
58+
[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM" # VM override goes in its own variable so opflt (GGML_HEXAGON_OPFILTER) is not clobbered
59+
60+
mbuf=
61+
[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB" # MB override goes in its own variable so opflt (GGML_HEXAGON_OPFILTER) is not clobbered
62+
vmem=
63+
[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM"
64+
65+
mbuf=
66+
[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB"
5767
set -x
5868

5969
adb $adbserial $adbhost shell " \
6070
cd $basedir; ulimit -c unlimited; \
6171
LD_LIBRARY_PATH=$basedir/$branch/lib \
6272
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
63-
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt \
73+
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
6474
./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \
6575
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
6676
--ctx-size 8192 --ubatch-size 256 -fa on \

scripts/snapdragon/adb/run-completion.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,19 @@ opqueue=
5454
opflt=
5555
[ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"
5656

57+
vmem=
58+
[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM"
59+
60+
mbuf=
61+
[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB"
62+
5763
set -x
5864

5965
adb $adbserial $adbhost shell " \
6066
cd $basedir; ulimit -c unlimited; \
6167
LD_LIBRARY_PATH=$basedir/$branch/lib \
6268
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
63-
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt \
69+
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
6470
./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model \
6571
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
6672
--ctx-size 8192 --ubatch-size 256 -fa on \

0 commit comments

Comments
 (0)