Skip to content

Commit f6cbd4b

Browse files
committed
Index searching for sparse waveforms is now done on the GPU where possible
1 parent f118088 commit f6cbd4b

File tree

5 files changed

+202
-24
lines changed

5 files changed

+202
-24
lines changed

src/ngscopeclient/WaveformArea.cpp

Lines changed: 70 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,6 @@ DisplayedChannel::DisplayedChannel(StreamDescriptor stream, Session& session)
8080
m_rasterizedWaveform.SetCpuAccessHint(AcceleratorBuffer<float>::HINT_LIKELY);
8181
m_rasterizedWaveform.SetGpuAccessHint(AcceleratorBuffer<float>::HINT_LIKELY);
8282

83-
//Use pinned memory for index buffer since it should only be read once
84-
m_indexBuffer.SetCpuAccessHint(AcceleratorBuffer<uint32_t>::HINT_LIKELY);
85-
m_indexBuffer.SetGpuAccessHint(AcceleratorBuffer<uint32_t>::HINT_UNLIKELY);
86-
8783
//Create tone map pipeline depending on waveform type
8884
switch(m_stream.GetType())
8985
{
@@ -111,6 +107,23 @@ DisplayedChannel::DisplayedChannel(StreamDescriptor stream, Session& session)
111107
m_toneMapPipe = make_shared<ComputePipeline>(
112108
"shaders/WaveformToneMap.spv", 1, sizeof(WaveformToneMapArgs), 1);
113109
}
110+
111+
//If we have native int64 support we can do the index search for sparse waveforms on the GPU
112+
if(g_hasShaderInt64)
113+
{
114+
m_indexSearchComputePipeline = make_shared<ComputePipeline>(
115+
"shaders/IndexSearch.spv", 2, sizeof(IndexSearchConstants));
116+
117+
//Use GPU local memory for index buffer
118+
m_indexBuffer.SetCpuAccessHint(AcceleratorBuffer<uint32_t>::HINT_LIKELY);
119+
m_indexBuffer.SetGpuAccessHint(AcceleratorBuffer<uint32_t>::HINT_LIKELY);
120+
}
121+
else
122+
{
123+
//Use pinned memory for index buffer since it should only be read once
124+
m_indexBuffer.SetCpuAccessHint(AcceleratorBuffer<uint32_t>::HINT_LIKELY);
125+
m_indexBuffer.SetGpuAccessHint(AcceleratorBuffer<uint32_t>::HINT_UNLIKELY);
126+
}
114127
}
115128

116129
DisplayedChannel::~DisplayedChannel()
@@ -1339,7 +1352,6 @@ ImVec2 WaveformArea::ClosestPointOnLineSegment(ImVec2 lineA, ImVec2 lineB, ImVec
13391352
void WaveformArea::RenderSpectrumPeaks(ImDrawList* list, shared_ptr<DisplayedChannel> channel)
13401353
{
13411354
auto stream = channel->GetStream();
1342-
auto data = stream.GetData();
13431355
auto& peaks = dynamic_cast<PeakDetectionFilter*>(stream.m_channel)->GetPeaks();
13441356

13451357
//TODO: add a preference for peak circle color and size?
@@ -2107,38 +2119,72 @@ void WaveformArea::RasterizeAnalogOrDigitalWaveform(
21072119
}
21082120

21092121
//Bind input buffers
2110-
if(uadata)
2111-
comp->BindBufferNonblocking(1, uadata->m_samples, cmdbuf);
2112-
if(uddata)
2113-
comp->BindBufferNonblocking(1, uddata->m_samples, cmdbuf);
21142122
if(sdata)
21152123
{
2124+
//Calculate indexes for X axis
2125+
auto& ibuf = channel->GetIndexBuffer();
2126+
2127+
//FIXME: what still depends on m_offsets CPU side??
2128+
//If we don't copy this, nothing is drawn
2129+
sdata->m_offsets.PrepareForCpuAccessNonblocking(cmdbuf);
2130+
2131+
//If we have native int64, do this on the GPU
2132+
if(g_hasShaderInt64)
2133+
{
2134+
IndexSearchConstants cfg;
2135+
cfg.len = data->size();
2136+
cfg.w = w;
2137+
cfg.xscale = xscale;
2138+
cfg.offset_samples = offset_samples;
2139+
2140+
const uint32_t threadsPerBlock = 64;
2141+
const uint32_t numBlocks = (w | (threadsPerBlock - 1)) / threadsPerBlock;
2142+
2143+
auto ipipe = channel->GetIndexSearchPipeline();
2144+
ipipe->BindBufferNonblocking(0, sdata->m_offsets, cmdbuf);
2145+
ipipe->BindBufferNonblocking(1, ibuf, cmdbuf, true);
2146+
ipipe->Dispatch(cmdbuf, cfg, numBlocks);
2147+
ipipe->AddComputeMemoryBarrier(cmdbuf);
2148+
ibuf.MarkModifiedFromGpu();
2149+
}
2150+
2151+
//otherwise CPU fallback
2152+
else
2153+
{
2154+
ibuf.PrepareForCpuAccess();
2155+
sdata->m_offsets.PrepareForCpuAccess();
2156+
for(size_t i=0; i<w; i++)
2157+
{
2158+
int64_t target = floor(i / xscale) + offset_samples;
2159+
ibuf[i] = BinarySearchForGequal(
2160+
sdata->m_offsets.GetCpuPointer(),
2161+
data->size(),
2162+
target);
2163+
2164+
if(i < 16)
2165+
LogDebug("ibuf[%zu] = %d\n", i, ibuf[i]);
2166+
}
2167+
ibuf.MarkModifiedFromCpu();
2168+
}
2169+
2170+
//Bind the buffers
21162171
if(sadata)
21172172
comp->BindBufferNonblocking(1, sadata->m_samples, cmdbuf);
21182173
if(sddata)
21192174
comp->BindBufferNonblocking(1, sddata->m_samples, cmdbuf);
21202175

21212176
//Map offsets and, if requested, durations
21222177
comp->BindBufferNonblocking(2, sdata->m_offsets, cmdbuf);
2178+
comp->BindBufferNonblocking(3, ibuf, cmdbuf);
21232179
if(channel->ShouldMapDurations())
21242180
comp->BindBufferNonblocking(4, sdata->m_durations, cmdbuf);
2125-
2126-
//Calculate indexes for X axis
2127-
auto& ibuf = channel->GetIndexBuffer();
2128-
ibuf.PrepareForCpuAccess();
2129-
sdata->m_offsets.PrepareForCpuAccess();
2130-
for(size_t i=0; i<w; i++)
2131-
{
2132-
int64_t target = floor(i / xscale) + offset_samples;
2133-
ibuf[i] = BinarySearchForGequal(
2134-
sdata->m_offsets.GetCpuPointer(),
2135-
data->size(),
2136-
target);
2137-
}
2138-
ibuf.MarkModifiedFromCpu();
2139-
comp->BindBufferNonblocking(3, ibuf, cmdbuf);
21402181
}
21412182

2183+
if(uadata)
2184+
comp->BindBufferNonblocking(1, uadata->m_samples, cmdbuf);
2185+
if(uddata)
2186+
comp->BindBufferNonblocking(1, uddata->m_samples, cmdbuf);
2187+
21422188
//Bind output texture and bail if there's nothing there
21432189
auto& imgOut = channel->GetRasterizedWaveform();
21442190
if(imgOut.empty())

src/ngscopeclient/WaveformArea.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,15 @@ struct ConfigPushConstants
145145
float persistScale;
146146
};
147147

148+
/**
	@brief Push constants for the index search compute shader (shaders/IndexSearch.spv)

	Field order and types mirror the push_constant block declared in IndexSearch.glsl,
	and sizeof() of this type is what the pipeline is created with, so keep the two in sync.
 */
struct IndexSearchConstants
{
	///@brief Value added to each entry's scaled index to form the timestamp being searched for
	int64_t offset_samples;

	///@brief Divisor applied to the output index before offset_samples is added
	float xscale;

	///@brief Number of entries in the offsets buffer being searched
	uint32_t len;

	///@brief Number of entries to produce in the output index buffer
	uint32_t w;
};
156+
148157
/**
149158
@brief State for a single peak label
150159
@@ -335,6 +344,9 @@ class DisplayedChannel
335344
std::shared_ptr<ComputePipeline> GetToneMapPipeline()
336345
{ return m_toneMapPipe; }
337346

347+
std::shared_ptr<ComputePipeline> GetIndexSearchPipeline()
348+
{ return m_indexSearchComputePipeline; }
349+
338350
bool ZeroHoldFlagSet()
339351
{
340352
return m_stream.GetFlags() & Stream::STREAM_DO_NOT_INTERPOLATE;
@@ -432,6 +444,9 @@ class DisplayedChannel
432444
///@brief Compute pipeline for rendering sparse digital waveforms
433445
std::shared_ptr<ComputePipeline> m_sparseDigitalComputePipeline;
434446

447+
///@brief Compute pipeline for index searching
448+
std::shared_ptr<ComputePipeline> m_indexSearchComputePipeline;
449+
435450
///@brief Y axis position of our button within the view
436451
float m_yButtonPos;
437452

src/ngscopeclient/WaveformThread.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,10 @@ void WaveformThread(Session* session, atomic<bool>* shuttingDown)
128128
//Wait for data to be available from all scopes
129129
if(!session->CheckForPendingWaveforms())
130130
{
131+
#ifdef HAVE_NVTX
132+
nvtx3::scoped_range range2("No data ready");
133+
#endif
134+
131135
this_thread::sleep_for(chrono::milliseconds(1));
132136
continue;
133137
}

src/ngscopeclient/shaders/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ add_compute_shaders(
3131
SOURCES
3232
ConstellationToneMap.glsl
3333
EyeToneMap.glsl
34+
IndexSearch.glsl
3435
ScopeDeskewUniform4xRate.glsl
3536
ScopeDeskewUniformUnequalRate.glsl
3637
ScopeDeskewUniformEqualRate.glsl
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/***********************************************************************************************************************
2+
* *
3+
* libscopehal *
4+
* *
5+
* Copyright (c) 2012-2026 Andrew D. Zonenberg and contributors *
6+
* All rights reserved. *
7+
* *
8+
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the *
9+
* following conditions are met: *
10+
* *
11+
* * Redistributions of source code must retain the above copyright notice, this list of conditions, and the *
12+
* following disclaimer. *
13+
* *
14+
* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the *
15+
* following disclaimer in the documentation and/or other materials provided with the distribution. *
16+
* *
17+
* * Neither the name of the author nor the names of any contributors may be used to endorse or promote products *
18+
* derived from this software without specific prior written permission. *
19+
* *
20+
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
21+
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL *
22+
* THE AUTHORS BE HELD LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
23+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR *
24+
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
25+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE *
26+
* POSSIBILITY OF SUCH DAMAGE. *
27+
* *
28+
***********************************************************************************************************************/
29+
30+
#version 430
31+
#pragma shader_stage(compute)
32+
#extension GL_ARB_gpu_shader_int64 : require
33+
34+
layout(std430, binding=0) restrict readonly buffer buf_din
35+
{
36+
int64_t din[];
37+
};
38+
39+
layout(std430, binding=1) restrict writeonly buffer buf_results
40+
{
41+
uint results[];
42+
};
43+
44+
layout(std430, push_constant) uniform constants
45+
{
46+
int64_t offset_samples;
47+
float xscale;
48+
uint len;
49+
uint w;
50+
};
51+
52+
layout(local_size_x=64, local_size_y=1, local_size_z=1) in;
53+
54+
/**
	@brief Computes one entry of results[] by binary searching din[] for a target timestamp

	Each invocation handles the results[] entry at its global X index. The target is
	floor(index / xscale) + offset_samples, and din[0 .. len-1] is searched for it.
	Invocations whose index is >= w return without writing anything.
 */
void main()
{
	//Get thread index and bounds check (the last workgroup may extend past w)
	uint col = gl_GlobalInvocationID.x;
	if(col >= w)
		return;

	//Timestamp this output entry is looking for
	int64_t target = int64_t(floor(float(col) / xscale)) + offset_samples;

	uint iclk = 0;
	if(len > 0)
	{
		uint last_lo = 0;
		uint last_hi = len - 1;		//safe: only evaluated once we know len > 0

		//Clip if out of range
		if(din[0] >= target)
			iclk = 0;
		else if(din[last_hi] < target)
			iclk = last_hi;

		//Main loop: bisect until last_lo and last_hi are adjacent
		else
		{
			uint pos = len/2;
			while( (last_hi - last_lo) > 1)
			{
				//Move down
				if(din[pos] > target)
				{
					last_hi = pos;
					pos = last_lo + (pos - last_lo)/2;
				}

				//Move up
				else
				{
					last_lo = pos;
					pos = last_hi - (last_hi - pos)/2;
				}
			}

			//Target is bracketed
			iclk = last_lo;
		}
	}

	//We want one before the target, but iclk is unsigned: decrementing zero would wrap to 0xFFFFFFFF
	//and hand the consumer an index far outside din[]. Clamp at the first entry instead.
	//NOTE(review): the CPU fallback in WaveformArea.cpp stores the BinarySearchForGequal() result
	//with no decrement at all - confirm whether the two paths are meant to produce the same index.
	if(iclk > 0)
		iclk --;

	results[col] = iclk;
}

0 commit comments

Comments
 (0)