Skip to content

Commit 04ebeee

Browse files
Fix and re-add VK_ARM_explicit_host_updates support
If the extension is enabled, the normal write-out of data during vkQueueSubmit*() is disabled; the data is instead written out during vkFlushMappedMemoryRanges().
1 parent a8656fe commit 04ebeee

3 files changed

Lines changed: 49 additions & 23 deletions

File tree

scripts/lava-capture.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def args():
2323
parser.add_argument('--gpu', dest='gpu', metavar='<gpu>', help='Use the specified GPU for tracing')
2424
parser.add_argument('--automate', dest='automate', action='store_true', help='Try to automate the run as much as possible if app supports CBS')
2525
parser.add_argument('--no-multithread', dest='nomp', action='store_true', help='Turn off multi-threaded compression and disk writeout (saves memory)')
26+
parser.add_argument('--trust-flushing', dest='explicit', action='store_true', help='Trust app to flush modified host memory instead of tracking usage')
2627
parser.add_argument('programAndArgs', metavar='<program> [<program args>]', nargs=argparse.REMAINDER, help='Application to capture and any program arguments')
2728
return parser
2829

@@ -81,6 +82,7 @@ def PrintEnvVar(envVar):
8182
if args.debug: os.environ['LAVATUBE_DEBUG'] = args.debug
8283
if args.file: os.environ['LAVATUBE_DESTINATION'] = os.path.abspath(args.file)
8384
if args.log: os.environ['LAVATUBE_DEBUG_FILE'] = args.log
85+
if args.explicit: os.environ['LAVATUBE_TRUST_HOST_FLUSHING'] = '1'
8486
if args.layer: os.environ['VK_LAYER_PATH'] = args.layer
8587
else: os.environ['VK_LAYER_PATH'] = '/opt/lavatube'
8688
if args.nomp:

src/hardcode_write.cpp

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,24 @@ static void queue_update(lava_file_writer& writer, trackedqueue* t, VkCommandBuf
660660
}
661661
}
662662

663+
/// Emit a single update packet for one tracked object.
///
/// The packet consists of a one-byte packet tag chosen from the object's type
/// (image, buffer or tensor), the device handle, the object handle, and then
/// whatever writer.write_patch() emits for the given clone/changed pointers,
/// offset and size.
///
/// Side effects on object_data: its update counter is incremented and the
/// value returned by write_patch() is added to its running 'written' total.
/// writer.thaw() is called once the packet has been emitted.
///
/// @return the value returned by writer.write_patch() (presumably the number
///         of bytes written -- confirm against lava_file_writer).
static uint64_t write_out_object(lava_file_writer& writer, const auto* device_data, trackedobject* object_data, char* cloneptr, char* changedptr, uint64_t offset, uint64_t size)
{
	// Pick the packet tag matching the kind of object; any other type is a programming error.
	const auto kind = object_data->object_type;
	if (kind == VK_OBJECT_TYPE_IMAGE)
	{
		writer.write_uint8_t((uint8_t)PACKET_IMAGE_UPDATE);
	}
	else if (kind == VK_OBJECT_TYPE_BUFFER)
	{
		writer.write_uint8_t((uint8_t)PACKET_BUFFER_UPDATE);
	}
	else if (kind == VK_OBJECT_TYPE_TENSOR_ARM)
	{
		writer.write_uint8_t((uint8_t)PACKET_TENSOR_UPDATE);
	}
	else
	{
		assert(false);
	}

	// Identify which device and which object the patch belongs to.
	writer.write_handle(device_data);
	writer.write_handle(object_data);

	// Emit the patch itself and update the per-object statistics.
	const uint64_t patched = writer.write_patch(cloneptr, changedptr, offset, size);
	object_data->written += patched;
	object_data->updates++;

	writer.thaw();
	return patched;
}
680+
663681
static void memory_update(lava_file_writer& writer, trackedqueue* queue_data, const std::unordered_map<VkDeviceMemory, range>& ranges_by_memory, std::unordered_set<trackedcmdbuffer_trace*>& cmdbufs)
664682
{
665683
// for each, map and update
@@ -714,25 +732,13 @@ static void memory_update(lava_file_writer& writer, trackedqueue* queue_data, co
714732
range r2 = { r.first + object_data->offset, r.last + object_data->offset };
715733
assert(r2.last < object_data->offset + object_data->size);
716734
range v = memory_data->exposed.fetch(r2, memory_data->ptr != nullptr);
717-
switch (object_data->object_type)
718-
{
719-
case VK_OBJECT_TYPE_IMAGE: writer.write_uint8_t((uint8_t)PACKET_IMAGE_UPDATE); break;
720-
case VK_OBJECT_TYPE_BUFFER: writer.write_uint8_t((uint8_t)PACKET_BUFFER_UPDATE); break;
721-
case VK_OBJECT_TYPE_TENSOR_ARM: writer.write_uint8_t((uint8_t)PACKET_TENSOR_UPDATE); break;
722-
default: assert(false); break;
723-
}
724-
writer.write_handle(device_data);
725-
writer.write_handle(object_data);
726-
written += writer.write_patch(cloneptr, changedptr, v.first - object_data->offset, v.last - v.first + 1);
735+
written += write_out_object(writer, device_data, object_data, cloneptr, changedptr, v.first - object_data->offset, v.last - v.first + 1);
727736
scanned += v.last - v.first + 1;
728-
object_data->updates++;
729737
NEVER("flushing obj %u (%lu, %lu) -> (%lu, %lu) -> (%lu, %lu), exposed after (%lu, %lu), total written %lu, memory %u; binding_offset=%lu binding_size=%lu ptr=%p",
730738
object_data->index, r.first, r.last, r2.first, r2.last, v.first, v.last, memory_data->exposed.span().first, memory_data->exposed.span().last,
731739
(unsigned long)object_data->written, memory_data->index, binding_offset, binding_size, memory_data->ptr);
732740
}
733-
object_data->written += written;
734741
NEVER("%s(%u) offset=%u size=%u memory=%u(total size=%u) written=%u scanned=%u cmdbuf=%u source=%d", pretty_print_VkObjectType(object_data->type), (unsigned)object_data->index, (unsigned)object_data->offset, (unsigned)object_data->size, memory_data->index, (unsigned)memory_data->allocationSize, (unsigned)written, (unsigned)scanned, cmdbuf_data->index, (int)object_data->source);
735-
writer.thaw();
736742
}
737743
}
738744

@@ -936,7 +942,7 @@ static void modify_device_extensions(VkPhysicalDevice physicalDevice) REQUIRES(f
936942
device_extension_properties.push_back({VK_EXT_FRAME_BOUNDARY_EXTENSION_NAME, 1});
937943
device_extension_properties.push_back({VK_ARM_TRACE_HELPERS_EXTENSION_NAME, 1});
938944
device_extension_properties.push_back({VK_ARM_TRACE_DESCRIPTOR_BUFFER_EXTENSION_NAME, 1});
939-
//device_extension_properties.push_back({VK_ARM_EXPLICIT_HOST_UPDATES_EXTENSION_NAME, 1});
945+
device_extension_properties.push_back({VK_ARM_EXPLICIT_HOST_UPDATES_EXTENSION_NAME, 1});
940946
device_extension_properties.push_back({VK_TRACETOOLTEST_TRACE_HELPERS2_EXTENSION_NAME, 1});
941947

942948
for (const auto &ext : tmp_device_extension_properties)
@@ -1767,6 +1773,7 @@ void trace_post_vkFlushMappedMemoryRanges(lava_file_writer& writer, VkResult res
17671773
{
17681774
writer.parent->memory_mutex.lock();
17691775
assert(result == VK_SUCCESS);
1776+
const auto* device_data = writer.parent->records.VkDevice_index.at(device);
17701777
// The memory must be memory mapped
17711778
for (unsigned i = 0; i < memoryRangeCount; i++)
17721779
{
@@ -1779,6 +1786,21 @@ void trace_post_vkFlushMappedMemoryRanges(lava_file_writer& writer, VkResult res
17791786
}
17801787
assert(memory_data->ptr != nullptr && memory_data->size != 0); // the memory must be memory mapped
17811788
memory_data->exposed.add_os(v.offset, size);
1789+
if (device_data->explicit_host_updates)
1790+
{
1791+
for (auto& pair : memory_data->usage)
1792+
{
1793+
if (pair.first > v.offset + size) continue;
1794+
trackedobject* object_data = pair.second;
1795+
if (pair.first + object_data->size > pair.first) continue;
1796+
char* cloneptr = memory_data->clone + object_data->offset;
1797+
char* changedptr = memory_data->ptr + object_data->offset - memory_data->offset;
1798+
uint64_t start = std::max<uint64_t>(pair.first, v.offset);
1799+
uint64_t end = std::min<uint64_t>(pair.first + object_data->size, v.offset + size);
1800+
uint64_t written = write_out_object(writer, device_data, object_data, cloneptr, changedptr, start, end - start);
1801+
ILOG("vkFlushMappedMemoryRanges[%u] from [%u] %lu to %lu", i, (unsigned)object_data->index, (unsigned long)start, (unsigned long)end);
1802+
}
1803+
}
17821804
}
17831805
writer.parent->memory_mutex.unlock();
17841806
}

src/lavatube.h

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,11 @@ struct trackedmemory : trackable
123123
exposure exposed;
124124
/// Native handle of the memory
125125
VkDeviceMemory backing = VK_NULL_HANDLE;
126-
/// Data structure used to find aliasing objects to make sure we recreate them together again on replay.
126+
127+
/// Data structure used to track usage of Vulkan objects. We can use this to
128+
/// make sure we recreate them together again on replay if they are aliased.
127129
/// For now, this only supports 1-to-1 aliasing. Only used during capture.
128-
std::multimap<VkDeviceSize, trackedobject*> aliasing;
130+
std::multimap<VkDeviceSize, trackedobject*> usage; // do not touch unless you hold the memory mutex
129131

130132
void bind(trackedobject* obj);
131133
void unbind(trackedobject* obj);
@@ -739,8 +741,8 @@ struct trackedframebuffer : trackable
739741
inline void trackedmemory::bind(trackedobject* obj)
740742
{
741743
// only 1-to-1 aliasing for now
742-
auto it = aliasing.find(obj->offset);
743-
if (it != aliasing.end()) // we are aliasing some other object
744+
auto it = usage.find(obj->offset);
745+
if (it != usage.end()) // we are aliasing some other object
744746
{
745747
trackedobject* other = it->second;
746748
if (obj->object_type == VK_OBJECT_TYPE_IMAGE && other->object_type == VK_OBJECT_TYPE_IMAGE)
@@ -759,18 +761,18 @@ inline void trackedmemory::bind(trackedobject* obj)
759761
obj->alias_type = other->object_type;
760762
obj->alias_index = other->index;
761763
}
762-
aliasing.insert({obj->offset, obj});
764+
usage.insert({obj->offset, obj});
763765
}
764766

765767
inline void trackedmemory::unbind(trackedobject* obj)
766768
{
767-
auto it = aliasing.find(obj->offset);
768-
assert(it != aliasing.end());
769-
for (; it != aliasing.end(); ++it)
769+
auto it = usage.find(obj->offset);
770+
assert(it != usage.end());
771+
for (; it != usage.end(); ++it)
770772
{
771773
if (it->first == obj->offset)
772774
{
773-
aliasing.erase(it);
775+
usage.erase(it);
774776
return;
775777
}
776778
}

0 commit comments

Comments
 (0)