-
Notifications
You must be signed in to change notification settings - Fork 3.9k
Expand file tree
/
Copy pathCommandQueue.cpp
More file actions
106 lines (86 loc) · 3.82 KB
/
CommandQueue.cpp
File metadata and controls
106 lines (86 loc) · 3.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "precomp.h"
#include "CommandQueue.h"
namespace Dml
{
// Wraps an existing D3D12 command queue. The queue is not owned exclusively;
// this class layers fence-based completion tracking on top of it.
// - existingQueue: the D3D12 queue all work is submitted to; its type is cached.
// - cpuSyncSpinningEnabled: stored and later forwarded to GpuEvent::WaitForSignal
//   when this object blocks the CPU (see Close()).
CommandQueue::CommandQueue(ID3D12CommandQueue* existingQueue, bool cpuSyncSpinningEnabled)
    : m_queue(existingQueue)
    , m_type(existingQueue->GetDesc().Type)
    , m_cpuSyncSpinningEnabled(cpuSyncSpinningEnabled)
{
    ComPtr<ID3D12Device> device;
    GRAPHICS_THROW_IF_FAILED(m_queue->GetDevice(IID_GRAPHICS_PPV_ARGS(device.GetAddressOf())));

    // Fence starts at 0 to match m_lastFenceValue's presumed initial value of 0
    // (initializer not visible here — declared in the header); each submission
    // increments and signals it.
    ORT_THROW_IF_FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_GRAPHICS_PPV_ARGS(m_fence.ReleaseAndGetAddressOf())));
}
// Convenience overload: submits a single command list by delegating to the
// span-based ExecuteCommandLists. The mutex is recursive, so re-locking in
// the delegate is harmless.
void CommandQueue::ExecuteCommandList(ID3D12CommandList* commandList)
{
    std::lock_guard<std::recursive_mutex> lock(m_mutex);

    auto singleList = gsl::make_span(&commandList, 1);
    ExecuteCommandLists(singleList);
}
// Submits a batch of command lists to the GPU queue, then signals this
// queue's tracking fence so the batch's completion can be observed through
// GetCurrentCompletionEvent().
void CommandQueue::ExecuteCommandLists(gsl::span<ID3D12CommandList*> commandLists)
{
    std::lock_guard<std::recursive_mutex> lock(m_mutex);

    m_queue->ExecuteCommandLists(gsl::narrow<uint32_t>(commandLists.size()), commandLists.data());

    // Advance the tracking fence; the new value identifies this submission.
    const uint64_t signaledValue = ++m_lastFenceValue;
    ORT_THROW_IF_FAILED(m_queue->Signal(m_fence.Get(), signaledValue));
}
// Inserts a GPU-side wait on an external fence: the queue stalls until
// `fence` reaches `value` before executing subsequent work. The tracking
// fence is then advanced and signaled so the wait itself is observable via
// this queue's completion events.
void CommandQueue::Wait(ID3D12Fence* fence, uint64_t value)
{
    std::lock_guard<std::recursive_mutex> lock(m_mutex);

    ORT_THROW_IF_FAILED(m_queue->Wait(fence, value));

    const uint64_t signaledValue = ++m_lastFenceValue;
    ORT_THROW_IF_FAILED(m_queue->Signal(m_fence.Get(), signaledValue));
}
// Returns an event that is signaled once all work submitted to this queue
// so far has completed (i.e. the tracking fence reaches m_lastFenceValue).
GpuEvent CommandQueue::GetCurrentCompletionEvent()
{
    std::lock_guard<std::recursive_mutex> lock(m_mutex);

    GpuEvent completionEvent{ m_lastFenceValue, m_fence };
    return completionEvent;
}
// Returns an event for work that has been recorded but not yet submitted:
// it corresponds to the fence value the *next* submission will signal.
GpuEvent CommandQueue::GetNextCompletionEvent()
{
    std::lock_guard<std::recursive_mutex> lock(m_mutex);

    GpuEvent upcomingEvent{ m_lastFenceValue + 1, m_fence };
    return upcomingEvent;
}
// Keeps `object` alive until the GPU work outstanding at the time of this
// call has finished. If waitForUnsubmittedWork is true, the reference is
// instead tied to the next fence value, covering work that is recorded into
// a command list but not yet submitted.
void CommandQueue::QueueReference(IUnknown* object, bool waitForUnsubmittedWork)
{
    std::lock_guard<std::recursive_mutex> lock(m_mutex);

    // While Close() is draining m_queuedReferences it is not safe to enqueue
    // new entries — they would leak. Destructors of queued objects can
    // re-enter this method (e.g. a BucketizedBufferAllocator allocation
    // queues a reference to its underlying D3D resource when freed), and
    // Close() has already blocked on all scheduled GPU work, so dropping the
    // reference here is harmless.
    if (m_closing)
    {
        return;
    }

    QueuedReference queuedReference = {GetLastFenceValue(), object};

    // Unsubmitted work completes with the *next* signaled fence value.
    if (waitForUnsubmittedWork)
    {
        ++queuedReference.fenceValue;
    }

    m_queuedReferences.push_back(queuedReference);
}
// Blocks until all work submitted to this queue has completed on the GPU,
// then releases every queued reference. While m_closing is set,
// QueueReference() refuses new entries, which prevents destructors of the
// released objects from re-populating m_queuedReferences mid-clear.
void CommandQueue::Close()
{
    std::lock_guard<std::recursive_mutex> lock(m_mutex);

    // Wait for flushed work:
    assert(!m_closing);
    m_closing = true;
    // GetCurrentCompletionEvent re-locks m_mutex; safe because it is recursive.
    GpuEvent event = GetCurrentCompletionEvent();
    event.WaitForSignal(m_cpuSyncSpinningEnabled);
    // All GPU work the queued references guarded is now done, so they can go.
    m_queuedReferences.clear();
    m_closing = false;
}
// Drops queued references whose guarded GPU work has completed. Entries are
// appended in fence-value order, so popping from the front until a value
// exceeds the fence's completed value releases exactly the finished ones.
void CommandQueue::ReleaseCompletedReferences()
{
    std::lock_guard<std::recursive_mutex> lock(m_mutex);

    const uint64_t gpuCompletedValue = GetFence()->GetCompletedValue();

    for (;;)
    {
        if (m_queuedReferences.empty() || m_queuedReferences.front().fenceValue > gpuCompletedValue)
        {
            break;
        }
        m_queuedReferences.pop_front();
    }
}
} // namespace Dml