forked from intel/pti-gpu
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgpu_inst_count.cc
More file actions
140 lines (111 loc) · 4.76 KB
/
Copy pathgpu_inst_count.cc
File metadata and controls
140 lines (111 loc) · 4.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
//==============================================================
// Copyright (C) Intel Corporation
//
// SPDX-License-Identifier: MIT
// =============================================================
#include "gpu_inst_count.hpp"
namespace gtpin {
namespace gtpin_prof {
/********************
* Required functions - should be implemented
*/
/**
 * Adds the counter carried by one raw profile record to a result entry.
 *
 * Several records may be delivered for the same result, so the value is
 * accumulated (+=) rather than overwritten.
 *
 * @param profilingResult result object to update; it must really be a
 *        GpuInstCountResultData, otherwise the assertion below fires
 * @param record raw record, reinterpreted as a GpuInstCountRecord
 * @return PROF_STATUS_SUCCESS
 */
PROF_STATUS GpuInstCountKernel::Accumulate(std::shared_ptr<ResultData> profilingResult,
                                           GTPinProfileRecord* record) {
  PTI_ASSERT((record != nullptr) && "Profile record is missing.");
  auto gpuInstCountRec = reinterpret_cast<GpuInstCountRecord*>(record);

  // dynamic_pointer_cast yields an empty pointer on a type mismatch; catch it
  // here instead of dereferencing null below.
  auto gpuInstCountResult = std::dynamic_pointer_cast<GpuInstCountResultData>(profilingResult);
  PTI_ASSERT((gpuInstCountResult != nullptr) &&
             "Result data was wrongly initialized. Check factory.");

  gpuInstCountResult->count += gpuInstCountRec->count;
  return PROF_STATUS_SUCCESS;
}
/**
 * Walks the kernel's control-flow graph and fills bblData.
 *
 * Sets the record size to sizeof(GpuInstCountRecord), applies the default
 * buckets, and then registers one bblData entry per instruction. The record
 * count is set to the resulting number of entries.
 *
 * @param instrumentor GTPin instrumentation interface of the kernel
 * @return PROF_STATUS_SUCCESS
 */
PROF_STATUS GpuInstCountKernel::AnalyzeKernel(IGtKernelInstrument& instrumentor) {
  const IGtCfg& cfg = instrumentor.Cfg();

  SetRecordSize(sizeof(GpuInstCountRecord));
  SetDefautBuckets(instrumentor);

  // Key: offset of each instruction. Value: first instruction of the basic
  // block that owns it, which is the instruction Instrument() attaches to.
  for (auto bblPtr : cfg.Bbls()) {
    for (auto insPtr : bblPtr->Instructions()) {
      const IGtIns& ins = *insPtr;
      const InstructionOffset offset = cfg.GetInstructionOffset(ins);
      bblData.emplace(offset, bblPtr->FirstIns());
    }
  }

  /// One profile record per collected entry.
  SetRecordsNum(bblData.size());

  return PROF_STATUS_SUCCESS;
}
/**
 * Inserts one instruction-counter probe per bblData entry.
 *
 * Entry number i (in bblData iteration order) is bound to record index i of
 * the profile array. The probe is placed before the instruction stored as
 * the entry's mapped value.
 *
 * @param instrumentor GTPin instrumentation interface of the kernel
 * @return PROF_STATUS_SUCCESS
 */
PROF_STATUS GpuInstCountKernel::Instrument(IGtKernelInstrument& instrumentor) {
  size_t bblIdx = 0;
  for (auto it = bblData.begin(); it != bblData.end(); ++it, ++bblIdx) {
    // A fresh procedure per point of interest; it is filled by FinishPOI.
    GtGenProcedure proc;

    PointOfInterest poi(instrumentor, m_profileArray, bblIdx);
    poi.InstructionCounterAnalysis(offsetof(GpuInstCountRecord, count));
    poi.FinishPOI(proc);

    instrumentor.InstrumentInstruction(it->second, GtIpoint::Before(), proc);
  }

  return PROF_STATUS_SUCCESS;
}
/********************
* Optional functions - may or may not be changed, based on the tool's behaviour
*/
/**
 * Creates one result object per bblData entry for a kernel invocation.
 *
 * Each result is produced by the factory, tagged with the instruction offset
 * of its bblData entry, and appended to the invocation's data list in
 * bblData iteration order.
 *
 * @param invocationData invocation container to fill; must really be a
 *        GpuInstCountInvocationData
 * @param dispatcher not used by this implementation
 * @param execDescr not used by this implementation
 * @param factory tool factory used to create the result objects
 * @return PROF_STATUS_SUCCESS
 */
PROF_STATUS GpuInstCountKernel::InitResultData(std::shared_ptr<InvocationData> invocationData,
                                               IGtKernelDispatch& dispatcher,
                                               const GTPinKernelExecDesriptor& execDescr,
                                               const std::shared_ptr<IToolFactory> factory) {
  auto invData = std::dynamic_pointer_cast<GpuInstCountInvocationData>(invocationData);
  PTI_ASSERT((invData != nullptr) && "Invocation data was wrongly initialized. Check factory.");
  PTI_ASSERT((factory != nullptr) && "Tool factory is missing.");

  for (auto it = bblData.begin(); it != bblData.end(); ++it) {
    auto resData = factory->MakeResultData();
    auto gpuInstCountResult = std::dynamic_pointer_cast<GpuInstCountResultData>(resData);
    // The factory may hand back a different ResultData subtype; the cast then
    // yields an empty pointer that must not be dereferenced.
    PTI_ASSERT((gpuInstCountResult != nullptr) &&
               "Result data was wrongly initialized. Check factory.");

    gpuInstCountResult->instructionOffset = it->first;
    invData->data.push_back(gpuInstCountResult);
  }

  return PROF_STATUS_SUCCESS;
}
/**
 * Post-processing hook for collected invocation data.
 *
 * This tool needs no post-processing, so the data is left untouched.
 *
 * @param invocationData not used by this implementation
 * @return PROF_STATUS_SUCCESS
 */
PROF_STATUS GpuInstCountKernel::PostProcData(std::shared_ptr<InvocationData> invocationData) {
  // Nothing to do.
  return PROF_STATUS_SUCCESS;
}
/**
* GpuInstCount implementations
*/
/**
 * Lists the GTPin knobs requested by this tool.
 *
 * @return a vector holding the single knob "--no_empty_profile_dir"
 */
std::vector<const char*> GpuInstCount::SetGtpinKnobs() const {
  std::vector<const char*> knobs;
  knobs.push_back("--no_empty_profile_dir");
  return knobs;
}
/**
* GpuInstCountFactory implementations
*/
/**
 * Creates the per-kernel profiling object of this tool.
 *
 * @param instrumentor forwarded to the GpuInstCountKernel constructor
 * @param kernelData forwarded to the GpuInstCountKernel constructor
 * @return the new GpuInstCountKernel, held through the base-class pointer
 */
std::shared_ptr<GTPinProfileKernel> GpuInstCountFactory::MakeKernel(
    IGtKernelInstrument& instrumentor, std::shared_ptr<KernelData> kernelData) {
  std::shared_ptr<GTPinProfileKernel> profileKernel =
      std::make_shared<GpuInstCountKernel>(instrumentor, kernelData);
  return profileKernel;
}
/**
 * Allocates a value-initialized GpuInstCountRecord with `new`.
 *
 * NOTE(review): the raw pointer is presumably owned (and deleted) by the
 * caller - confirm against the base factory contract.
 *
 * @return pointer to the newly allocated record
 */
GTPinProfileRecord* GpuInstCountFactory::MakeRecord() {
  return new GpuInstCountRecord();
}
std::shared_ptr<ProfilerData> GpuInstCountFactory::MakeProfilerData() {
return std::make_shared<GpuInstCountProfilerData>();
};
/**
 * Creates the kernel-level data container of this tool.
 *
 * @param instrumentor forwarded to the GpuInstCountKernelData constructor
 * @return the new GpuInstCountKernelData, held through the base-class pointer
 */
std::shared_ptr<KernelData> GpuInstCountFactory::MakeKernelData(IGtKernelInstrument& instrumentor) {
  std::shared_ptr<KernelData> kernelData = std::make_shared<GpuInstCountKernelData>(instrumentor);
  return kernelData;
}
std::shared_ptr<InvocationData> GpuInstCountFactory::MakeInvocationData(
const GTPinKernelExecDesriptor& execDescr) {
return std::make_shared<GpuInstCountInvocationData>(execDescr);
};
std::shared_ptr<ResultData> GpuInstCountFactory::MakeResultData() {
return std::make_shared<GpuInstCountResultData>();
};
}
}