Skip to content

Commit 53fb592

Browse files
committed
opt arc770 for Q4_0
1 parent e21cdc1 commit 53fb592

4 files changed

Lines changed: 87 additions & 18 deletions

File tree

ggml/src/ggml-sycl/common.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ struct sycl_device_info {
217217
// cudaOccupancyMaxActiveBlocksPerMultiprocessor
218218
bool vmm; // virtual memory support
219219
size_t total_vram;
220-
//sycl_hw_info hw_info; \\ device id and aarch, currently not used
220+
sycl_hw_info hw_info;
221221
optimize_feature opt_feature;
222222
};
223223

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
104104

105105
info.max_work_group_sizes[i] = prop.get_max_work_group_size();
106106
info.devices[i].max_wg_per_cu = info.max_work_group_sizes[i] / prop.get_max_compute_units();
107+
info.devices[i].hw_info = get_device_hw_info(&device);
107108

108109
}
109110

@@ -3620,9 +3621,13 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
36203621
// Dispatch becomes obscure with the reorder, MMVQ when the reorder optimization
36213622
// is enabled takes precedence over DMMV, the current if-else implementation
36223623
// requires disabling DMMV if both conditions are met
3624+
36233625
if (!g_ggml_sycl_prioritize_dmmv && ((should_reorder_tensor(ctx, dst) &&
36243626
ggml_sycl_supports_reorder_mmvq(src0->type)))) {
3625-
use_dequantize_mul_mat_vec = use_dequantize_mul_mat_vec && !use_mul_mat_vec_q;
3627+
//Arc770 get benefit with Q4_0 by skipping it.
3628+
if (ggml_sycl_info().devices[ctx.device].hw_info.arch != gpu_arch::intel_gpu_acm_g10) {
3629+
use_dequantize_mul_mat_vec = use_dequantize_mul_mat_vec && !use_mul_mat_vec_q;
3630+
}
36263631
}
36273632

36283633
if (!split && src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {

ggml/src/ggml-sycl/sycl_hw.cpp

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,67 @@
11
#include "sycl_hw.hpp"
22

3-
// TODO: currently not used
4-
/*
5-
sycl_hw_info get_device_hw_info(sycl::device *device_ptr) {
6-
sycl_hw_info res;
7-
int32_t id = device_ptr->get_info<sycl::ext::intel::info::device::device_id>();
8-
res.device_id = id;
3+
using namespace std;
94

10-
syclex::architecture arch = device_ptr->get_info<syclex::info::device::architecture>();
11-
res.arch = arch;
5+
/*defined in
6+
* /opt/intel/oneapi/compiler/latest/include/sycl/ext/oneapi/experimental/device_architecture.def
7+
*/
8+
static map<gpu_arch, std::pair<const char*, sycl_intel_gpu_family>> arch2name = {
9+
{gpu_arch::intel_gpu_bdw, {"intel_gpu_bdw", GPU_FAMILY_IGPU_NON_XE}},
10+
{gpu_arch::intel_gpu_skl, {"intel_gpu_skl", GPU_FAMILY_IGPU_NON_XE}},
11+
{gpu_arch::intel_gpu_kbl, {"intel_gpu_kbl", GPU_FAMILY_IGPU_NON_XE}},
12+
{gpu_arch::intel_gpu_cfl, {"intel_gpu_cfl", GPU_FAMILY_IGPU_NON_XE}},
13+
{gpu_arch::intel_gpu_apl, {"intel_gpu_apl", GPU_FAMILY_IGPU_NON_XE}},
14+
{gpu_arch::intel_gpu_glk, {"intel_gpu_glk", GPU_FAMILY_IGPU_NON_XE}},
15+
{gpu_arch::intel_gpu_whl, {"intel_gpu_whl", GPU_FAMILY_IGPU_NON_XE}},
16+
{gpu_arch::intel_gpu_aml, {"intel_gpu_aml", GPU_FAMILY_IGPU_NON_XE}},
17+
{gpu_arch::intel_gpu_cml, {"intel_gpu_cml", GPU_FAMILY_IGPU_NON_XE}},
18+
{gpu_arch::intel_gpu_icllp, {"intel_gpu_icllp", GPU_FAMILY_IGPU_NON_XE}},
19+
{gpu_arch::intel_gpu_ehl, {"intel_gpu_ehl", GPU_FAMILY_IGPU_NON_XE}},
20+
{gpu_arch::intel_gpu_tgllp, {"intel_gpu_tgllp", GPU_FAMILY_IGPU_NON_XE}},
21+
{gpu_arch::intel_gpu_rkl, {"intel_gpu_rkl", GPU_FAMILY_IGPU_NON_XE}},
22+
{gpu_arch::intel_gpu_adl_s, {"intel_gpu_adl_s", GPU_FAMILY_IGPU_NON_XE}},
23+
{gpu_arch::intel_gpu_adl_p, {"intel_gpu_adl_p", GPU_FAMILY_IGPU_NON_XE}},
24+
{gpu_arch::intel_gpu_adl_n, {"intel_gpu_adl_n", GPU_FAMILY_IGPU_NON_XE}},
25+
{gpu_arch::intel_gpu_dg1, {"intel_gpu_dg1", GPU_FAMILY_DGPU_CLIENT_GAME}},
26+
{gpu_arch::intel_gpu_acm_g10, {"intel_gpu_acm_g10", GPU_FAMILY_DGPU_CLIENT_GAME}},
27+
{gpu_arch::intel_gpu_acm_g11, {"intel_gpu_acm_g11", GPU_FAMILY_DGPU_CLIENT_GAME}},
28+
{gpu_arch::intel_gpu_acm_g12, {"intel_gpu_acm_g12", GPU_FAMILY_DGPU_CLIENT_GAME}},
29+
{gpu_arch::intel_gpu_pvc, {"intel_gpu_pvc", GPU_FAMILY_DGPU_CLOUD}},
30+
{gpu_arch::intel_gpu_pvc_vg, {"intel_gpu_pvc_vg", GPU_FAMILY_DGPU_CLOUD}},
31+
{gpu_arch::intel_gpu_mtl_u, {"intel_gpu_mtl_u", GPU_FAMILY_IGPU_XE}},
32+
{gpu_arch::intel_gpu_mtl_h, {"intel_gpu_mtl_h", GPU_FAMILY_IGPU_XE}},
33+
{gpu_arch::intel_gpu_arl_h, {"intel_gpu_arl_h", GPU_FAMILY_IGPU_XE}},
34+
{gpu_arch::intel_gpu_bmg_g21, {"intel_gpu_bmg_g21", GPU_FAMILY_DGPU_CLIENT_GAME}},
35+
{gpu_arch::intel_gpu_bmg_g31, {"intel_gpu_bmg_g31", GPU_FAMILY_DGPU_CLIENT_GAME}},
36+
{gpu_arch::intel_gpu_lnl_m, {"intel_gpu_lnl_m", GPU_FAMILY_IGPU_XE}},
37+
{gpu_arch::intel_gpu_ptl_h, {"intel_gpu_ptl_h", GPU_FAMILY_IGPU_XE}},
38+
{gpu_arch::intel_gpu_ptl_u, {"intel_gpu_ptl_u", GPU_FAMILY_IGPU_XE}},
39+
{gpu_arch::intel_gpu_wcl, {"intel_gpu_wcl", GPU_FAMILY_IGPU_XE}}
40+
};
41+
42+
43+
sycl_hw_info get_device_hw_info(sycl::device* device_ptr) {
44+
sycl_hw_info res;
45+
int32_t id =
46+
device_ptr->get_info<sycl::ext::intel::info::device::device_id>();
47+
res.device_id = id;
48+
49+
res.name = device_ptr->get_info<sycl::info::device::name>();
1250

13-
return res;
51+
syclex::architecture arch =
52+
device_ptr->get_info<syclex::info::device::architecture>();
53+
res.arch = arch;
54+
55+
map<syclex::architecture,
56+
std::pair<const char*, sycl_intel_gpu_family>>::iterator it =
57+
arch2name.find(res.arch);
58+
if (it != arch2name.end()) {
59+
res.arch_name = it->second.first;
60+
res.gpu_family = it->second.second;
61+
} else {
62+
res.arch_name = "unknown";
63+
res.gpu_family = GPU_FAMILY_UKNOWN;
64+
}
65+
66+
return res;
1467
}
15-
*/

ggml/src/ggml-sycl/sycl_hw.hpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,30 @@
99
#include <sycl/sycl.hpp>
1010

1111
namespace syclex = sycl::ext::oneapi::experimental;
12+
using gpu_arch = sycl::ext::oneapi::experimental::architecture;
13+
14+
// It's used to mark the GPU computing capacity
15+
// The value must flow the order of performance.
16+
enum sycl_intel_gpu_family {
17+
GPU_FAMILY_UKNOWN = -1,
18+
// iGPU without Xe core, before Meteor Lake iGPU(Xe)
19+
GPU_FAMILY_IGPU_NON_XE = 0,
20+
// iGPU with Xe core, Meteor Lake iGPU or newer.
21+
GPU_FAMILY_IGPU_XE = 1,
22+
// dGPU for gaming in client/data center (DG1/FLex 140 or newer).
23+
GPU_FAMILY_DGPU_CLIENT_GAME = 2,
24+
// dGPU for AI in cloud, PVC or newer.
25+
GPU_FAMILY_DGPU_CLOUD = 3
26+
};
1227

13-
// TODO: currently not used
14-
/*
1528
struct sycl_hw_info {
1629
syclex::architecture arch;
30+
const char* arch_name;
1731
int32_t device_id;
32+
std::string name;
33+
sycl_intel_gpu_family gpu_family;
1834
};
1935

20-
bool is_in_vector(std::vector<int> &vec, int item);
21-
2236
sycl_hw_info get_device_hw_info(sycl::device *device_ptr);
23-
*/
24-
2537

2638
#endif // SYCL_HW_HPP

0 commit comments

Comments
 (0)