This repository was archived by the owner on Mar 2, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 71
Expand file tree
/
Copy pathacl_program.cpp
More file actions
2058 lines (1814 loc) · 72 KB
/
Copy pathacl_program.cpp
File metadata and controls
2058 lines (1814 loc) · 72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (C) 2010-2021 Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause
// System headers.
#include <algorithm>
#include <cassert>
#include <fstream>
#include <sstream>
#include <stdio.h>
#include <string>
#include <vector>
// External library headers.
#include <CL/opencl.h>
#include <acl_hash/acl_hash.h>
// Internal headers.
#include <acl.h>
#include <acl_auto.h>
#include <acl_command_queue.h>
#include <acl_context.h>
#include <acl_device_op.h>
#include <acl_event.h>
#include <acl_globals.h>
#include <acl_hal.h>
#include <acl_hostch.h>
#include <acl_icd_dispatch.h>
#include <acl_mem.h>
#include <acl_program.h>
#include <acl_support.h>
#include <acl_thread.h>
#include <acl_types.h>
#include <acl_util.h>
// When building with a GCC-compatible compiler, give the declarations that
// follow "protected" symbol visibility.
#ifdef __GNUC__
#pragma GCC visibility push(protected)
#endif
// Upper bound handed to strnlen() when clCreateProgramWithSourceIntelFPGA
// measures a NUL-terminated source string whose length was not supplied.
#define MAX_STRING_LENGTH 100000
// Programs
// ========
//
// Lifecycle of cl_program:
// States are:
//   "new"   - Initial state. No program is built, and kernels can't be
//             loaded.
//   "built" - Program has been built, and kernels can be loaded.
//
//
// Faking the build process:
//
// Note: Because we don't really have a CL compiler, we're
// faking *everything* about program builds. We only store
// a "built" boolean flag, and that's it.
//
// The only interesting bit comes at the time we enqueue a
// kernel.
// At that time we trace:
// kernel -> program -> context -> devices
// and
// command_queue -> device
// Once we verify a common device is present, we can look
// up the kernel interface in the list of accelerator
// definitions inside the device definition.
//
//
// Data model:
//
// cl_program has:
// - reference to context
// - reference to list of devices
// - For now, we punt on this.
// - a representation:
// - Normally source binaries are stored here
// - But we're implementing an OpenCL embedded profile, so we
// don't have a compiler.
// - We're likely going to shortchange things here, i.e. store
// nothing
// - build info:
// - info to support clGetProgramBuildInfo
// - For now, this is just an overall build_status
// - kernels
// - instantiated kernels
// - Each kernel has:
// - reference to interface
// - argument values
// Instantiate the object allocation helpers for cl_program.
// NOTE(review): presumably this is what provides acl_alloc_cl_program() and
// acl_free_cl_program() used by the creation entry points below - confirm
// against the macro definition.
ACL_DEFINE_CL_OBJECT_ALLOC_FUNCTIONS(cl_program);
//////////////////////////////
// Local functions
// Called by every program creation entry point immediately after
// acl_alloc_cl_program() succeeds and before any field of `program` is set.
static void l_init_program(cl_program program, cl_context context);
// Called on creation failure paths and when the last reference to a program
// is released.
static void l_free_program(cl_program program);
// Produces the per-device program info that callers store in
// program->dev_prog[]. Callers treat a null return as an out-of-memory
// failure. The built-in-kernels path passes binary_len == 0 and
// binary == NULL.
static acl_device_program_info_t *
l_create_dev_prog(cl_program program, cl_device_id device, size_t binary_len,
const unsigned char *binary);
// Build helpers. They are not called in the creation/query entry points
// directly below; see their definitions for the exact contracts.
static cl_int l_build_program_for_device(cl_program program,
unsigned int dev_idx,
const char *options);
static void l_compute_hash(cl_program program,
acl_device_program_info_t *dev_prog);
static cl_int l_build_from_source(acl_device_program_info_t *dev_prog);
static cl_int l_build_from_source_in_dir(acl_device_program_info_t *dev_prog,
const char *dir);
// Invoked as the final step of clCreateProgramWithBinaryIntelFPGA, after the
// program has been registered with acl_track_object().
static void l_try_to_eagerly_program_device(cl_program program);
// Called with dest_dev = the autodiscovery definition of a program's device
// binary and src_dev = the autodiscovery definition of the device itself, on
// the paths where no binary package is loaded.
static void
l_device_memory_definition_copy(acl_device_def_autodiscovery_t *dest_dev,
acl_device_def_autodiscovery_t *src_dev);
//////////////////////////////
// OpenCL API
// Adds one reference to `program`.
// Returns CL_INVALID_PROGRAM when the handle fails validation, CL_SUCCESS
// otherwise. The global runtime lock is held for the whole call.
ACL_EXPORT
CL_API_ENTRY cl_int CL_API_CALL clRetainProgramIntelFPGA(cl_program program) {
  std::scoped_lock lock{acl_mutex_wrapper};

  if (acl_program_is_valid(program)) {
    acl_retain(program);
    return CL_SUCCESS;
  }
  return CL_INVALID_PROGRAM;
}
// Standard OpenCL entry point; forwards to the IntelFPGA-suffixed
// implementation and returns its status unchanged.
ACL_EXPORT
CL_API_ENTRY cl_int CL_API_CALL clRetainProgram(cl_program program) {
  const cl_int status = clRetainProgramIntelFPGA(program);
  return status;
}
// Drops one reference from `program`. When the reference count reaches zero
// the program's context reference is released, the program is untracked, the
// binaries recorded on each of its devices are unloaded, and the program is
// freed.
// Returns CL_INVALID_PROGRAM when the handle fails validation, CL_SUCCESS
// otherwise.
ACL_EXPORT
CL_API_ENTRY cl_int CL_API_CALL clReleaseProgramIntelFPGA(cl_program program) {
  std::scoped_lock lock{acl_mutex_wrapper};

  if (!acl_program_is_valid(program)) {
    return CL_INVALID_PROGRAM;
  }

  acl_release(program);
  if (acl_ref_count(program)) {
    // Other references remain; the program stays alive.
    return CL_SUCCESS;
  }

  // That was the last reference: tear the program down.
  acl_release(program->context);
  acl_untrack_object(program);

  // Make sure we clean up the elf files.
  for (unsigned dev_idx = 0; dev_idx < program->num_devices; ++dev_idx) {
    auto dev = program->device[dev_idx];
    if (dev->loaded_bin != nullptr) {
      dev->loaded_bin->unload_content();
    }
    if (dev->last_bin != nullptr) {
      dev->last_bin->unload_content();
    }
  }

  l_free_program(program);
  return CL_SUCCESS;
}
// Standard OpenCL entry point; forwards to the IntelFPGA-suffixed
// implementation and returns its status unchanged.
ACL_EXPORT
CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program program) {
  const cl_int status = clReleaseProgramIntelFPGA(program);
  return status;
}
// Performs no work and always reports CL_SUCCESS.
ACL_EXPORT
CL_API_ENTRY cl_int CL_API_CALL clUnloadCompilerIntelFPGA(void) {
  const cl_int status = CL_SUCCESS;
  return status;
}
// Standard OpenCL entry point; forwards to the IntelFPGA-suffixed
// implementation and returns its status unchanged.
ACL_EXPORT
CL_API_ENTRY cl_int CL_API_CALL clUnloadCompiler(void) {
  const cl_int status = clUnloadCompilerIntelFPGA();
  return status;
}
// Creates a program object from `count` source strings.
//
// Parameters:
//   context     - context that will own the program; its device list is
//                 copied into the program.
//   count       - number of entries in `strings`; must be non-zero.
//   strings     - source fragments; neither the array nor any entry may be
//                 NULL.
//   lengths     - optional per-fragment byte lengths. A NULL array or a zero
//                 entry means the fragment is NUL-terminated; its length is
//                 then measured with strnlen() capped at MAX_STRING_LENGTH.
//   errcode_ret - optional; set to CL_SUCCESS on success.
//
// The fragments are concatenated into one NUL-terminated buffer stored on the
// program (source_text / source_len, the latter including the terminator).
// If the platform has source capture enabled, the same bytes are also written
// to a capture file.
//
// Failures exit through the BAIL / BAIL_INFO macros (defined elsewhere).
ACL_EXPORT
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSourceIntelFPGA(
    cl_context context, cl_uint count, const char **strings,
    const size_t *lengths, cl_int *errcode_ret) {
  std::scoped_lock lock{acl_mutex_wrapper};

  // ---- Argument validation ----
  if (!acl_context_is_valid(context)) {
    BAIL(CL_INVALID_CONTEXT);
  }
  if (count == 0) {
    BAIL_INFO(CL_INVALID_VALUE, context, "Count parameter is zero");
  }
  if (strings == 0) {
    BAIL_INFO(CL_INVALID_VALUE, context, "No source strings specified");
  }
  for (cl_uint str_idx = 0; str_idx < count; ++str_idx) {
    if (strings[str_idx] == 0) {
      BAIL_INFO(CL_INVALID_VALUE, context, "A string pointers is NULL");
    }
  }

  // ---- Allocate and initialise the program object ----
  cl_program program = acl_alloc_cl_program();
  if (program == 0) {
    BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context,
              "Could not allocate a program object");
  }
  l_init_program(program, context);

  // The program targets exactly the devices of its context.
  program->num_devices = context->num_devices;
  for (cl_uint dev_idx = 0; dev_idx < context->num_devices; ++dev_idx) {
    program->device[dev_idx] = context->device[dev_idx];
  }

  // ---- Optional source capture ----
  // NOTE(review): the generated file name ends with a newline character;
  // that looks unintentional - confirm before relying on the capture path.
  struct acl_file_handle_t *capture_fp = NULL;
  if (acl_get_platform()->next_capture_id != ACL_OPEN) {
    std::stringstream name;
    name << acl_get_platform()->capture_base_path << "."
         << acl_get_platform()->next_capture_id << "\n";
    capture_fp = acl_fopen(name.str().c_str(), "w");
    acl_get_platform()->next_capture_id++;
  }

  // Length of one fragment: the caller-supplied value when it is non-zero,
  // otherwise the (capped) length of the NUL-terminated string.
  const auto fragment_length = [&](cl_uint idx) -> size_t {
    const size_t supplied = lengths ? lengths[idx] : 0;
    if (supplied != 0) {
      return supplied;
    }
    return strnlen(strings[idx], MAX_STRING_LENGTH);
  };

  // ---- First sweep: total up the source size in bytes ----
  for (cl_uint str_idx = 0; str_idx < count; ++str_idx) {
    program->source_len += fragment_length(str_idx);
  }
  program->source_len++; // Must also reserve space for the terminating NUL.

  // The text has to be stored for later access via clGetProgramInfo.
  unsigned char *storage = (unsigned char *)acl_malloc(program->source_len);
  if (storage == 0) {
    acl_free_cl_program(program);
    if (capture_fp) {
      acl_fclose(capture_fp);
    }
    BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context,
              "Could not allocate memory to store program source");
  }
  program->source_text = storage;

  // ---- Second sweep: copy (and optionally capture) each fragment ----
  size_t write_pos = 0;
  for (cl_uint str_idx = 0; str_idx < count; ++str_idx) {
    const size_t piece_len = fragment_length(str_idx);
    safe_memcpy(program->source_text + write_pos, strings[str_idx], piece_len,
                piece_len, piece_len);
    if (capture_fp) {
      acl_fwrite(strings[str_idx], sizeof(char), piece_len, capture_fp);
    }
    write_pos += piece_len;
  }
  program->source_text[write_pos] = 0; // Terminating NUL

  if (capture_fp) {
    acl_fclose(capture_fp);
  }

  // ---- Publish the program ----
  acl_retain(program->context);
  if (errcode_ret) {
    *errcode_ret = CL_SUCCESS;
  }
  acl_track_object(ACL_OBJ_PROGRAM, program);
  return program;
}
// Standard OpenCL entry point; forwards all arguments to the
// IntelFPGA-suffixed implementation and returns its result unchanged.
ACL_EXPORT
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSource(
    cl_context context, cl_uint count, const char **strings,
    const size_t *lengths, cl_int *errcode_ret) {
  cl_program created = clCreateProgramWithSourceIntelFPGA(
      context, count, strings, lengths, errcode_ret);
  return created;
}
// Creates a program object from pre-built device binaries, one per entry of
// `device_list`.
//
// Parameters:
//   context       - context that will own the program; every listed device
//                   must be valid and associated with it.
//   num_devices   - number of entries in device_list, lengths and binaries;
//                   must be non-zero.
//   device_list   - devices the binaries are for; must not be NULL.
//   lengths       - byte length of each binary; must not be NULL and no entry
//                   may be zero.
//   binaries      - the binaries themselves; must not be NULL and no entry
//                   may be NULL.
//   binary_status - optional; receives a per-device status for the entries
//                   that were examined.
//   errcode_ret   - optional; set to CL_SUCCESS on success.
//
// On success the program is retained against its context, tracked, and an
// eager programming of the device is attempted.
//
// Failures exit through the BAIL / BAIL_INFO macros (defined elsewhere).
ACL_EXPORT
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA(
    cl_context context, cl_uint num_devices, const cl_device_id *device_list,
    const size_t *lengths, const unsigned char **binaries,
    cl_int *binary_status, cl_int *errcode_ret) {
  cl_uint i;
  cl_uint idev;
  cl_program program = 0;
  std::scoped_lock lock{acl_mutex_wrapper};

  if (!acl_context_is_valid(context))
    BAIL(CL_INVALID_CONTEXT);

  if (num_devices == 0 || device_list == 0) {
    BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list");
  }

  // The per-device checks below index into both arrays, so reject NULL
  // arrays up front instead of dereferencing them. The OpenCL specification
  // mandates CL_INVALID_VALUE for this case.
  if (lengths == 0 || binaries == 0) {
    BAIL_INFO(CL_INVALID_VALUE, context,
              "Binary lengths or binaries array is NULL");
  }

  for (i = 0; i < num_devices; i++) {
    if (!acl_device_is_valid(device_list[i])) {
      BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device");
    }
    if (!acl_context_uses_device(context, device_list[i])) {
      BAIL_INFO(CL_INVALID_DEVICE, context,
                "Device is not associated with the context");
    }
    if (lengths[i] == 0 || binaries[i] == 0) {
      if (binary_status) {
        binary_status[i] = CL_INVALID_VALUE;
      }
      BAIL_INFO(CL_INVALID_VALUE, context,
                lengths[i] == 0 ? "A binary length is zero"
                                : "A binary pointer is NULL");
    }
  }

  // Go ahead and allocate it.
  program = acl_alloc_cl_program();
  if (program == 0) {
    BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context,
              "Could not allocate a program object");
  }
  l_init_program(program, context);

  // Copy devices from arguments and set status.
  program->num_devices = num_devices;
  for (idev = 0; idev < num_devices; idev++) {
    program->device[idev] = device_list[idev];

    // Save the binary in a new acl_device_program_info_t
    program->dev_prog[idev] = l_create_dev_prog(program, device_list[idev],
                                                lengths[idev], binaries[idev]);
    if (program->dev_prog[idev]) {
      if (context->programs_devices || context->uses_dynamic_sysdef) {
        if (!context->split_kernel) {
          // Load and validate the ELF package form.
          auto status =
              program->dev_prog[idev]->device_binary.load_binary_pkg(0, 1);
          if (status != CL_SUCCESS) {
            l_free_program(program);
            if (binary_status) {
              binary_status[idev] = CL_INVALID_BINARY;
            }
            BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary");
          }
        } else {
          assert(context->uses_dynamic_sysdef);

          // Allow disabling preloading of split binaries if the user requests
          // it. This is required in cases where there are many aocx files in
          // the directory and each cl_program that is created only uses a
          // small subset of them. Note that when preloading is disabled we
          // load the kernels when clCreateKernel is called. Also queries to
          // CL_PROGRAM_NUM_KERNELS and CL_PROGRAM_KERNEL_NAMES in
          // clGetProgramInfo will return inaccurate results unless all
          // kernels in the program are created. Preloading is enabled by
          // default.
          const char *preload =
              acl_getenv("CL_PRELOAD_SPLIT_BINARIES_INTELFPGA");
          if (!preload || std::string(preload) != "0") {
            // In split_kernel mode we have to load all aocx files
            // in the specified directory which cumulatively contain all the
            // kernels.
            auto result = acl_glob(std::string(context->program_library_root) +
                                   std::string("/kernel_*.aocx"));
            for (const auto &filename : result) {
              // Trim ".aocx" file extension.
              auto kernel_name = filename.substr(0, filename.length() - 5);
              // Keep only the file name, dropping any leading directories.
              auto l = kernel_name.find_last_of("/");
              if (l != std::string::npos) {
                kernel_name = kernel_name.substr(l + 1);
              }

              auto &dev_bin =
                  program->dev_prog[idev]->add_split_binary(kernel_name);
              dev_bin.load_content(filename);

              auto status = dev_bin.load_binary_pkg(0, 1);
              if (status != CL_SUCCESS) {
                l_free_program(program);
                if (binary_status) {
                  binary_status[idev] = CL_INVALID_BINARY;
                }
                BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary");
              }

              // Need to unload the binary and only load it on an as needed
              // basis due to high memory usage when there are many split
              // binaries.
              dev_bin.unload_content();
            }
          }
        }
      } else {
        assert(!context->split_kernel);
        // Copy memory definition from initial device def to program in
        // CL_CONTEXT_COMPILER_MODE_INTELFPGA mode.
        l_device_memory_definition_copy(
            &(program->dev_prog[idev]
                  ->device_binary.get_devdef()
                  .autodiscovery_def),
            &(program->device[idev]->def.autodiscovery_def));
      }
    } else {
      // Release all the memory we've allocated.
      l_free_program(program);
      if (binary_status) {
        binary_status[idev] = CL_INVALID_VALUE;
      }
      BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context,
                "Could not allocate memory to store program binaries");
    }

    // Wait to set status until after failures may have occurred for this
    // device.
    if (binary_status) {
      binary_status[idev] = CL_SUCCESS;
    }
  }

  acl_retain(program->context);

  if (errcode_ret) {
    *errcode_ret = CL_SUCCESS;
  }

  acl_track_object(ACL_OBJ_PROGRAM, program);

  l_try_to_eagerly_program_device(program);

  return program;
}
// Standard OpenCL entry point; forwards all arguments to the
// IntelFPGA-suffixed implementation and returns its result unchanged.
ACL_EXPORT
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(
    cl_context context, cl_uint num_devices, const cl_device_id *device_list,
    const size_t *lengths, const unsigned char **binaries,
    cl_int *binary_status, cl_int *errcode_ret) {
  cl_program created = clCreateProgramWithBinaryIntelFPGA(
      context, num_devices, device_list, lengths, binaries, binary_status,
      errcode_ret);
  return created;
}
// Extension entry point: creates a program from pre-built device binaries
// and immediately programs each device with its binary.
//
// Parameters:
//   context       - context that will own the program; every listed device
//                   must be valid and associated with it. split_kernel mode
//                   is not supported here (asserted).
//   num_devices   - number of entries in device_list, lengths and binaries;
//                   must be non-zero.
//   device_list   - devices the binaries are for; must not be NULL.
//   lengths       - byte length of each binary; must not be NULL and no entry
//                   may be zero.
//   binaries      - the binaries themselves; must not be NULL and no entry
//                   may be NULL.
//   binary_status - optional; receives a per-device status for the entries
//                   that were examined.
//   errcode_ret   - optional; set to CL_SUCCESS on success.
//
// Failures exit through the BAIL / BAIL_INFO macros (defined elsewhere).
ACL_EXPORT
CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithBinaryAndProgramDeviceIntelFPGA(
    cl_context context, cl_uint num_devices, const cl_device_id *device_list,
    const size_t *lengths, const unsigned char **binaries,
    cl_int *binary_status, cl_int *errcode_ret) {
  cl_uint i;
  cl_uint idev;
  cl_program program = 0;
  std::scoped_lock lock{acl_mutex_wrapper};

  if (!acl_context_is_valid(context))
    BAIL(CL_INVALID_CONTEXT);

  // split_kernel mode is not supported in this special extension API which is
  // not part of the OpenCL standard.
  assert(context->split_kernel == 0);

  if (num_devices == 0 || device_list == 0) {
    BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list");
  }

  // The per-device checks below index into both arrays, so reject NULL
  // arrays up front instead of dereferencing them.
  if (lengths == 0 || binaries == 0) {
    BAIL_INFO(CL_INVALID_VALUE, context,
              "Binary lengths or binaries array is NULL");
  }

  for (i = 0; i < num_devices; i++) {
    if (!acl_device_is_valid(device_list[i])) {
      BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device");
    }
    if (!acl_context_uses_device(context, device_list[i])) {
      BAIL_INFO(CL_INVALID_DEVICE, context,
                "Device is not associated with the context");
    }
    if (lengths[i] == 0 || binaries[i] == 0) {
      if (binary_status) {
        binary_status[i] = CL_INVALID_VALUE;
      }
      BAIL_INFO(CL_INVALID_VALUE, context,
                lengths[i] == 0 ? "A binary length is zero"
                                : "A binary pointer is NULL");
    }
  }

  program = acl_alloc_cl_program();
  if (program == 0) {
    BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context,
              "Could not allocate a program object");
  }
  l_init_program(program, context);

  // Copy devices from arguments and set status.
  program->num_devices = num_devices;
  for (idev = 0; idev < num_devices; idev++) {
    program->device[idev] = device_list[idev];

    // Save the binary in a new acl_device_program_info_t
    program->dev_prog[idev] = l_create_dev_prog(program, device_list[idev],
                                                lengths[idev], binaries[idev]);
    if (program->dev_prog[idev]) {
      if (context->programs_devices || context->uses_dynamic_sysdef) {
        // Load and validate the ELF package form.
        auto status =
            program->dev_prog[idev]->device_binary.load_binary_pkg(0, 0);
        if (status != CL_SUCCESS) {
          l_free_program(program);
          if (binary_status) {
            binary_status[idev] = CL_INVALID_BINARY;
          }
          BAIL_INFO(CL_INVALID_BINARY, context, "Invalid binary");
        }
      } else {
        // Copy memory definition from initial device def to program in
        // CL_CONTEXT_COMPILER_MODE_INTELFPGA mode.
        l_device_memory_definition_copy(
            &(program->dev_prog[idev]
                  ->device_binary.get_devdef()
                  .autodiscovery_def),
            &(program->device[idev]->def.autodiscovery_def));
      }
    } else {
      // Release all the memory we've allocated.
      l_free_program(program);
      if (binary_status) {
        binary_status[idev] = CL_INVALID_VALUE;
      }
      BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context,
                "Could not allocate memory to store program binaries");
    }

    // Wait to set status until after failures may have occurred for this
    // device.
    if (binary_status) {
      binary_status[idev] = CL_SUCCESS;
    }
  }

  acl_retain(program->context);
  acl_track_object(ACL_OBJ_PROGRAM, program);

  // NOTE(review): the failure exits inside this loop leave without releasing
  // `program` (already retained against the context and tracked) or the
  // reprogram event. This looks like a leak - confirm and add cleanup.
  for (idev = 0; idev < program->num_devices; idev++) {
    acl_device_program_info_t *dev_prog = program->dev_prog[idev];
    if (dev_prog && dev_prog->build_status == CL_BUILD_SUCCESS) {
      cl_int result;

      // Use a regular event with an internal command type.
      // We want a regular event so we get profiling info, so that we
      // have the infrastructure to measure delays, and the user
      // could in principle optimize this delay away.
      cl_event reprogram_event = 0;

      // Just use the auto_queue. We really only support one device
      // anyway.
      cl_command_queue cq = program->context->auto_queue;

      // Schedule an eager programming of the device.
      acl_print_debug_msg(
          "Device is not yet programmed: plan to eagerly program it\n");
      result = acl_create_event(cq, 0, 0, // Don't wait on other events
                                CL_COMMAND_PROGRAM_DEVICE_INTELFPGA,
                                &reprogram_event);
      if (result == CL_SUCCESS) {
        acl_device_op_t reprogram_op;
        reprogram_event->cmd.info.eager_program = &(dev_prog->device_binary);

        // Try scheduling it.
        reprogram_op.link = ACL_OPEN;
        acl_device_op_reset_device_op(&reprogram_op);
        reprogram_op.status = ACL_PROPOSED;
        reprogram_op.execution_status = ACL_PROPOSED;
        reprogram_op.info.type = ACL_DEVICE_OP_REPROGRAM;
        reprogram_op.info.event = reprogram_event;
        reprogram_op.info.index = 0;
        reprogram_op.conflict_type = acl_device_op_conflict_type(&reprogram_op);

        // Program the device right away rather than queueing the operation.
        acl_program_device(NULL, &reprogram_op);
        if (reprogram_op.execution_status != CL_SUCCESS) {
          BAIL_INFO(CL_DEVICE_NOT_AVAILABLE, context,
                    "Reprogram of device failed");
        }
      } else {
        // NOTE(review): this branch is reached when event creation fails,
        // yet the message says "Invalid binary" - confirm intended wording.
        BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Invalid binary");
      }
    } else {
      BAIL_INFO(CL_BUILD_PROGRAM_FAILURE, context,
                "Program is not built correctly");
    }
  }

  if (errcode_ret) {
    *errcode_ret = CL_SUCCESS;
  }
  return program;
}
// Creates a program object that exposes kernels already present on the
// listed devices.
//
// Parameters:
//   context      - context that will own the program; every listed device
//                  must be valid and associated with it.
//   num_devices  - number of entries in device_list; must be non-zero and
//                  below ACL_MAX_DEVICE.
//   device_list  - target devices; must not be NULL.
//   kernel_names - semicolon-separated kernel names; must not be NULL. Each
//                  device's autodiscovery accelerator list is checked against
//                  these names.
//   errcode_ret  - optional; set to CL_SUCCESS on success.
//
// On success every per-device program is marked CL_BUILD_SUCCESS and the
// program is flagged as using built-in kernels.
//
// Failures exit through the BAIL / BAIL_INFO macros (defined elsewhere).
ACL_EXPORT
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernelsIntelFPGA(
    cl_context context, cl_uint num_devices, const cl_device_id *device_list,
    const char *kernel_names, cl_int *errcode_ret) {
  std::scoped_lock lock{acl_mutex_wrapper};

  if (!acl_context_is_valid(context))
    BAIL(CL_INVALID_CONTEXT);

  if (num_devices == 0 || device_list == 0) {
    BAIL_INFO(CL_INVALID_VALUE, context, "Invalid device list");
  }
  if (kernel_names == NULL) {
    BAIL_INFO(CL_INVALID_VALUE, context, "kernel_names is NULL");
  }
  // NOTE(review): this rejects num_devices == ACL_MAX_DEVICE as well; whether
  // that is intended depends on the capacity of program->device[] - confirm.
  if (num_devices >= ACL_MAX_DEVICE) {
    BAIL_INFO(CL_INVALID_VALUE, context,
              "num_devices specified is greater than ACL_MAX_DEVICE");
  }

  // Split the semicolon-delimited list into a set of unique kernel names.
  std::set<std::string> kernel_names_split;
  std::string token;
  std::istringstream tokenStream(kernel_names);
  while (std::getline(tokenStream, token, ';')) {
    kernel_names_split.insert(token);
  }

  for (cl_uint i = 0; i < num_devices; i++) {
    if (!acl_device_is_valid(device_list[i])) {
      BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid device");
    }
    if (!acl_context_uses_device(context, device_list[i])) {
      BAIL_INFO(CL_INVALID_DEVICE, context,
                "Device is not associated with the context");
    }

    // Make sure the current device contains all the builtin kernels.
    // Count down from the number of requested names; iterate by const
    // reference so accelerator definitions are not copied on every step.
    size_t find_count = kernel_names_split.size();
    for (const acl_accel_def_t &accel :
         device_list[i]->def.autodiscovery_def.accel) {
      if (kernel_names_split.count(accel.iface.name)) {
        // Found one of the kernels the device needs to have.
        find_count--;
      }
      if (find_count == 0)
        break;
    }
    if (find_count != 0) {
      BAIL_INFO(CL_INVALID_VALUE, context,
                "kernel_names contains a kernel name that is not "
                "supported by all of the devices in device_list");
    }
  }

  // Go ahead and allocate it.
  cl_program program = acl_alloc_cl_program();
  if (program == 0) {
    BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context,
              "Could not allocate a program object");
  }
  l_init_program(program, context);

  // Copy devices from arguments and set status.
  program->num_devices = num_devices;
  for (cl_uint idev = 0; idev < num_devices; idev++) {
    program->device[idev] = device_list[idev];

    // There is no binary to save for built-in kernels, so the per-device
    // program info is created with an empty binary.
    program->dev_prog[idev] =
        l_create_dev_prog(program, device_list[idev], 0, NULL);
    if (program->dev_prog[idev]) {
      // Marked as built here because the DLA flow calls clGetProgramInfo,
      // which requires CL_BUILD_SUCCESS.
      program->dev_prog[idev]->build_status = CL_BUILD_SUCCESS;

      // Copy memory definition from initial device def to program in
      // CL_CONTEXT_COMPILER_MODE_INTELFPGA mode.
      l_device_memory_definition_copy(
          &(program->dev_prog[idev]
                ->device_binary.get_devdef()
                .autodiscovery_def),
          &(program->device[idev]->def.autodiscovery_def));
    } else {
      // Release all the memory we've allocated.
      l_free_program(program);
      BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context,
                "Could not allocate memory to store program binaries");
    }
  }

  program->uses_builtin_kernels = CL_TRUE;

  acl_retain(program->context);

  if (errcode_ret) {
    *errcode_ret = CL_SUCCESS;
  }

  acl_track_object(ACL_OBJ_PROGRAM, program);

  return program;
}
// Standard OpenCL entry point; forwards all arguments to the
// IntelFPGA-suffixed implementation and returns its result unchanged.
ACL_EXPORT
CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernels(
    cl_context context, cl_uint num_devices, const cl_device_id *device_list,
    const char *kernel_names, cl_int *errcode_ret) {
  cl_program created = clCreateProgramWithBuiltInKernelsIntelFPGA(
      context, num_devices, device_list, kernel_names, errcode_ret);
  return created;
}
// Compilation is not supported by this runtime. After validating `program`
// the call always fails with CL_COMPILER_NOT_AVAILABLE; every other argument
// is ignored.
// Returns CL_INVALID_PROGRAM when the handle fails validation.
ACL_EXPORT
CL_API_ENTRY cl_int CL_API_CALL clCompileProgramIntelFPGA(
    cl_program program, cl_uint num_devices, const cl_device_id *device_list,
    const char *options, cl_uint num_input_headers,
    const cl_program *input_headers, const char **header_include_names,
    acl_program_build_notify_fn_t pfn_notify, void *user_data) {
  std::scoped_lock lock{acl_mutex_wrapper};

  if (!acl_program_is_valid(program)) {
    return CL_INVALID_PROGRAM;
  }

  // The remaining parameters are intentionally unused; reference each one so
  // the compiler does not warn about it.
  static_cast<void>(num_devices);
  static_cast<void>(device_list);
  static_cast<void>(options);
  static_cast<void>(num_input_headers);
  static_cast<void>(input_headers);
  static_cast<void>(header_include_names);
  static_cast<void>(pfn_notify);
  static_cast<void>(user_data);

  ERR_RET(CL_COMPILER_NOT_AVAILABLE, program->context,
          "Device compiler is not available");
}
// Standard OpenCL entry point; forwards all arguments to the
// IntelFPGA-suffixed implementation and returns its status unchanged.
ACL_EXPORT
CL_API_ENTRY cl_int CL_API_CALL clCompileProgram(
    cl_program program, cl_uint num_devices, const cl_device_id *device_list,
    const char *options, cl_uint num_input_headers,
    const cl_program *input_headers, const char **header_include_names,
    acl_program_build_notify_fn_t pfn_notify, void *user_data) {
  const cl_int status = clCompileProgramIntelFPGA(
      program, num_devices, device_list, options, num_input_headers,
      input_headers, header_include_names, pfn_notify, user_data);
  return status;
}
// Linking is not supported by this runtime. After validating `context` the
// call always fails with CL_LINKER_NOT_AVAILABLE; the device, option, input
// program and callback arguments are ignored.
// Both exits go through the BAIL / BAIL_INFO macros (defined elsewhere).
ACL_EXPORT
CL_API_ENTRY cl_program CL_API_CALL clLinkProgramIntelFPGA(
    cl_context context, cl_uint num_devices, const cl_device_id *device_list,
    const char *options, cl_uint num_input_programs,
    const cl_program *input_programs, acl_program_build_notify_fn_t pfn_notify,
    void *user_data, cl_int *errcode_ret) {
  std::scoped_lock lock{acl_mutex_wrapper};

  if (!acl_context_is_valid(context)) {
    BAIL(CL_INVALID_CONTEXT);
  }

  // These parameters are intentionally unused; reference each one so the
  // compiler (MSVC in particular) does not warn about it.
  static_cast<void>(num_devices);
  static_cast<void>(device_list);
  static_cast<void>(options);
  static_cast<void>(num_input_programs);
  static_cast<void>(input_programs);
  static_cast<void>(pfn_notify);
  static_cast<void>(user_data);

  BAIL_INFO(CL_LINKER_NOT_AVAILABLE, context, "Device linker is not available");
}
// Standard OpenCL entry point; forwards all arguments to the
// IntelFPGA-suffixed implementation and returns its result unchanged.
ACL_EXPORT
CL_API_ENTRY cl_program CL_API_CALL clLinkProgram(
    cl_context context, cl_uint num_devices, const cl_device_id *device_list,
    const char *options, cl_uint num_input_programs,
    const cl_program *input_programs, acl_program_build_notify_fn_t pfn_notify,
    void *user_data, cl_int *errcode_ret) {
  cl_program linked = clLinkProgramIntelFPGA(
      context, num_devices, device_list, options, num_input_programs,
      input_programs, pfn_notify, user_data, errcode_ret);
  return linked;
}
ACL_EXPORT
CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfoIntelFPGA(
cl_program program, cl_program_info param_name, size_t param_value_size,
void *param_value, size_t *param_value_size_ret) {
cl_context context;
acl_result_t result;
std::scoped_lock lock{acl_mutex_wrapper};
if (!acl_program_is_valid(program)) {
return CL_INVALID_PROGRAM;
}
context = program->context;
VALIDATE_ARRAY_OUT_ARGS(param_value_size, param_value, param_value_size_ret,
context);
RESULT_INIT;
switch (param_name) {
case CL_PROGRAM_REFERENCE_COUNT:
RESULT_UINT(acl_ref_count(program));
break;
case CL_PROGRAM_CONTEXT:
RESULT_PTR(program->context);
break;
case CL_PROGRAM_NUM_DEVICES:
RESULT_UINT(program->num_devices);
break;
case CL_PROGRAM_DEVICES:
RESULT_BUF(program->device,
program->num_devices * sizeof(program->device[0]));
break;
case CL_PROGRAM_SOURCE:
// Program source could be empty if the program was created from
// binary.
RESULT_BUF(program->source_text, program->source_len);
break;
case CL_PROGRAM_BINARY_SIZES: {
// Special case to copy the binary sizes.
// We don't store it in the shape that this query wants it.
// This returns early!
if (param_value_size_ret) {
*param_value_size_ret = program->num_devices * sizeof(size_t);
}
if (param_value) {
// They actually want the values
if (param_value_size < (program->num_devices * sizeof(size_t))) {
ERR_RET(CL_INVALID_VALUE, context,
"Parameter return buffer is too small");
}
for (unsigned i = 0; i < program->num_devices; i++) {
// program->dev_prog[] could be NULL if a compile failed.
auto *dev_prog = program->dev_prog[i];
((size_t *)param_value)[i] =
dev_prog ? dev_prog->device_binary.get_binary_len() : 0;
}
}
return CL_SUCCESS;
}
case CL_PROGRAM_BINARIES: {
// Special case to copy a sequence of buffers.
// This returns early!
// Put the size copyback first in case we error out later.
if (param_value_size_ret) {
*param_value_size_ret = program->num_devices * sizeof(char *);
}
if (param_value) {
// They actually want the values
unsigned char **dest = (unsigned char **)param_value;
if (param_value_size < (program->num_devices * sizeof(char *))) {
ERR_RET(CL_INVALID_VALUE, context,
"Parameter return buffer is too small");
}
for (unsigned i = 0; i < program->num_devices; ++i) {
auto *dev_prog = program->dev_prog[i];
if (dest[i] == nullptr) {
// Spec says:
// If an entry value in the array is NULL, the implementation skips
// copying the program binary for the specific device identified by
// the array index.
continue;
}
// The dev_prog or binary could be NULL if an attempted compile failed.
// But the call should still succeed.
if (dev_prog && dev_prog->device_binary.get_binary_len() > 0) {
const auto &db = dev_prog->device_binary;
// The OpenCL spec implies that the user must ensure dest[i] has
// enough allocated space to store the entire contents of the binary.
std::copy(db.get_content(), db.get_content() + db.get_binary_len(),
dest[i]);
}
}
}
return CL_SUCCESS;
}
case CL_PROGRAM_NUM_KERNELS: {
size_t kernel_cnt = 0;
char exists_built_dev_prog =
0; // a flag to indicate if any built dev_prog exists.
for (cl_uint idev = 0; idev < program->num_devices; ++idev) {
acl_device_program_info_t *dev_prog = program->dev_prog[idev];
if (dev_prog && dev_prog->build_status == CL_BUILD_SUCCESS) {
// We need to find the number of kernels on one successfully built
// dev_prog.
kernel_cnt =
context->uses_dynamic_sysdef
? program->dev_prog[idev]->get_num_kernels()
: program->device[idev]->def.autodiscovery_def.accel.size();
exists_built_dev_prog = 1;
break; // the rest, if any, will be repetitive
}
}
if (!exists_built_dev_prog)
ERR_RET(CL_INVALID_PROGRAM_EXECUTABLE, context,
"A successfully built program executable was not found for any "
"device in the list of devices associated with program");
RESULT_SIZE_T(kernel_cnt);
break;
}
case CL_PROGRAM_KERNEL_NAMES: {
// Special case to copy the name of all kernels.
// This returns early!
size_t total_ret_len = 0; // we don't know the param_Value_size_ret yet.
bool exists_built_dev_prog =
0; // a flag to indicate if any built dev_prog exists.
// Go through devices in this program for which the build status is
// successful.
// First, find the total return size:
std::set<std::string> names;
for (cl_uint idev = 0; idev < program->num_devices; idev++) {
acl_device_program_info_t *dev_prog = program->dev_prog[idev];
// finding a dev_prog that is built sucessfully.
if (dev_prog && dev_prog->build_status == CL_BUILD_SUCCESS) {
if (context->uses_dynamic_sysdef) {
names = dev_prog->get_all_kernel_names();
} else {
for (const auto &a :
program->device[idev]->def.autodiscovery_def.accel) {
names.insert(a.iface.name);
}
}
exists_built_dev_prog = true;
for (const auto &n : names) {
total_ret_len +=
n.length() +
1; //+1 is for the extra semi-colon to separate names.
}
break; // The rest, if any, will be repetitive.
}
}
if (!exists_built_dev_prog)
ERR_RET(CL_INVALID_PROGRAM_EXECUTABLE, context,
"A successfully built program executable was not "
"found for any device in the list of devices "
"associated with program");
// Based on the OpenCL 1.2 CTS api test, total_ret_len must include the
// space for the null terminator.
total_ret_len = total_ret_len > 0 ? total_ret_len : 1;
if (param_value_size_ret) {
*param_value_size_ret = total_ret_len;
}
if (param_value) {
if (total_ret_len > param_value_size) {
ERR_RET(CL_INVALID_VALUE, context,
"Parameter return buffer is too small");
}
std::stringstream ss;
size_t i = 0;
for (const auto &n : names) {
ss << n;
if (i < names.size() - 1)
ss << ";";
++i;
}
auto result_str = ss.str();
assert(result_str.length() == total_ret_len - 1);
safe_memcpy(param_value, result_str.c_str(), total_ret_len, total_ret_len,
total_ret_len);
}
}
return CL_SUCCESS;
default:
ERR_RET(CL_INVALID_VALUE, context, "Invalid program info query");
}
// zero size result is valid!
if (param_value) {
if (param_value_size < result.size) {
ERR_RET(CL_INVALID_VALUE, context,
"Parameter return buffer is too small");
}
RESULT_COPY(param_value, param_value_size);
}
if (param_value_size_ret) {
*param_value_size_ret = result.size;
}
return CL_SUCCESS;