Skip to content

Commit 04fef7d

Browse files
committed
Reviews
1 parent 59ba666 commit 04fef7d

1 file changed

Lines changed: 15 additions & 32 deletions

File tree

cuda_bindings/docs/source/overview.md

Lines changed: 15 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -402,19 +402,19 @@ Putting it all together:
402402
# Define a custom type
403403
testStruct = np.dtype([("value", np.int32)], align=True)
404404

405-
# Allocate host memory
406-
pInt_host = checkCudaErrors(cudart.cudaHostAlloc(np.dtype(np.int32).itemsize, cudart.cudaHostAllocMapped))
407-
pFloat_host = checkCudaErrors(cudart.cudaHostAlloc(np.dtype(np.float32).itemsize, cudart.cudaHostAllocMapped))
408-
pStruct_host = checkCudaErrors(cudart.cudaHostAlloc(testStruct.itemsize, cudart.cudaHostAllocMapped))
405+
# Allocate device memory
406+
pInt = checkCudaErrors(cudart.cudaMalloc(np.dtype(np.int32).itemsize))
407+
pFloat = checkCudaErrors(cudart.cudaMalloc(np.dtype(np.float32).itemsize))
408+
pStruct = checkCudaErrors(cudart.cudaMalloc(testStruct.itemsize))
409409

410410
# Collect all input kernel arguments into a single tuple for further processing
411411
kernelValues = (
412412
np.array(1, dtype=np.uint32),
413-
np.array([pInt_host], dtype=np.intp),
413+
np.array([pInt], dtype=np.intp),
414414
np.array(123.456, dtype=np.float32),
415-
np.array([pFloat_host], dtype=np.intp),
415+
np.array([pFloat], dtype=np.intp),
416416
np.array([5], testStruct),
417-
np.array([pStruct_host], dtype=np.intp),
417+
np.array([pStruct], dtype=np.intp),
418418
)
419419
```
420420

@@ -444,24 +444,7 @@ checkCudaErrors(cuda.cuLaunchKernel(
444444

445445
The [ctypes](https://docs.python.org/3/library/ctypes.html) approach simplifies parameter preparation: instead of requiring the caller to lay the arguments out in contiguous memory, it delegates that work to the API launch call.
446446

447-
Let's use the following kernel definition as an example:
448-
```python
449-
kernel_string = """\
450-
struct testStruct {
451-
int value;
452-
};
453-
454-
extern "C" __global__
455-
void testkernel(int i, int *pi,
456-
float f, float *pf,
457-
struct testStruct s, struct testStruct *ps)
458-
{
459-
*pi = i;
460-
*pf = f;
461-
ps->value = s.value;
462-
}
463-
"""
464-
```
447+
Let's use the same kernel definition as in the previous section for this example.
465448

466449
The ctypes approach treats the `kernelParams` argument as a pair of tuples: `kernel_values` and `kernel_types`.
467450

@@ -478,19 +461,19 @@ For this example the result becomes:
478461
class testStruct(ctypes.Structure):
479462
_fields_ = [("value", ctypes.c_int)]
480463

481-
# Allocate host memory
482-
pInt_host = checkCudaErrors(cudart.cudaHostAlloc(ctypes.sizeof(ctypes.c_int), cudart.cudaHostAllocMapped))
483-
pFloat_host = checkCudaErrors(cudart.cudaHostAlloc(ctypes.sizeof(ctypes.c_float), cudart.cudaHostAllocMapped))
484-
pStruct_host = checkCudaErrors(cudart.cudaHostAlloc(ctypes.sizeof(testStruct), cudart.cudaHostAllocMapped))
464+
# Allocate device memory
465+
pInt = checkCudaErrors(cudart.cudaMalloc(ctypes.sizeof(ctypes.c_int)))
466+
pFloat = checkCudaErrors(cudart.cudaMalloc(ctypes.sizeof(ctypes.c_float)))
467+
pStruct = checkCudaErrors(cudart.cudaMalloc(ctypes.sizeof(testStruct)))
485468

486469
# Collect all input kernel arguments into a single tuple for further processing
487470
kernelValues = (
488471
1,
489-
pInt_host,
472+
pInt,
490473
123.456,
491-
pFloat_host,
474+
pFloat,
492475
testStruct(5),
493-
pStruct_host,
476+
pStruct,
494477
)
495478
kernelTypes = (
496479
ctypes.c_int,

0 commit comments

Comments
 (0)