-
Notifications
You must be signed in to change notification settings - Fork 77
Make row/colscale, select_bitmap more memory-friendly for CUDA #406
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev2
Are you sure you want to change the base?
Changes from all commits
4a03d25
76c006c
b96f496
c0d5602
290429b
5f655c7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -92,21 +92,17 @@ GrB_Info GB_colscale // C = A*D, column scale with diagonal D | |
| GB_void cscalar [GB_VLA(zsize)] ; | ||
| bool C_iso = GB_AxB_iso (cscalar, A, D, A->vdim, semiring, flipxy, true) ; | ||
|
|
||
| //-------------------------------------------------------------------------- | ||
| // copy the pattern of A into C | ||
| //-------------------------------------------------------------------------- | ||
|
|
||
| // allocate C->x but do not initialize it | ||
| GB_OK (GB_dup_worker (&C, C_iso, A, false, ztype)) ; | ||
| info = GrB_NO_VALUE ; | ||
| ASSERT (C->type == ztype) ; | ||
|
|
||
| //-------------------------------------------------------------------------- | ||
| // C = A*D, column scale, compute numerical values | ||
| //-------------------------------------------------------------------------- | ||
|
|
||
| if (GB_IS_BUILTIN_BINOP_CODE_POSITIONAL (opcode)) | ||
| { | ||
| { | ||
| // Copy the pattern of A into C. Allocates, but does not initialize C->x. | ||
| GB_OK (GB_dup_worker (&C, C_iso, A, false, ztype)) ; | ||
| ASSERT (C->type == ztype) ; | ||
|
|
||
| //---------------------------------------------------------------------- | ||
| // apply a positional operator: convert C=A*D to C=op(A) | ||
|
|
@@ -157,7 +153,10 @@ GrB_Info GB_colscale // C = A*D, column scale with diagonal D | |
|
|
||
| } | ||
| else if (C_iso) | ||
| { | ||
| { | ||
| // Copy the pattern of A into C. Allocates, but does not initialize C->x. | ||
| GB_OK (GB_dup_worker (&C, C_iso, A, false, ztype)) ; | ||
| ASSERT (C->type == ztype) ; | ||
|
|
||
| //---------------------------------------------------------------------- | ||
| // via the iso kernel | ||
|
|
@@ -179,6 +178,18 @@ GrB_Info GB_colscale // C = A*D, column scale with diagonal D | |
| // determine if the values are accessed | ||
| //---------------------------------------------------------------------- | ||
|
|
||
| // Do not dup A->p, A->h into C yet; if we use CUDA, we'll do it on the | ||
| // GPU | ||
| // FIXME: Add flags to GB_dup_worker for which arrays to copy | ||
| int64_t *tmp_Ap = A->p ; | ||
| int64_t *tmp_Ah = A->h ; | ||
| A->p = NULL ; | ||
| A->h = NULL ; | ||
| GB_OK (GB_dup_worker (&C, C_iso, A, false, ztype)) ; | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I hadn't considered this option for using GB_dup_worker, but I see from my code that it can work. However, it might be cleaner to revise GB_dup_worker and pass in flags that disable the copy of specific parts of the matrix, like "bool Ap_dup" or something, for each component. I can pull in this PR for now, but a "// FIXME: revise this to use flags for each component" can be added here.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That makes sense. It looks like this wouldn't be a huge change; I can make a follow-up PR for this. |
||
| A->p = tmp_Ap ; | ||
| A->h = tmp_Ah ; | ||
| ASSERT (C->type == ztype) ; | ||
|
|
||
| ASSERT (fmult != NULL) ; | ||
| bool op_is_first = (opcode == GB_FIRST_binop_code) ; | ||
| bool op_is_second = (opcode == GB_SECOND_binop_code) ; | ||
|
|
@@ -217,6 +228,38 @@ GrB_Info GB_colscale // C = A*D, column scale with diagonal D | |
| } | ||
| #endif | ||
|
|
||
| // We are using the CPU. Finish the dup from A -> C. | ||
| if (info == GrB_NO_VALUE) | ||
| { | ||
| // copy A->p, A->h into C->p, C->h | ||
| size_t A_psize = A->p_is_32 ? | ||
| sizeof (uint32_t) : sizeof (uint64_t) ; | ||
| size_t A_isize = A->i_is_32 ? | ||
| sizeof (uint32_t) : sizeof (uint64_t) ; | ||
|
|
||
| int64_t anvec = A->nvec ; | ||
| int64_t cnvec = C->nvec ; | ||
| ASSERT (cnvec == anvec) ; | ||
|
|
||
| int nthreads_max = GB_Context_nthreads_max ( ) ; | ||
|
|
||
| if (A->p != NULL) | ||
| { | ||
| size_t C_psize = C->p_is_32 ? | ||
| sizeof (uint32_t) : sizeof (uint64_t) ; | ||
|
|
||
| ASSERT (C_psize == A_psize) ; | ||
| GB_memcpy (C->p, A->p, (anvec+1) * A_psize, nthreads_max) ; | ||
| } | ||
| if (A->h != NULL) | ||
| { | ||
| size_t C_isize = C->i_is_32 ? | ||
| sizeof (uint32_t) : sizeof (uint64_t) ; | ||
|
|
||
| ASSERT (C_isize == A_isize) ; | ||
| GB_memcpy (C->h, A->h, anvec * A_isize, nthreads_max) ; | ||
| } | ||
| } | ||
| //---------------------------------------------------------------------- | ||
| // determine the number of threads to use | ||
| //---------------------------------------------------------------------- | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.