1- from firedrake .preconditioners .assembled import AssembledPC
1+ from firedrake .preconditioners .assembled import PCBase
22from firedrake .petsc import PETSc
3- from firedrake .utils import device_matrix_type
4- from firedrake .logging import logger
5- from functools import cache
6- import warnings
3+ from firedrake .utils import device_matrix_type , get_device_type
74
85import firedrake .dmhooks as dmhooks
96
107__all__ = ("OffloadPC" ,)
118
129
13- @cache
14- def offload_mat_type (pc_comm_rank ) -> str | None :
15- mat_type = device_matrix_type ()
16- if mat_type is None :
17- if pc_comm_rank == 0 :
18- warnings .warn (
19- "This installation of Firedrake is not GPU-enabled, therefore OffloadPC"
20- "will do nothing. For this preconditioner to function correctly PETSc"
21- "will need to be rebuilt with some GPU capability (e.g. '--with-cuda=1')."
22- )
23- return None
24- try :
25- dev = PETSc .Device .create ()
26- except PETSc .Error :
27- if pc_comm_rank == 0 :
28- logger .warning (
29- "This installation of Firedrake is GPU-enabled, but no GPU device has"
30- "been detected. OffloadPC will do nothing on this host"
31- )
32- return None
33- if dev .getDeviceType () == "HOST" :
34- raise RuntimeError (
35- "A GPU-enabled Firedrake build has been detected, and GPU hardware has been"
36- "detected but a GPU device was unable to be initialised."
37- )
38- dev .destroy ()
39- return mat_type
10+ _device_vector_impls = {
11+ "CUDA" : {
12+ "createWithArrays" : "createCUDAWithArrays" ,
13+ }
14+ }
4015
4116
42- class OffloadPC (AssembledPC ):
17+ class OffloadPC (PCBase ):
4318 """Offload PC from CPU to GPU and back.
4419
4520 Internally this makes a PETSc PC object that can be controlled by
@@ -50,44 +25,84 @@ class OffloadPC(AssembledPC):
5025
5126 def initialize (self , pc ):
5227 # Check if our PETSc installation is GPU enabled
53- super ().initialize (pc )
54- self .offload_mat_type = offload_mat_type (pc .comm .rank )
55- if self .offload_mat_type is not None :
28+
29+ A , P = pc .getOperators ()
30+
31+ if pc .type != "python" :
32+ raise ValueError ("Expecting PC type python" )
33+ opc = pc
34+ if P .type == "python" :
35+ context = P .getPythonContext ()
36+ # It only makes sense to precondition/invert a diagonal
37+ # block in general. That's all we're going to allow.
38+ if not context .on_diag :
39+ raise ValueError ("Only makes sense to invert diagonal block" )
40+
41+ prefix = pc .getOptionsPrefix () or ""
42+ options_prefix = prefix + self ._prefix
43+
44+ self .device_mat = device_matrix_type (pc .comm .rank == 0 )
45+ dm = opc .getDM ()
46+
47+ pc = PETSc .PC ().create (comm = opc .comm )
48+ pc .setDM (dm )
49+ pc .setOptionsPrefix (options_prefix )
50+ if self .device_mat is not None :
5651 with PETSc .Log .Event ("Event: initialize offload" ):
57- A , P = pc .getOperators ()
52+ P_dev = P .convert (mat_type = self .device_mat )
53+ A_dev = A .convert (mat_type = self .device_mat )
54+ P_dev .setNullSpace (P .getNullSpace ())
55+ P_dev .setTransposeNullSpace (P .getTransposeNullSpace ())
56+ P_dev .setNearNullSpace (P .getNearNullSpace ())
57+ self .vector_impls = _device_vector_impls [get_device_type ()]
58+ pc .setOperators (A_dev , P_dev )
59+ else :
60+ pc .setOperators (A , P )
5861
59- # Convert matrix to ajicusparse
60- with PETSc .Log .Event ("Event: matrix offload" ):
61- P_cu = P .convert (self .offload_mat_type ) # todo
62+ # Simplest reconstruction we can manage
63+ octx = dmhooks .get_appctx (dm )
64+ self ._ctx_ref = octx .reconstruct (
65+ problem = None , mat_type = self .device_mat , pmat_type = self .device_mat
66+ )
67+ self .pc = pc
6268
63- # Transfer nullspace
64- P_cu .setNullSpace (P .getNullSpace ())
65- P_cu .setTransposeNullSpace (P .getTransposeNullSpace ())
66- P_cu .setNearNullSpace (P .getNearNullSpace ())
69+ with dmhooks .add_hooks (dm , self , appctx = self ._ctx_ref , save = False ):
70+ self .pc .setFromOptions ()
6771
68- # Update preconditioner with GPU matrix
69- self .pc .setOperators (A , P_cu )
72+ def update (self , pc ):
73+ A , P = pc .getOperators ()
74+ A_dev , P_dev = self .pc .getOperators ()
75+ P .copy (P_dev )
76+ A .copy (A_dev )
7077
7178 # Convert vectors to the device representation, solve, and synchronise the solution back to the host
7279 def apply (self , pc , x , y ):
73- if self .offload_mat_type is None :
80+ if self .device_mat is None :
7481 self .pc .apply (x , y )
7582 else :
7683 with PETSc .Log .Event ("Event: apply offload" ): #
7784 dm = pc .getDM ()
7885 with dmhooks .add_hooks (dm , self , appctx = self ._ctx_ref ):
7986 with PETSc .Log .Event ("Event: vectors offload" ):
80- y_cu = PETSc .Vec () # begin
81- y_cu .createCUDAWithArrays (y )
82- x_cu = PETSc .Vec ()
83- # Passing a vec into another vec doesnt work because original is locked
84- x_cu .createCUDAWithArrays (x .array_r )
87+ # Create the to-be-offloaded vector
88+ y_dev = PETSc .Vec ()
89+ # Use device implementation of 'createWithArrays' function
90+ getattr (y_dev , self .vector_impls ["createWithArrays" ])(
91+ y .array_r , None
92+ )
93+ # Create the to-be-offloaded vector
94+ x_dev = PETSc .Vec ()
95+ # Use device implementation of 'createWithArrays' function
96+ getattr (x_dev , self .vector_impls ["createWithArrays" ])(
97+ x .array_r , None
98+ )
8599 with PETSc .Log .Event ("Event: solve" ):
86- self .pc .apply (x_cu , y_cu )
87- # Calling data to synchronize vector
88- tmp = y_cu .array_r # noqa: F841
89- with PETSc .Log .Event ("Event: vectors copy back" ):
90- y .copy (y_cu ) #
100+ self .pc .apply (x_dev , y_dev )
101+ with PETSc .Log .Event ("Event: vectors copy back" ):
102+ # y is already designated as host storage for y_dev, so calling
103+ # getArray is sufficient to synchronise the vector on the device
104+ # with y on the host
105+ y_dev .getArray (True )
91106
92107 def applyTranspose (self , pc , X , Y ):
93108 raise NotImplementedError
0 commit comments