Skip to content

Commit a0178b2

Browse files
committed
fix the multi instance initialization
1 parent efa1e47 commit a0178b2

3 files changed

Lines changed: 121 additions & 104 deletions

File tree

cesm/driver/ensemble_driver.F90

Lines changed: 102 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ module Ensemble_driver
77
! esm driver and its components laid out concurrently across mpi tasks.
88
!-----------------------------------------------------------------------------
99

10-
use shr_kind_mod , only : cl=>shr_kind_cl, cs=>shr_kind_cs
10+
use shr_kind_mod , only : cl=>shr_kind_cl, cs=>shr_kind_cs, cx=>shr_kind_cx
1111
use shr_log_mod , only : shr_log_setLogUnit
1212
use esm_utils_mod , only : maintask, logunit, chkerr
1313

@@ -145,10 +145,10 @@ subroutine SetModelServices(ensemble_driver, rc)
145145
integer :: pio_asyncio_stride
146146
integer :: pio_asyncio_rootpe
147147
integer :: Global_Comm
148-
character(CL) :: start_type ! Type of startup
148+
character(len=CL) :: start_type ! Type of startup
149149
character(len=7) :: drvrinst
150150
character(len=5) :: inst_suffix
151-
character(len=CL) :: msgstr
151+
character(len=CX) :: msgstr
152152
character(len=CL) :: cvalue
153153
character(len=CL) :: calendar
154154
character(len=*) , parameter :: start_type_start = "startup"
@@ -272,106 +272,114 @@ subroutine SetModelServices(ensemble_driver, rc)
272272
! here we assume that pio_asyncio_stride and pio_asyncio_ntasks are only set
273273
! if asyncio is enabled.
274274
!
275-
inst = localPet/(ntasks_per_member+pio_asyncio_ntasks) + 1
276-
277-
petcnt=1
278-
iopetcnt = 1
279-
comp_task = .false.
280-
asyncio_task = .false.
281-
! Determine pet list for driver instance
282-
if(pio_asyncio_ntasks > 0) then
283-
do n=pio_asyncio_rootpe,pio_asyncio_rootpe+pio_asyncio_stride*(pio_asyncio_ntasks-1),pio_asyncio_stride
284-
asyncio_petlist(iopetcnt) = (inst-1)*(ntasks_per_member+pio_asyncio_ntasks) + n
285-
if(asyncio_petlist(iopetcnt) == localPet) asyncio_task = .true.
286-
iopetcnt = iopetcnt+1
287-
enddo
275+
logunit = 6
276+
do inst=1,number_of_members
277+
petcnt=1
288278
iopetcnt = 1
289-
endif
290-
do n=0,ntasks_per_member+pio_asyncio_ntasks-1
279+
comp_task = .false.
280+
asyncio_task = .false.
281+
! Determine pet list for driver instance
291282
if(pio_asyncio_ntasks > 0) then
292-
if( asyncio_petlist(iopetcnt)==(inst-1)*(ntasks_per_member+pio_asyncio_ntasks) + n) then
293-
! Here if asyncio is true and this is an io task
283+
do n=pio_asyncio_rootpe,pio_asyncio_rootpe+pio_asyncio_stride*(pio_asyncio_ntasks-1),pio_asyncio_stride
284+
asyncio_petlist(iopetcnt) = (inst-1)*(ntasks_per_member+pio_asyncio_ntasks) + n
285+
if(asyncio_petlist(iopetcnt) == localPet) asyncio_task = .true.
294286
iopetcnt = iopetcnt+1
295-
else if(petcnt <= ntasks_per_member) then
296-
! Here if this is a compute task
297-
petList(petcnt) = n + (inst-1)*(ntasks_per_member + pio_asyncio_ntasks)
298-
if (petList(petcnt) == localPet) then
299-
comp_task=.true.
287+
enddo
288+
iopetcnt = 1
289+
endif
290+
do n=0,ntasks_per_member+pio_asyncio_ntasks-1
291+
if(pio_asyncio_ntasks > 0) then
292+
if( asyncio_petlist(iopetcnt)==(inst-1)*(ntasks_per_member+pio_asyncio_ntasks) + n) then
293+
! Here if asyncio is true and this is an io task
294+
iopetcnt = iopetcnt+1
295+
else if(petcnt <= ntasks_per_member) then
296+
! Here if this is a compute task
297+
petList(petcnt) = n + (inst-1)*(ntasks_per_member + pio_asyncio_ntasks)
298+
if (petList(petcnt) == localPet) then
299+
comp_task=.true.
300+
endif
301+
petcnt = petcnt+1
302+
else
303+
msgstr = "ERROR task cannot be neither a compute task nor an asyncio task"
304+
call ESMF_LogSetError(ESMF_RC_NOT_VALID, msg=msgstr, line=__LINE__, file=__FILE__, rcToReturn=rc)
305+
return ! bail out
300306
endif
301-
petcnt = petcnt+1
302307
else
303-
msgstr = "ERROR task cannot be neither a compute task nor an asyncio task"
308+
! Here if asyncio is false
309+
petList(petcnt) = (inst-1)*ntasks_per_member + n
310+
if (petList(petcnt) == localPet) comp_task=.true.
311+
petcnt = petcnt+1
312+
endif
313+
enddo
314+
if(inst == localPet/(ntasks_per_member+pio_asyncio_ntasks) + 1) then
315+
if(comp_task .and. asyncio_task) then
316+
write(msgstr,*) "ERROR task cannot be both a compute task and an asyncio task", inst, petlist
317+
call ESMF_LogSetError(ESMF_RC_NOT_VALID, msg=msgstr, line=__LINE__, file=__FILE__, rcToReturn=rc)
318+
return ! bail out
319+
elseif (.not. comp_task .and. .not. asyncio_task) then
320+
write(msgstr,*) "ERROR task is nether a compute task nor an asyncio task", inst, petlist
304321
call ESMF_LogSetError(ESMF_RC_NOT_VALID, msg=msgstr, line=__LINE__, file=__FILE__, rcToReturn=rc)
305322
return ! bail out
306323
endif
307-
else
308-
! Here if asyncio is false
309-
petList(petcnt) = (inst-1)*ntasks_per_member + n
310-
if (petList(petcnt) == localPet) comp_task=.true.
311-
petcnt = petcnt+1
312324
endif
313-
enddo
314-
if(comp_task .and. asyncio_task) then
315-
msgstr = "ERROR task cannot be both a compute task and an asyncio task"
316-
call ESMF_LogSetError(ESMF_RC_NOT_VALID, msg=msgstr, line=__LINE__, file=__FILE__, rcToReturn=rc)
317-
return ! bail out
318-
elseif (.not. comp_task .and. .not. asyncio_task) then
319-
msgstr = "ERROR task is nether a compute task nor an asyncio task"
320-
call ESMF_LogSetError(ESMF_RC_NOT_VALID, msg=msgstr, line=__LINE__, file=__FILE__, rcToReturn=rc)
321-
return ! bail out
322-
endif
323-
! Add driver instance to ensemble driver
324-
write(drvrinst,'(a,i4.4)') "ESM",inst
325+
! Add driver instance to ensemble driver
326+
write(drvrinst,'(a,i4.4)') "ESM",inst
325327

326-
call NUOPC_DriverAddComp(ensemble_driver, drvrinst, ESMSetServices, petList=petList, comp=driver, rc=rc)
327-
if (chkerr(rc,__LINE__,u_FILE_u)) return
328-
write(msgstr, *) ": driver added on PETS ",petlist(1),' to ',petlist(petcnt-1)
329-
call ESMF_LogWrite(trim(subname)//msgstr)
330-
331-
maintask = .false.
332-
if (comp_task) then
333-
if(number_of_members > 1) then
334-
call NUOPC_CompAttributeAdd(driver, attrList=(/'inst_suffix'/), rc=rc)
335-
if (chkerr(rc,__LINE__,u_FILE_u)) return
336-
write(inst_suffix,'(a,i4.4)') '_',inst
337-
call NUOPC_CompAttributeSet(driver, name='inst_suffix', value=inst_suffix, rc=rc)
338-
if (chkerr(rc,__LINE__,u_FILE_u)) return
339-
else
340-
inst_suffix = ''
341-
endif
342-
343-
! Set the driver instance attributes
344-
call NUOPC_CompAttributeAdd(driver, attrList=(/'read_restart'/), rc=rc)
345-
if (chkerr(rc,__LINE__,u_FILE_u)) return
346-
call NUOPC_CompAttributeSet(driver, name='read_restart', value=trim(read_restart_string), rc=rc)
347-
if (chkerr(rc,__LINE__,u_FILE_u)) return
348-
349-
call ReadAttributes(driver, config, "CLOCK_attributes::", rc=rc)
328+
call NUOPC_DriverAddComp(ensemble_driver, drvrinst, ESMSetServices, petList=petList, comp=driver, rc=rc)
350329
if (chkerr(rc,__LINE__,u_FILE_u)) return
351-
352-
call ReadAttributes(driver, config, "DRIVER_attributes::", rc=rc)
353-
if (chkerr(rc,__LINE__,u_FILE_u)) return
354-
355-
call ReadAttributes(driver, config, "DRV_modelio::", rc=rc)
356-
if (chkerr(rc,__LINE__,u_FILE_u)) return
357-
330+
write(msgstr, *) ": driver added on PETS ",petlist(1),' to ',petlist(petcnt-1), comp_task, asyncio_task
331+
call ESMF_LogWrite(trim(subname)//msgstr)
358332
! Set the driver log to the driver task 0
359-
360-
if (localPet == petList(1)) then
361-
call NUOPC_CompAttributeGet(driver, name="diro", value=diro, rc=rc)
333+
if (comp_task) then
334+
if(number_of_members > 1) then
335+
call NUOPC_CompAttributeAdd(driver, attrList=(/'inst_suffix'/), rc=rc)
336+
if (chkerr(rc,__LINE__,u_FILE_u)) return
337+
write(inst_suffix,'(a,i4.4)') '_',inst
338+
call NUOPC_CompAttributeSet(driver, name='inst_suffix', value=inst_suffix, rc=rc)
339+
if (chkerr(rc,__LINE__,u_FILE_u)) return
340+
else
341+
inst_suffix = ''
342+
endif
343+
344+
! Set the driver instance attributes
345+
call NUOPC_CompAttributeAdd(driver, attrList=(/'read_restart'/), rc=rc)
346+
if (chkerr(rc,__LINE__,u_FILE_u)) return
347+
call NUOPC_CompAttributeSet(driver, name='read_restart', value=trim(read_restart_string), rc=rc)
348+
if (chkerr(rc,__LINE__,u_FILE_u)) return
349+
350+
call ReadAttributes(driver, config, "CLOCK_attributes::", rc=rc)
362351
if (chkerr(rc,__LINE__,u_FILE_u)) return
363-
call NUOPC_CompAttributeGet(driver, name="logfile", value=logfile, rc=rc)
352+
353+
call ReadAttributes(driver, config, "DRIVER_attributes::", rc=rc)
364354
if (chkerr(rc,__LINE__,u_FILE_u)) return
365-
open (newunit=logunit,file=trim(diro)//"/"//trim(logfile))
366-
maintask = .true.
367-
else
368-
logUnit = 6
355+
356+
call ReadAttributes(driver, config, "DRV_modelio::", rc=rc)
357+
if (chkerr(rc,__LINE__,u_FILE_u)) return
358+
write(msgStr, *) trim(subname), ' instance = ',inst, 'attributes read'
359+
call ESMF_LogWrite(msgStr)
360+
if (localPet == petList(1)) then
361+
call NUOPC_CompAttributeGet(driver, name="diro", value=diro, rc=rc)
362+
if (chkerr(rc,__LINE__,u_FILE_u)) return
363+
call NUOPC_CompAttributeGet(driver, name="logfile", value=logfile, rc=rc)
364+
if (chkerr(rc,__LINE__,u_FILE_u)) return
365+
! Multi-instance logfile name needs the instance suffix inserted before the first '.'
366+
if(len_trim(inst_suffix) > 0) then
367+
n = index(logfile, '.')
368+
logfile = logfile(1:n-1)//trim(inst_suffix)//logfile(n:)
369+
endif
370+
open (newunit=logunit,file=trim(diro)//"/"//trim(logfile))
371+
maintask = .true.
372+
endif
373+
369374
endif
370375
call shr_log_setLogUnit (logunit)
371-
endif
372-
! Create a clock for each driver instance
373-
call esm_time_clockInit(ensemble_driver, driver, logunit, maintask, rc)
374-
if (chkerr(rc,__LINE__,u_FILE_u)) return
376+
! Create a clock for each driver instance
377+
378+
call esm_time_clockInit(ensemble_driver, driver, logunit, maintask, rc)
379+
if (chkerr(rc,__LINE__,u_FILE_u)) return
380+
381+
enddo
382+
inst = localPet/(ntasks_per_member+pio_asyncio_ntasks) + 1
375383

376384
deallocate(petList)
377385
call t_stopf(subname)
@@ -400,6 +408,8 @@ subroutine InitializeIO(ensemble_driver, rc)
400408
integer :: drv
401409
integer :: PetCount
402410
integer :: key, color, i
411+
type(ESMF_GridComp) :: driver
412+
character(len=7) :: drvrinst
403413
character(len=8) :: compname
404414

405415
rc = ESMF_SUCCESS
@@ -422,22 +432,19 @@ subroutine InitializeIO(ensemble_driver, rc)
422432
else
423433
Instance_Comm = Global_Comm
424434
endif
425-
nullify(dcomp)
426-
call NUOPC_DriverGetComp(ensemble_driver, complist=dcomp, rc=rc)
427-
if (chkerr(rc,__LINE__,u_FILE_u)) return
428-
if (chkerr(rc,__LINE__,u_FILE_u)) return
429-
call NUOPC_CompGet(dcomp(1), name=compname, rc=rc)
435+
write(drvrinst,'(a,i4.4)') "ESM",inst
436+
call NUOPC_DriverGetComp(ensemble_driver, drvrinst, comp=driver, rc=rc)
430437
if (chkerr(rc,__LINE__,u_FILE_u)) return
438+
431439
call ESMF_LogWrite(trim(subname)//": call driver_pio_init "//compname, ESMF_LOGMSG_INFO)
432-
call driver_pio_init(dcomp(1), rc=rc)
440+
call driver_pio_init(driver, rc=rc)
433441
if (chkerr(rc,__LINE__,u_FILE_u)) return
434442

435443
call ESMF_LogWrite(trim(subname)//": call driver_pio_component_init "//compname, ESMF_LOGMSG_INFO)
436-
call driver_pio_component_init(dcomp(1), Instance_Comm, asyncio_petlist, rc)
444+
call driver_pio_component_init(driver, Instance_Comm, asyncio_petlist, rc)
437445
if (chkerr(rc,__LINE__,u_FILE_u)) return
438446
call ESMF_LogWrite(trim(subname)//": driver_pio_component_init done "//compname, ESMF_LOGMSG_INFO)
439447

440-
deallocate(dcomp)
441448
deallocate(asyncio_petlist)
442449
call ESMF_LogWrite(trim(subname)//": done", ESMF_LOGMSG_INFO)
443450
end subroutine InitializeIO

cesm/driver/esm.F90

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -610,14 +610,14 @@ subroutine AddAttributes(gcomp, driver, config, compid, compname, inst_suffix, n
610610
character(len=*) , intent(in) :: inst_suffix
611611
integer , intent(in) :: nthrds
612612
integer , intent(inout) :: rc
613-
614613
! local variables
615614
integer :: inst_index
615+
logical :: computetask
616616
character(len=CL) :: cvalue
617617
character(len=CS) :: attribute
618618
character(len=*), parameter :: subname = "(esm.F90:AddAttributes)"
619619
!-------------------------------------------
620-
620+
computetask = .false.
621621
rc = ESMF_Success
622622
call ESMF_LogWrite(trim(subname)//": called", ESMF_LOGMSG_INFO)
623623
call shr_log_setLogunit(logunit)
@@ -635,6 +635,10 @@ subroutine AddAttributes(gcomp, driver, config, compid, compname, inst_suffix, n
635635
! Add driver restart flag to gcomp attributes
636636
!------
637637
attribute = 'read_restart'
638+
call NUOPC_CompAttributeGet(driver, name=trim(attribute), isPresent=computetask, rc=rc)
639+
if (chkerr(rc,__LINE__,u_FILE_u)) return
640+
if(.not. computetask) return
641+
638642
call NUOPC_CompAttributeGet(driver, name=trim(attribute), value=cvalue, rc=rc)
639643
if (chkerr(rc,__LINE__,u_FILE_u)) return
640644
call NUOPC_CompAttributeAdd(gcomp, (/trim(attribute)/), rc=rc)
@@ -649,6 +653,9 @@ subroutine AddAttributes(gcomp, driver, config, compid, compname, inst_suffix, n
649653
if (chkerr(rc,__LINE__,u_FILE_u)) return
650654
call ReadAttributes(gcomp, config, "ALLCOMP_attributes::", rc=rc)
651655
if (chkerr(rc,__LINE__,u_FILE_u)) return
656+
657+
call ESMF_LogWrite(trim(subname)//": call Readattributes for"//trim(compname), ESMF_LOGMSG_INFO)
658+
652659
call ReadAttributes(gcomp, config, trim(compname)//"_modelio::", rc=rc)
653660
if (chkerr(rc,__LINE__,u_FILE_u)) then
654661
print *,__FILE__,__LINE__,"ERROR reading ",trim(compname)," modelio from runconfig"

cesm/nuopc_cap_share/driver_pio_mod.F90

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ subroutine driver_pio_init(driver, rc)
173173

174174
end subroutine driver_pio_init
175175

176-
subroutine driver_pio_component_init(driver, Inst_comm, asyncio_petlist, rc)
176+
subroutine driver_pio_component_init(driver, inst_comm, asyncio_petlist, rc)
177177
use ESMF, only : ESMF_GridComp, ESMF_LogSetError, ESMF_RC_NOT_VALID, ESMF_GridCompIsCreated, ESMF_VM, ESMF_VMGet
178178
use ESMF, only : ESMF_GridCompGet, ESMF_GridCompIsPetLocal, ESMF_VMIsCreated, ESMF_Finalize, ESMF_PtrInt1D
179179
use ESMF, only : ESMF_LOGMSG_INFO, ESMF_LOGWRITE
@@ -182,8 +182,8 @@ subroutine driver_pio_component_init(driver, Inst_comm, asyncio_petlist, rc)
182182
use mpi, only : MPI_INTEGER, MPI_MAX, MPI_IN_PLACE, MPI_LOR, MPI_LOGICAL
183183

184184
type(ESMF_GridComp) :: driver
185-
integer, intent(in) :: Inst_comm ! The communicator associated with the ensemble_driver
186185
integer, intent(in) :: asyncio_petlist(:)
186+
integer, intent(in) :: Inst_comm ! The communicator associated with the driver
187187
integer, intent(out) :: rc
188188

189189
type(ESMF_VM) :: vm
@@ -195,6 +195,7 @@ subroutine driver_pio_component_init(driver, Inst_comm, asyncio_petlist, rc)
195195
integer, allocatable :: io_proc_list(:), asyncio_tasks(:), comp_proc_list(:,:)
196196

197197
type(ESMF_GridComp), pointer :: gcomp(:)
198+
198199
character(CS) :: cval
199200
character(CS) :: msgstr
200201
integer :: do_async_init
@@ -221,30 +222,32 @@ subroutine driver_pio_component_init(driver, Inst_comm, asyncio_petlist, rc)
221222
asyncio_ntasks = size(asyncio_petlist)
222223

223224
call shr_log_getLogUnit(logunit)
224-
call ESMF_LogWrite(trim(subname)//": called", ESMF_LOGMSG_INFO)
225+
call ESMF_LogWrite(trim(subname)//": called", ESMF_LOGMSG_INFO, rc=rc)
225226
if (chkerr(rc,__LINE__,u_FILE_u)) return
226227

227228
call MPI_Comm_rank(Inst_comm, myid, rc)
228229
call MPI_Comm_size(Inst_comm, totalpes, rc)
230+
229231
asyncio_task=.false.
230232

231233
do i=1,asyncio_ntasks
232234
! asyncio_petlist is in
233-
if(modulo(asyncio_petlist(i), totalpes) == myid) then
235+
if(asyncio_petlist(i) == myid) then
234236
asyncio_task = .true.
235237
exit
236238
endif
237239
enddo
240+
write(msgstr,*) 'asyncio_task = ', asyncio_task, myid, asyncio_petlist
241+
call ESMF_LogWrite(trim(subname)//msgstr, ESMF_LOGMSG_INFO, rc=rc)
238242
nullify(gcomp)
239243
nullify(petLists)
240244
if (.not. asyncio_task) then
241245
call ESMF_GridCompGet(gridcomp=driver, vm=vm, rc=rc)
242246
if (chkerr(rc,__LINE__,u_FILE_u)) return
243-
244-
call NUOPC_DriverGetComp(driver, compList=gcomp, petLists=petLists, rc=rc)
245-
if (chkerr(rc,__LINE__,u_FILE_u)) return
246247
call ESMF_VMGet(vm, localPet=driver_myid, rc=rc)
247248
if (chkerr(rc,__LINE__,u_FILE_u)) return
249+
call NUOPC_DriverGetComp(driver, compList=gcomp, petLists=petLists, rc=rc)
250+
if (chkerr(rc,__LINE__,u_FILE_u)) return
248251
endif
249252
if(associated(gcomp)) then
250253
total_comps = size(gcomp)

0 commit comments

Comments
 (0)