Skip to content

Commit

Permalink
Merge pull request #305 from jedwards4b/jedwards/asyncio
Browse files Browse the repository at this point in the history
first step - reorder pio_init and move to ensemble_driver
### Description of changes
Add an InitializeIO phase to the ensemble_driver, this allows ESMF to control the ASYNCIO tasks internally. 


### Specific notes
It requires however that components do not do IO initialization until the realize phase so the cice and mosart component PRs: ESCOMP/MOSART#55
ESCOMP/CICE#18 must be merged first.
  • Loading branch information
jedwards4b authored Oct 7, 2022
2 parents 962e753 + cdbd5c1 commit 5157de0
Show file tree
Hide file tree
Showing 43 changed files with 2,030 additions and 393 deletions.
202 changes: 176 additions & 26 deletions cesm/driver/ensemble_driver.F90
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ module Ensemble_driver

public :: SetServices
private :: SetModelServices
private :: ensemble_finalize

integer, allocatable :: asyncio_petlist(:)
logical :: asyncio_task=.false.
logical :: asyncIO_available=.false.
character(*),parameter :: u_FILE_u = &
__FILE__

Expand All @@ -27,9 +31,12 @@ module Ensemble_driver

subroutine SetServices(ensemble_driver, rc)

use NUOPC , only : NUOPC_CompDerive, NUOPC_CompSpecialize
use NUOPC , only : NUOPC_CompDerive, NUOPC_CompSpecialize, NUOPC_CompAttributeSet
use NUOPC , only : NUOPC_CompAttributeGet
use NUOPC_Driver , only : driver_routine_SS => SetServices
use NUOPC_Driver , only : ensemble_label_SetModelServices => label_SetModelServices
use NUOPC_Driver , only : ensemble_label_ModifyCplLists => label_ModifyCplLists
use NUOPC_Driver, only : label_Finalize
use ESMF , only : ESMF_GridComp, ESMF_GridCompSet
use ESMF , only : ESMF_Config, ESMF_ConfigCreate, ESMF_ConfigLoadFile
use ESMF , only : ESMF_SUCCESS, ESMF_LogWrite, ESMF_LOGMSG_INFO
Expand All @@ -39,7 +46,8 @@ subroutine SetServices(ensemble_driver, rc)

! local variables
type(ESMF_Config) :: config
character(len=*), parameter :: subname = "(ensemble_driver.F90:SetServices)"
logical :: isPresent ! Check to see if InitializeDataResolution attribute is available
character(len=*), parameter :: subname = '('//__FILE__//':SetServices)'
!---------------------------------------

rc = ESMF_SUCCESS
Expand All @@ -54,6 +62,14 @@ subroutine SetServices(ensemble_driver, rc)
specRoutine=SetModelServices, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

! ModifyCplLists is a NUOPC specialization which happens after Advertize but before Realize
! We have overloaded this specialization location to initilize IO.
! So after all components have called Advertise but before any component calls Realize
! IO will be initialized and any async IO tasks will be split off to the PIO async IO driver.
call NUOPC_CompSpecialize(ensemble_driver, specLabel=ensemble_label_ModifyCplLists, &
specRoutine=InitializeIO, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

! Create, open and set the config
config = ESMF_ConfigCreate(rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
Expand All @@ -64,6 +80,25 @@ subroutine SetServices(ensemble_driver, rc)
call ESMF_GridCompSet(ensemble_driver, config=config, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

! NUOPC component drivers end the initialization process with an internal call to InitializeDataResolution.
! The ensemble_driver does not need to InitializeDataResolution and doing so will cause a hang
! if asyncronous IO is used. This attribute is available after ESMF8.4.0b03 to toggle that control.
! Cannot use asyncIO with older ESMF versions.
call NUOPC_CompAttributeGet(ensemble_driver, name="InitializeDataResolution", &
isPresent=isPresent, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

if(isPresent) then
call NUOPC_CompAttributeSet(ensemble_driver, name="InitializeDataResolution", value="false", rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
asyncIO_available = .true.
endif
! Set a finalize method, it calls pio_finalize
call NUOPC_CompSpecialize(ensemble_driver, specLabel=label_Finalize, &
specRoutine=ensemble_finalize, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return


call ESMF_LogWrite(trim(subname)//": done", ESMF_LOGMSG_INFO)

end subroutine SetServices
Expand All @@ -90,22 +125,27 @@ subroutine SetModelServices(ensemble_driver, rc)

! local variables
type(ESMF_VM) :: vm
type(ESMF_GridComp) :: driver, gridcomptmp
type(ESMF_GridComp) :: driver
type(ESMF_Config) :: config
integer :: n, n1, stat
integer, pointer :: petList(:)
character(len=20) :: model, prefix
integer :: petCount, i
integer :: petCount, i, k
integer :: localPet
logical :: is_set
character(len=512) :: diro
character(len=512) :: logfile
integer :: global_comm
logical :: read_restart
logical :: comp_task
character(len=CS) :: read_restart_string
integer :: inst
integer :: currentpet, petcnt, iopetcnt
integer :: number_of_members
integer :: ntasks_per_member
integer :: pio_asyncio_ntasks
integer :: pio_asyncio_stride
integer :: pio_asyncio_rootpe
character(CL) :: start_type ! Type of startup
character(len=7) :: drvrinst
character(len=5) :: inst_suffix
Expand All @@ -115,7 +155,7 @@ subroutine SetModelServices(ensemble_driver, rc)
character(len=*) , parameter :: start_type_start = "startup"
character(len=*) , parameter :: start_type_cont = "continue"
character(len=*) , parameter :: start_type_brnch = "branch"
character(len=*) , parameter :: subname = "(ensemble_driver.F90:SetModelServices)"
character(len=*), parameter :: subname = '('//__FILE__//':SetModelServices)'
!-------------------------------------------

rc = ESMF_SUCCESS
Expand Down Expand Up @@ -168,6 +208,8 @@ subroutine SetModelServices(ensemble_driver, rc)
write(read_restart_string,*) read_restart

! Add read_restart to ensemble_driver attributes

call ESMF_LogWrite(trim(subname)//": set read_restart "//trim(read_restart_string), ESMF_LOGMSG_INFO)
call NUOPC_CompAttributeAdd(ensemble_driver, attrList=(/'read_restart'/), rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
call NUOPC_CompAttributeSet(ensemble_driver, name='read_restart', value=trim(read_restart_string), rc=rc)
Expand All @@ -187,40 +229,93 @@ subroutine SetModelServices(ensemble_driver, rc)
call NUOPC_CompAttributeGet(ensemble_driver, name="ninst", value=cvalue, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
read(cvalue,*) number_of_members

call NUOPC_CompAttributeGet(ensemble_driver, name="pio_asyncio_ntasks", value=cvalue, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
read(cvalue,*) pio_asyncio_ntasks

call NUOPC_CompAttributeGet(ensemble_driver, name="pio_asyncio_stride", value=cvalue, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
read(cvalue,*) pio_asyncio_stride

call NUOPC_CompAttributeGet(ensemble_driver, name="pio_asyncio_rootpe", value=cvalue, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
read(cvalue,*) pio_asyncio_rootpe

call ESMF_VMGet(vm, localPet=localPet, PetCount=PetCount, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

ntasks_per_member = PetCount/number_of_members - pio_asyncio_ntasks

ntasks_per_member = PetCount/number_of_members
if(ntasks_per_member*number_of_members .ne. PetCount) then
if(ntasks_per_member*number_of_members .ne. (PetCount - pio_asyncio_ntasks)) then
write (msgstr,'(a,i5,a,i3,a,i3,a)') &
"PetCount (",PetCount,") must be evenly divisable by number of members (",number_of_members,")"
"PetCount - Async IOtasks (",PetCount-pio_asyncio_ntasks,") must be evenly divisable by number of members (",number_of_members,")"
call ESMF_LogSetError(ESMF_RC_ARG_BAD, msg=msgstr, line=__LINE__, file=__FILE__, rcToReturn=rc)
return
endif

if(pio_asyncio_ntasks > 0 .and. .not. asyncIO_available) then
call ESMF_LogSetError(ESMF_RC_ARG_BAD, msg="AsyncIO requires ESMF version 8.4.0b03 or newer", line=__LINE__, file=__FILE__, rcToReturn=rc)
return
endif

!-------------------------------------------
! Loop over number of ensemblel members
!-------------------------------------------

allocate(petList(ntasks_per_member))
! Create an asyncio petlist (a list of Pets who will be dedicated to IO). All components
! with async IO enabled will use these IO PETS. If stride = MPI_TASKS_PER_NODE then there will
! be one IO task per node.
allocate(asyncio_petlist(pio_asyncio_ntasks))
iopetcnt = 1
currentPet = 0

do n=1,pio_asyncio_ntasks
asyncio_petlist(n) = pio_asyncio_rootpe + (n-1)*pio_asyncio_stride
if (localPet == asyncio_petlist(n)) asyncio_task = .true.
enddo

k = 1
do inst=1,number_of_members

petcnt=1
comp_task = .false.
! Determine pet list for driver instance
petList(1) = (inst-1) * ntasks_per_member
do n=2,ntasks_per_member
petList(n) = petList(n-1) + 1
do n=1,ntasks_per_member+pio_asyncio_ntasks
if(pio_asyncio_stride == 0) then
petList(petcnt) = currentpet
petcnt = petcnt+1
if (currentpet == localPet) comp_task=.true.
else if(pio_asyncio_stride == 1) then
if (currentpet < asyncio_petlist(1) .or. currentpet > asyncio_petlist(pio_asyncio_ntasks)) then
petList(petcnt) = currentpet
petcnt = petcnt+1
if (currentpet == localPet) comp_task=.true.
endif
else if (currentpet .ne. asyncio_petlist(k)) then
petList(petcnt) = currentpet
petcnt = petcnt+1
if (currentpet == localPet) comp_task=.true.
else if (currentpet == asyncio_petlist(k)) then
k = modulo(k,pio_asyncio_ntasks) + 1
endif
currentpet = currentpet + 1
enddo

if(asyncio_task .and. comp_task) then
call ESMF_LogSetError(ESMF_RC_ARG_BAD, msg="task is set as both a compute task and an asyncio task", line=__LINE__, file=__FILE__, rcToReturn=rc)
return
endif
! Add driver instance to ensemble driver
write(drvrinst,'(a,i4.4)') "ESM",inst
call NUOPC_DriverAddComp(ensemble_driver, drvrinst, ESMSetServices, petList=petList, comp=gridcomptmp, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

if (localpet >= petlist(1) .and. localpet <= petlist(ntasks_per_member)) then

driver = gridcomptmp
call NUOPC_DriverAddComp(ensemble_driver, drvrinst, ESMSetServices, petList=petList, comp=driver, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) then
write(msgstr,*) 'size(petList):', size(petList), ' petcnt:', petcnt, ' petList: ',petList
call ESMF_LogSetError(ESMF_RC_ARG_BAD, msg=msgstr, line=__LINE__, file=__FILE__, rcToReturn=rc)
return
endif
mastertask = .false.
if (comp_task) then

if(number_of_members > 1) then
call NUOPC_CompAttributeAdd(driver, attrList=(/'inst_suffix'/), rc=rc)
Expand Down Expand Up @@ -248,7 +343,7 @@ subroutine SetModelServices(ensemble_driver, rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

! Set the driver log to the driver task 0
if (mod(localPet, ntasks_per_member) == 0) then
if (petList(1) == localPet) then
call NUOPC_CompAttributeGet(driver, name="diro", value=diro, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
call NUOPC_CompAttributeGet(driver, name="logfile", value=logfile, rc=rc)
Expand All @@ -257,21 +352,76 @@ subroutine SetModelServices(ensemble_driver, rc)
mastertask = .true.
else
logUnit = shrlogunit
mastertask = .false.
endif
call shr_file_setLogUnit (logunit)

! Create a clock for each driver instance
call esm_time_clockInit(ensemble_driver, driver, logunit, mastertask, rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

endif
! Create a clock for each driver instance
call esm_time_clockInit(ensemble_driver, driver, logunit, mastertask, rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
enddo

deallocate(petList)

call ESMF_LogWrite(trim(subname)//": done", ESMF_LOGMSG_INFO)

end subroutine SetModelServices

subroutine InitializeIO(ensemble_driver, rc)
use ESMF, only: ESMF_GridComp, ESMF_LOGMSG_INFO, ESMF_LogWrite
use ESMF, only: ESMF_SUCCESS, ESMF_VM, ESMF_GridCompGet, ESMF_VMGet
use ESMF, only: ESMF_CONFIG, ESMF_GridCompIsPetLocal, ESMF_State, ESMF_Clock
use NUOPC, only: NUOPC_CompAttributeGet, NUOPC_CompGet
use NUOPC_DRIVER, only: NUOPC_DriverGetComp
use driver_pio_mod , only: driver_pio_init, driver_pio_component_init

type(ESMF_GridComp) :: ensemble_driver
type(ESMF_VM) :: ensemble_vm
integer, intent(out) :: rc
character(len=*), parameter :: subname = '('//__FILE__//':InitializeIO)'
type(ESMF_GridComp), pointer :: dcomp(:), ccomp(:)
integer :: iam
integer :: Global_Comm
integer :: drv, comp
character(len=8) :: compname

rc = ESMF_SUCCESS
call ESMF_LogWrite(trim(subname)//": called", ESMF_LOGMSG_INFO)

call ESMF_GridCompGet(ensemble_driver, vm=ensemble_vm, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

call ESMF_VMGet(ensemble_vm, localpet=iam, mpiCommunicator=Global_Comm, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

nullify(dcomp)
call NUOPC_DriverGetComp(ensemble_driver, complist=dcomp, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

do drv=1,size(dcomp)
if (ESMF_GridCompIsPetLocal(dcomp(drv), rc=rc) .or. asyncio_task) then
if (chkerr(rc,__LINE__,u_FILE_u)) return
call NUOPC_CompGet(dcomp(drv), name=compname, rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
call ESMF_LogWrite(trim(subname)//": call shr_pio_init "//compname, ESMF_LOGMSG_INFO)
call driver_pio_init(dcomp(drv), rc=rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return

call ESMF_LogWrite(trim(subname)//": call shr_pio_component_init "//compname, ESMF_LOGMSG_INFO)
call driver_pio_component_init(dcomp(drv), Global_Comm, asyncio_petlist, rc)
if (chkerr(rc,__LINE__,u_FILE_u)) return
call ESMF_LogWrite(trim(subname)//": shr_pio_component_init done "//compname, ESMF_LOGMSG_INFO)
endif
enddo
deallocate(asyncio_petlist)
call ESMF_LogWrite(trim(subname)//": done", ESMF_LOGMSG_INFO)
end subroutine InitializeIO

subroutine ensemble_finalize(ensemble_driver, rc)
use ESMF, only : ESMF_GridComp, ESMF_SUCCESS
use shr_pio_mod, only: shr_pio_finalize
type(ESMF_GridComp) :: Ensemble_driver
integer, intent(out) :: rc
rc = ESMF_SUCCESS
call shr_pio_finalize()

end subroutine ensemble_finalize
end module Ensemble_driver
Loading

0 comments on commit 5157de0

Please sign in to comment.