From 7a04c38c9fee07c08891e993c98b8e85a3391243 Mon Sep 17 00:00:00 2001 From: Marshall Ward Date: Mon, 3 Jan 2022 17:13:06 -0500 Subject: [PATCH] parallal any/all, parallel MOM_file_parser fix A fix to opening an existing file in MOM_file_parser was added. If an open file is re-opened, then the root PE will detect this and `return`, but the others will proceed into `populate_param_data` and get stuck in a broadcast waiting for root. We fix this by gathering the logical test result over PEs as a global any() function. Leading to change #2... `any_across_PEs` and `all_across_PEs` have been added to MOM_coms as any/all implementations over PEs (i.e. MPI ranks). Since legacy FMS does not support logical collectives, we convert to integers and use min/max collectives as an equivalent test. --- config_src/infra/FMS1/MOM_coms_infra.F90 | 31 +++++++++++++++++++ src/framework/MOM_coms.F90 | 2 ++ src/framework/MOM_file_parser.F90 | 13 ++++---- .../unit_tests/MOM_file_parser_tests.F90 | 21 ++++++++++++- 4 files changed, 60 insertions(+), 7 deletions(-) diff --git a/config_src/infra/FMS1/MOM_coms_infra.F90 b/config_src/infra/FMS1/MOM_coms_infra.F90 index 555b4df119..7af5b8ac83 100644 --- a/config_src/infra/FMS1/MOM_coms_infra.F90 +++ b/config_src/infra/FMS1/MOM_coms_infra.F90 @@ -16,6 +16,7 @@ module MOM_coms_infra public :: PE_here, root_PE, num_PEs, set_rootPE, Set_PElist, Get_PElist public :: broadcast, sum_across_PEs, min_across_PEs, max_across_PEs +public :: any_across_PEs, all_across_PEs public :: field_chksum, MOM_infra_init, MOM_infra_end ! This module provides interfaces to the non-domain-oriented communication @@ -438,6 +439,36 @@ subroutine min_across_PEs_real_1d(field, length, pelist) call mpp_min(field, length, pelist) end subroutine min_across_PEs_real_1d +!> Implementation of any() intrinsic across PEs +function any_across_PEs(field, pelist) + logical, intent(in) :: field !< Local PE value + integer, optional, intent(in) :: pelist(:) !< List of PEs to work with + logical :: any_across_PEs + + integer :: field_flag + + ! FMS1 does not support logical collectives, so integer flags are used. + field_flag = 0 + if (field) field_flag = 1 + call max_across_PEs(field_flag, pelist) + any_across_PEs = (field_flag > 0) +end function any_across_PEs + +!> Implementation of all() intrinsic across PEs +function all_across_PEs(field, pelist) + logical, intent(in) :: field !< Local PE value + integer, optional, intent(in) :: pelist(:) !< List of PEs to work with + logical :: all_across_PEs + + integer :: field_flag + + ! FMS1 does not support logical collectives, so integer flags are used. + field_flag = 0 + if (field) field_flag = 1 + call min_across_PEs(field_flag, pelist) + all_across_PEs = (field_flag < 1) +end function all_across_PEs + !> Initialize the model framework, including PE communication over a designated communicator. !! If no communicator ID is provided, the framework's default communicator is used. subroutine MOM_infra_init(localcomm) diff --git a/src/framework/MOM_coms.F90 b/src/framework/MOM_coms.F90 index c3ed3ba7b3..9e4b811a46 100644 --- a/src/framework/MOM_coms.F90 +++ b/src/framework/MOM_coms.F90 @@ -7,12 +7,14 @@ module MOM_coms use MOM_coms_infra, only : PE_here, root_PE, num_PEs, set_rootPE, Set_PElist, Get_PElist use MOM_coms_infra, only : broadcast, field_chksum, MOM_infra_init, MOM_infra_end use MOM_coms_infra, only : sum_across_PEs, max_across_PEs, min_across_PEs +use MOM_coms_infra, only : all_across_PEs, any_across_PEs use MOM_error_handler, only : MOM_error, MOM_mesg, FATAL, WARNING implicit none ; private public :: PE_here, root_PE, num_PEs, MOM_infra_init, MOM_infra_end public :: broadcast, sum_across_PEs, min_across_PEs, max_across_PEs, field_chksum +public :: all_across_PEs, any_across_PEs public :: set_PElist, Get_PElist, Set_rootPE public :: reproducing_sum, reproducing_sum_EFP, EFP_sum_across_PEs, EFP_list_sum_across_PEs public :: EFP_plus, EFP_minus, EFP_to_real, real_to_EFP, EFP_real_diff diff --git a/src/framework/MOM_file_parser.F90 b/src/framework/MOM_file_parser.F90 index 00bea5c938..1e3696812a 100644 --- a/src/framework/MOM_file_parser.F90 +++ b/src/framework/MOM_file_parser.F90 @@ -13,6 +13,9 @@ module MOM_file_parser use MOM_string_functions, only : left_int, left_ints, slasher use MOM_string_functions, only : left_real, left_reals +! testing +use MOM_coms, only : any_across_PEs + implicit none ; private integer, parameter, public :: MAX_PARAM_FILES = 5 !< Maximum number of parameter files. @@ -125,7 +128,7 @@ subroutine open_param_file(filename, CS, checkable, component, doc_file_dir) !! the documentation files. The default is effectively './'. ! Local variables - logical :: file_exists, unit_in_use, Netcdf_file, may_check + logical :: file_exists, unit_in_use, Netcdf_file, may_check, reopened_file integer :: ios, iounit, strlen, i character(len=240) :: doc_path type(parameter_block), pointer :: block => NULL() @@ -140,6 +143,7 @@ subroutine open_param_file(filename, CS, checkable, component, doc_file_dir) ! Check that this file has not already been opened if (CS%nfiles > 0) then + reopened_file = .false. inquire(file=trim(filename), number=iounit) if (iounit /= -1) then do i = 1, CS%nfiles @@ -158,18 +162,15 @@ subroutine open_param_file(filename, CS, checkable, component, doc_file_dir) call assert(trim(CS%filename(1)) == trim(filename), & "open_param_file: internal inconsistency! "//trim(filename)// & " is registered as open but has the wrong unit number!") - - ! TODO: There are MPI-parallel issues here, since only one rank - ! will return from this function. The rest will have iounit = 1 - ! and will proceed to hang in populate_param_data()'s broadcast(). call MOM_error(WARNING, & "open_param_file: file "//trim(filename)// & " has already been opened. This should NOT happen!"// & " Did you specify the same file twice in a namelist?") - return + reopened_file = .true. endif ! unit numbers enddo ! i endif + if (any_across_PEs(reopened_file)) return endif ! Check that the file exists to readstdlog diff --git a/src/framework/unit_tests/MOM_file_parser_tests.F90 b/src/framework/unit_tests/MOM_file_parser_tests.F90 index 2c50194ea5..ef325d368c 100644 --- a/src/framework/unit_tests/MOM_file_parser_tests.F90 +++ b/src/framework/unit_tests/MOM_file_parser_tests.F90 @@ -23,6 +23,7 @@ module MOM_file_parser_tests use MOM_error_handler, only : is_root_pe use MOM_error_handler, only : disable_fatal_errors use MOM_error_handler, only : enable_fatal_errors +use MOM_error_handler, only : assert use MOM_jump, only : set_jump_point use mpp_mod, only : mpp_sync @@ -56,6 +57,7 @@ module MOM_file_parser_tests ! Constructor to support array initialization interface string module procedure init_string + module procedure init_string_int end interface string contains @@ -223,14 +225,19 @@ subroutine test_read_param_int type(param_file_type) :: param integer :: sample type(string) :: lines(1) + character(len=*), parameter :: sample_input = '123' + integer, parameter :: sample_result = 123 - lines = string(sample_param_name // ' = 123') + lines = string(sample_param_name // ' = ' // sample_input) call create_file(param_filename, lines) call open_param_file(param_filename, param) call read_param(param, sample_param_name, sample) call close_param_file(param) + ! TODO: Report the incorrect result + call assert(sample == sample_result, "Incorrect value") + call delete_file(param_filename) end subroutine test_read_param_int @@ -1680,6 +1687,18 @@ function init_string(c) result(str) end function init_string +function init_string_int(n) result(str) + integer, intent(in) :: n + type(string) :: str + character(bit_size(n)/8*3+1) :: chr + ! NOTE: Maximum number of characters required to write an integer + ! https://stackoverflow.com/a/10536254/317172 + + write(chr, '(i0)') n + str = string(chr) +end function init_string_int + + subroutine create_file(filename, lines, mode) character(len=*), intent(in) :: filename type(string), intent(in), optional :: lines(:)