Skip to content

Commit

Permalink
Minor fixes (#186)
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanhenneking committed Aug 7, 2024
1 parent fcd80dd commit 8ca538c
Show file tree
Hide file tree
Showing 13 changed files with 325 additions and 176 deletions.
52 changes: 31 additions & 21 deletions trunk/m_options_files/custom/m_options_TACC_intel19
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,28 @@ HP3D_BASE_PATH = ${WORK2}/hp3d/trunk
HP3D_COMPLEX ?= 0
HP3D_DEBUG ?= 0

HP3D_USE_INTEL_MKL ?= 1
HP3D_USE_MPI_F08 ?= 1
HP3D_USE_OPENMP ?= 1
HP3D_USE_X11 ?= 1

#------------------------------------------------------------------
# COMPILER SETTINGS (a 64bit processor architecture is assumed)
#------------------------------------------------------------------

# Enable OpenMP threading
HP3D_USE_OPENMP ?= 1

# Enable performance profiling with TAU
TAU_PERF ?= 0

# Use parallel threads to compile libraries and problems
J ?= 28

# Compiler
# Tau performance tool
ifeq ($(TAU_PERF),YES)
FC = tau_f90.sh
FF = tau_f90.sh
CC = tau_cc.sh
else
FC = mpif90
FF = mpif90
CC = mpicc
endif
FC = $(FF)
FFLAGS = -fPIC

# Additional Intel compiler flags
Expand Down Expand Up @@ -60,6 +60,11 @@ CFLAGS =
# (-DAdd_ or -DAdd__ or -DUPPER)
CDEFS = -DAdd_

# Utility used to launch MPI jobs
HP3D_MPIEXEC = ibrun
HP3D_MPIEXEC_NP = -n
HP3D_MPI_NP = 2

#------------------------------------------------------------------
# INTEL MKL LIBRARY
#------------------------------------------------------------------
Expand All @@ -72,6 +77,7 @@ MKL_LIBS = ${MKLROOT}/lib/intel64/libmkl_blas95_lp64.a \
${MKLROOT}/lib/intel64/libmkl_lapack95_lp64.a \
${MKLROOT}/lib/intel64/libmkl_scalapack_lp64.a \
-Wl,--start-group \
${MKLROOT}/lib/intel64/libmkl_cdft_core.a \
${MKLROOT}/lib/intel64/libmkl_intel_lp64.a \
${MKLROOT}/lib/intel64/libmkl_intel_thread.a \
${MKLROOT}/lib/intel64/libmkl_core.a \
Expand All @@ -91,16 +97,18 @@ MKL_INCS = -I${MKLROOT}/include/intel64/lp64 -I${MKLROOT}/include
# also adding Zoltan library dependency
# also adding PETSc library dependency (PETSc needs phdf5)

PETSC_DIR = /home1/apps/intel19/impi19_0/petsc/3.15

ifeq ($(HP3D_COMPLEX),1)
TACC_PETSC32_LIB = /home1/apps/intel19/impi19_0/petsc/3.15/clx-complex/lib
TACC_PETSC32_INC = /home1/apps/intel19/impi19_0/petsc/3.15/clx-complex/include
TACC_PETSC64_LIB = /home1/apps/intel19/impi19_0/petsc/3.15/clx-complexi64/lib
TACC_PETSC64_INC = /home1/apps/intel19/impi19_0/petsc/3.15/clx-complexi64/include
TACC_PETSC32_LIB = $(PETSC_DIR)/clx-complex/lib
TACC_PETSC32_INC = $(PETSC_DIR)/clx-complex/include
TACC_PETSC64_LIB = $(PETSC_DIR)/clx-complexi64/lib
TACC_PETSC64_INC = $(PETSC_DIR)/clx-complexi64/include
else
TACC_PETSC32_LIB = /home1/apps/intel19/impi19_0/petsc/3.15/clx/lib
TACC_PETSC32_INC = /home1/apps/intel19/impi19_0/petsc/3.15/clx/include
TACC_PETSC64_LIB = /home1/apps/intel19/impi19_0/petsc/3.15/clx-i64/lib
TACC_PETSC64_INC = /home1/apps/intel19/impi19_0/petsc/3.15/clx-i64/include
TACC_PETSC32_LIB = $(PETSC_DIR)/clx/lib
TACC_PETSC32_INC = $(PETSC_DIR)/clx/include
TACC_PETSC64_LIB = $(PETSC_DIR)/clx-i64/lib
TACC_PETSC64_INC = $(PETSC_DIR)/clx-i64/include
endif

MUMPS_LIBS = -L$(TACC_PETSC32_LIB) -ldmumps -lzmumps \
Expand All @@ -112,7 +120,7 @@ MUMPS_LIBS += -L$(TACC_PETSC64_LIB) -lesmumps -lzoltan \
-lptscotch -lptscotcherr

MUMPS_INCS = -I$(TACC_PETSC32_INC) -I$(TACC_PETSC64_INC) \
-I/home1/apps/intel19/impi19_0/petsc/3.15/include
-I$(PETSC_DIR)/include

#------------------------------------------------------------------
# VIS LIBRARY
Expand All @@ -135,13 +143,13 @@ VIS_INCS = -I${TACC_HDF5_INC}
HP3D_PATH_COMPLEX = $(HP3D_BASE_PATH)/complex
HP3D_PATH_REAL = $(HP3D_BASE_PATH)/real

OBJ_PATH_COMPLEX = _obj_complex_
OBJ_PATH_REAL = _obj_real_

SRC_PATH = src
MODULE_PATH = module
LIB_PATH = lib

OBJ_PATH_COMPLEX = _obj_complex_
OBJ_PATH_REAL = _obj_real_

ifeq ($(HP3D_COMPLEX),1)
HP3D_PATH = $(HP3D_PATH_COMPLEX)
OBJ_PATH = $(OBJ_PATH_COMPLEX)
Expand Down Expand Up @@ -217,5 +225,7 @@ PROB_INCS += $(HP3D_LINK_INCS) \
HP3D_USE_INTEL_MKL = 1
PROB_PP_DEFS = -D"HP3D_COMPLEX=$(HP3D_COMPLEX)" \
-D"HP3D_DEBUG=$(HP3D_DEBUG)" \
-D"HP3D_USE_INTEL_MKL=$(HP3D_USE_INTEL_MKL)" \
-D"HP3D_USE_MPI_F08=$(HP3D_USE_MPI_F08)" \
-D"HP3D_USE_OPENMP=$(HP3D_USE_OPENMP)" \
-D"HP3D_USE_INTEL_MKL=$(HP3D_USE_INTEL_MKL)"
-D"HP3D_USE_X11=$(HP3D_USE_X11)"
41 changes: 39 additions & 2 deletions trunk/problems/LASER/UW_COUPLED/elem/elem_heat.F90
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ subroutine elem_heat(Mdle, &
use data_structure3D
use laserParam
use commonParam
use mpi_wrapper
!
!..no implicit statements
implicit none
!
!..declare input/output variables
integer, intent(in) :: Mdle
integer, intent(in) :: NrTest
Expand Down Expand Up @@ -161,6 +164,9 @@ subroutine elem_heat(Mdle, &
real(8) :: rfval,therm_Load
complex(8) :: zfval
!
!..timer
! real(8) :: start_time,end_time
!
!..for lapack eigensolve
! complex(8), allocatable :: Z(:,:), WORK(:)
! real(8), allocatable :: W(:), RWORK(:)
Expand Down Expand Up @@ -188,6 +194,9 @@ subroutine elem_heat(Mdle, &
endif
#endif
!
!..TIMER
!start_time = MPI_Wtime()
!
!..allocate auxiliary matrices
allocate(gramP(NrTest*(NrTest+1)/2))
allocate(stiff_HH (NrTest ,NrdofH))
Expand Down Expand Up @@ -279,6 +288,14 @@ subroutine elem_heat(Mdle, &
stiff_HH = rZERO
stiff_HV = rZERO
!
!..end timer
! end_time = MPI_Wtime()
! !$OMP CRITICAL
! write(*,11) 'Allocate: ', end_time-start_time
! !$OMP END CRITICAL
!11 format(A,f12.5,' s')
! start_time = MPI_Wtime()
!
!---------------------------------------------------------------------
! E L E M E N T I N T E G R A L S |
!---------------------------------------------------------------------
Expand Down Expand Up @@ -441,7 +458,14 @@ subroutine elem_heat(Mdle, &
!..end loop through integration points
enddo
!
#if HP3D_DEBUG
!#if HP3D_DEBUG
!..end timer
! end_time = MPI_Wtime()
! !$OMP CRITICAL
! write(*,11) 'Interior: ', end_time-start_time
! !$OMP END CRITICAL
! start_time = MPI_Wtime()
!
!..printing Gram matrix
! iprint = 0
! if (iprint.eq.1) then
Expand All @@ -457,7 +481,7 @@ subroutine elem_heat(Mdle, &
! enddo
! pause
! endif
#endif
!#endif
!
!---------------------------------------------------------------------
! B O U N D A R Y I N T E G R A L S |
Expand Down Expand Up @@ -550,6 +574,13 @@ subroutine elem_heat(Mdle, &
!..end loop through element faces
enddo
!
!..end timer
! end_time = MPI_Wtime()
! !$OMP CRITICAL
! write(*,11) 'Boundary: ', end_time-start_time
! !$OMP END CRITICAL
! start_time = MPI_Wtime()
!
!..Compute condition number of Gram matrix
!kk = NrTest*(NrTest+1)/2
!allocate(gramEigen(kk))
Expand Down Expand Up @@ -630,5 +661,11 @@ subroutine elem_heat(Mdle, &
!
deallocate(raloc)
!
!..end timer
! end_time = MPI_Wtime()
! !$OMP CRITICAL
! write(*,11) 'Lin Alg : ', end_time-start_time
! !$OMP END CRITICAL
!
end subroutine elem_heat

10 changes: 8 additions & 2 deletions trunk/problems/LASER/UW_COUPLED/exec_job.F90
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ subroutine exec_job
integer :: flag(7)
logical :: iPvAttr(7)
integer :: physNick,nstop
logical :: ires
logical :: ires,balanced
!
integer :: i,ierr,numPts,fld
real(8) :: start_time,end_time
Expand All @@ -38,6 +38,7 @@ subroutine exec_job
call distr_mesh
!..set Zoltan partitioner
call zoltan_w_set_lb(ZOLTAN_LB_DEFAULT)
balanced = .false.
!
do i=1,IMAX+JMAX
!
Expand Down Expand Up @@ -69,7 +70,12 @@ subroutine exec_job
if (NUM_PROCS .eq. 1) goto 30
!
! ...set partitioner for load balancing, redistributes mesh in 'distr_mesh'
if (i .eq. IMAX-2) then
if (2**i .eq. NUM_PROCS) then
call zoltan_w_set_lb(ZOLTAN_LB_FIBER) ! fiber partitioner
balanced = .true.
elseif ((i.eq.9) .and. (.not.balanced)) then
call zoltan_w_set_lb(ZOLTAN_LB_FIBER) ! fiber partitioner
elseif ((i .eq. IMAX) .and. (.not. balanced)) then
call zoltan_w_set_lb(ZOLTAN_LB_FIBER) ! fiber partitioner
elseif (i .gt. IMAX) then
!call zoltan_w_set_lb(ZOLTAN_LB_GRAPH) ! graph partitioner
Expand Down
2 changes: 1 addition & 1 deletion trunk/problems/LASER/UW_COUPLED/main.F90
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ program main
QUIET_MODE = .true.
endif
enddo
1020 format (A,I4,A,A,A)
1020 format (A,I6,A,A,A)
call MPI_BARRIER (MPI_COMM_WORLD, ierr); start_time = MPI_Wtime()
call initialize
call MPI_BARRIER (MPI_COMM_WORLD, ierr); end_time = MPI_Wtime()
Expand Down
70 changes: 70 additions & 0 deletions trunk/problems/MAXWELL/GALERKIN/batch/clx.slurm
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/bin/bash
#----------------------------------------------------
# Example Slurm job script that launches the ./maxw
# executable through ibrun (see the last command below).
#
# NOTE(review): this header was derived from a template for
# TACC Stampede2 SKX nodes, but the file is named clx.slurm
# and the paths exported below point into a "frontera"
# directory -- confirm the intended system and node type.
#
# As configured by the #SBATCH directives below, this script requests:
# 1 node (capital N)
# 1 total MPI task (lower case n)
# 1 thread per MPI task (nthreads, passed to ./maxw via -nthreads)
#
# Last revised: 20 Oct 2017
#
# Notes:
#
# -- Launch this script by executing
# "sbatch clx.slurm" on a login node.
#
# -- Check current queue with: "squeue -u <username>"
#
# -- Use ibrun to launch MPI codes on TACC systems.
# Do not use mpirun or mpiexec.
#
# -- In most cases it's best to keep
# ( MPI ranks per node ) x ( threads per rank )
# to a number no more than the total cores per node
# (48 in the original SKX template -- TODO confirm
# the core count for the node type actually used).
#
# -- If you're running out of memory, try running
# fewer tasks and/or threads per node to give each
# process access to more memory.
#
# -- IMPI and MVAPICH2 both do sensible process pinning by default.
#
#----------------------------------------------------

#SBATCH -J hp3d # Job name
#SBATCH -o hp3d.o%j # Name of stdout output file
#SBATCH -e hp3d.e%j # Name of stderr error file
#SBATCH -p development # Queue (partition) name
#SBATCH -N 1 # Total # of nodes
#SBATCH -n 1 # Total # of mpi tasks
#SBATCH -t 00:02:00 # Run time (hh:mm:ss)
#SBATCH --mail-user=stefan@oden.utexas.edu
#SBATCH --mail-type=all # Send email at begin and end of job
#SBATCH -A DMSXXXXXX # Allocation name (req'd if you have more than 1)

# Other commands must follow all #SBATCH directives...

# Output directories and switches for the TAU performance tool.
# NOTE(review): these are user-specific absolute paths; adjust them
# before running under a different account or checkout location.
# Presumably TAU_PROFILE=1 enables profiling and TAU_TRACE=0 disables
# tracing -- confirm against the TAU documentation.
export PROFILEDIR=/work2/05246/sh43394/frontera/hp3d/trunk/problems/MAXWELL/GALERKIN/tau/profiles
export TRACEDIR=/work2/05246/sh43394/frontera/hp3d/trunk/problems/MAXWELL/GALERKIN/tau/traces
export TAU_PROFILE=1
export TAU_TRACE=0

# Record the loaded modules, working directory, and start time
# in the job's stdout file.
module list
pwd
date

# Set thread count (default value is 1)...
# NOTE(review): the standard OpenMP environment variable is
# OMP_NUM_THREADS; confirm whether KMP_NUM_THREADS below is intended.
# export KMP_NUM_THREADS=12
# Thread count handed to the executable through -nthreads below.
nthreads=1

# Set OpenMP stack size per thread
export KMP_STACKSIZE=64M

# Launch MPI code...
ibrun ./maxw -job 1 -p 5 -isol 0 -imax 2 -maxnods 1025000 -nthreads ${nthreads}

# Record the end time in the job's stdout file.
date
# ---------------------------------------------------
4 changes: 3 additions & 1 deletion trunk/problems/MAXWELL/GALERKIN/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ job=0
# - number of refinements (if job=1)
imax=3

# - MAXNODS
# MAXNODS
maxnods=525000

# OMP Stacksize
export KMP_STACKSIZE=32M

args=" -job ${job} -maxnods ${maxnods} -p ${p}"
args+=" -isol ${isol} -imax ${imax} -nthreads ${nthreads}"

mpirun -np ${nproc} ./maxw ${args}
#ibrun -n ${nproc} ./maxw ${args}
8 changes: 4 additions & 4 deletions trunk/problems/POISSON/GALERKIN/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ imax=2
# Initial mesh
geom='./geometries/hexa_orient0'

# - MAXNODS
# MAXNODS
maxnods=525000

export KMP_STACKSIZE=64M # p=5


# OMP stack size
export KMP_STACKSIZE=64M

args=" -job ${job} -maxnods ${maxnods} -p ${p}"
args+=" -file-geometry ${geom}"
args+=" -isol ${isol} -imax ${imax} -nthreads ${nthreads}"

mpirun -np ${nproc} ./pois ${args}
#ibrun -n ${nproc} ./pois ${args}
4 changes: 4 additions & 0 deletions trunk/problems/POISSON/PRIMAL_DPG/common/set_environment.F90
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ subroutine set_environment
use environment
use common_prob_data
use paraview
use parametersDPG
!
implicit none
!
Expand Down Expand Up @@ -97,5 +98,8 @@ subroutine set_environment
!..Boundary condition flag
IBC_PROB = BC_DIRICHLET
!
!..Set order increment for DPG method
NORD_ADD = 1
!
end subroutine set_environment

2 changes: 1 addition & 1 deletion trunk/src/hpinterp/edge/dhpedgeE.F90
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ subroutine dhpedgeE(Mdle,Iflag,No,Etav,Ntype,Icase,Bcond,&
+ zvalEeta(2,1:MAXEQNE)*vEeta(2) &
+ zvalEeta(3,1:MAXEQNE)*vEeta(3))*weight
!
! loop through element face trial functions
! loop through element edge trial functions
do i=1,ndofE_edge
ki = Iedge-1 + i
!
Expand Down
2 changes: 1 addition & 1 deletion trunk/src/hpinterp/edge/dhpedgeH.F90
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ subroutine dhpedgeH(Mdle,Iflag,No,Etav,Ntype,Icase,Bcond, &
+ zdvalHdeta(1:MAXEQNH,2)*dvHdeta(2) &
+ zdvalHdeta(1:MAXEQNH,3)*dvHdeta(3))*weight
!
! loop through element face trial functions
! loop through element edge trial functions
do i=1,ndofH_edge
ki = nrdofH + i
!
Expand Down
Loading

0 comments on commit 8ca538c

Please sign in to comment.