From 8e4357b6153b644c83a30ada39f224b4ae6cb809 Mon Sep 17 00:00:00 2001
From: Dom Heinzeller <climbfuji@ymail.com>
Date: Tue, 8 Feb 2022 19:44:22 -0700
Subject: [PATCH] GPU-enabled version of Grell-Freitas convection

---
 physics/cu_gf_deep.F90        | 617 +++++++++++++++++++++++++++++-----
 physics/cu_gf_driver.F90      | 133 +++++++-
 physics/cu_gf_driver_post.F90 |   3 +
 physics/cu_gf_driver_pre.F90  |  11 +
 physics/cu_gf_sh.F90          | 118 ++++++-
 5 files changed, 780 insertions(+), 102 deletions(-)

diff --git a/physics/cu_gf_deep.F90 b/physics/cu_gf_deep.F90
index e26afef3e..f59a985cd 100644
--- a/physics/cu_gf_deep.F90
+++ b/physics/cu_gf_deep.F90
@@ -47,6 +47,27 @@ module cu_gf_deep
 
 contains
 
+   integer function my_maxloc1d(A,N,dir)
+!$acc routine vector
+      implicit none
+      real(kind_phys), intent(in) :: A(:)
+      integer, intent(in) :: N,dir
+
+      real(kind_phys) :: imaxval
+      integer :: i
+
+      imaxval = MAXVAL(A)
+      my_maxloc1d = 1
+!$acc loop
+      do i = 1, N
+         if ( A(i) == imaxval ) then
+            my_maxloc1d = i
+            return
+         endif
+      end do
+      return
+   end function my_maxloc1d
+
 !>\ingroup cu_gf_deep_group
 !> \section general_gf_deep GF Deep Convection General Algorithm
 !> @{
@@ -126,13 +147,16 @@ subroutine cu_gf_deep_run(        &
         ,intent (in  )                   ::  rand_clos
      real(kind=kind_phys),  dimension (its:ite)                   &
         ,intent (in  )                   ::  rand_mom,rand_vmas
+!$acc declare copyin(rand_clos,rand_mom,rand_vmas)
 
      integer, intent(in) :: do_capsuppress
-     real(kind=kind_phys), intent(in), optional, dimension(:) :: cap_suppress_j
+     real(kind=kind_phys), intent(in), dimension(:) :: cap_suppress_j
+!$acc declare create(cap_suppress_j)
   !
   ! 
   !
       real(kind=kind_phys),    dimension (its:ite,1:maxens3) :: xf_ens,pr_ens
+!$acc declare create(xf_ens,pr_ens)
   ! outtem = output temp tendency (per s)
   ! outq   = output q tendency (per s)
   ! outqc  = output qc tendency (per s)
@@ -146,15 +170,19 @@ subroutine cu_gf_deep_run(        &
      real(kind=kind_phys),    dimension (its:ite)                                      &
         ,intent (inout  )                   ::                         &
         pre,xmb_out
+!$acc declare copy(cnvwt,outu,outv,outt,outq,outqc,cupclw,frh_out,pre,xmb_out)
      real(kind=kind_phys),    dimension (its:ite)                                      &
         ,intent (in  )                   ::                            &
         hfx,qfx,xmbm_in,xmbs_in
+!$acc declare copyin(hfx,qfx,xmbm_in,xmbs_in)
      integer,    dimension (its:ite)                                   &
         ,intent (inout  )                ::                            &
         kbcon,ktop
+!$acc declare copy(kbcon,ktop)
      integer,    dimension (its:ite)                                   &
         ,intent (in  )                   ::                            &
         kpbl,tropics
+!$acc declare copyin(kpbl,tropics)
   !
   ! basic environmental input includes moisture convergence (mconv)
   ! omega (omeg), windspeed (us,vs), and a flag (ierr) to turn off
@@ -163,18 +191,23 @@ subroutine cu_gf_deep_run(        &
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                              &
         ,intent (in   )                   ::                           &
         dhdt,rho,t,po,us,vs,tn
+!$acc declare copyin(dhdt,rho,t,po,us,vs,tn)
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                              &
         ,intent (inout   )                ::                           &
         omeg
+!$acc declare copy(omeg)
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                              &
         ,intent (inout)                   ::                           &
          q,qo,zuo,zdo,zdm
+!$acc declare copy(q,qo,zuo,zdo,zdm)
      real(kind=kind_phys), dimension (its:ite)                                         &
         ,intent (in   )                   ::                           &
         dx,z1,psur,xland
+!$acc declare copyin(dx,z1,psur,xland)
      real(kind=kind_phys), dimension (its:ite)                                         &
         ,intent (inout   )                ::                           &
         mconv,ccn
+!$acc declare copy(mconv,ccn)
 
        
        real(kind=kind_phys)                                                            &
@@ -191,6 +224,7 @@ subroutine cu_gf_deep_run(        &
         edtc
      real(kind=kind_phys),    dimension (its:ite,kts:kte,1) ::                         &
         dellat_ens,dellaqc_ens,dellaq_ens,pwo_ens
+!$acc declare create(xaa0_ens,edtc,dellat_ens,dellaqc_ens,dellaq_ens,pwo_ens)
 !
 !
 !
@@ -275,6 +309,17 @@ subroutine cu_gf_deep_run(        &
 
         cd,cdd,dellah,dellaq,dellat,dellaqc,                            &
         u_cup,v_cup,uc,vc,ucd,vcd,dellu,dellv
+!$acc declare create( &
+!$acc        entr_rate_2d,mentrd_rate_2d,he,hes,qes,z, heo,heso,qeso,zo,     &                    
+!$acc        xhe,xhes,xqes,xz,xt,xq,qes_cup,q_cup,he_cup,hes_cup,z_cup,      &
+!$acc        p_cup,gamma_cup,t_cup, qeso_cup,qo_cup,heo_cup,heso_cup,        &
+!$acc        zo_cup,po_cup,gammao_cup,tn_cup,                                &    
+!$acc        xqes_cup,xq_cup,xhe_cup,xhes_cup,xz_cup,                        &
+!$acc        xt_cup, dby,hc,zu,clw_all,                                      &
+!$acc        dbyo,qco,qrcdo,pwdo,pwo,hcdo,qcdo,dbydo,hco,qrco,               &
+!$acc        dbyt,xdby,xhc,xzu,                                              &
+!$acc        cd,cdd,dellah,dellaq,dellat,dellaqc,                            &
+!$acc        u_cup,v_cup,uc,vc,ucd,vcd,dellu,dellv)
 
   ! aa0 cloud work function for downdraft
   ! edt = epsilon
@@ -294,9 +339,18 @@ subroutine cu_gf_deep_run(        &
      integer,    dimension (its:ite) ::                                  &
        kzdown,kdet,k22,jmin,kstabi,kstabm,k22x,xland1,                   &  
        ktopdby,kbconx,ierr2,ierr3,kbmax
+!$acc declare create(edt,edto,edtm,aa1,aa0,xaa0,hkb,                     &
+!$acc       hkbo,xhkb,                                                   &
+!$acc       xmb,pwavo,ccnloss,                                           &
+!$acc       pwevo,bu,bud,cap_max,                                        &
+!$acc       cap_max_increment,closure_n,psum,psumh,sig,sigd,             &
+!$acc       axx,edtmax,edtmin,entr_rate,                                 &
+!$acc       kzdown,kdet,k22,jmin,kstabi,kstabm,k22x,xland1,              &  
+!$acc       ktopdby,kbconx,ierr2,ierr3,kbmax)
 
      integer,  dimension (its:ite), intent(inout) :: ierr
      integer,  dimension (its:ite), intent(in) :: csum
+!$acc declare copy(ierr) copyin(csum)
      integer                              ::                             &
        iloop,nens3,ki,kk,i,k
      real(kind=kind_phys)                            ::                             &
@@ -307,9 +361,11 @@ subroutine cu_gf_deep_run(        &
       detup,subdown,entdoj,entupk,detupk,totmas
 
      real(kind=kind_phys), dimension (its:ite) :: lambau,flux_tun,zws,ztexec,zqexec
+!$acc declare create(lambau,flux_tun,zws,ztexec,zqexec)
 
      integer :: jprnt,jmini,start_k22
      logical :: keep_going,flg(its:ite)
+!$acc declare create(flg)
      
      character*50 :: ierrc(its:ite)
      character*4  :: cumulus
@@ -318,9 +374,12 @@ subroutine cu_gf_deep_run(        &
       ,up_massentro,up_massdetro,dd_massentro,dd_massdetro
      real(kind=kind_phys),    dimension (its:ite,kts:kte) ::                              &
        up_massentru,up_massdetru,dd_massentru,dd_massdetru
+!$acc declare create(up_massentr,up_massdetr,c1d,up_massentro,up_massdetro,dd_massentro,dd_massdetro, &
+!$acc                up_massentru,up_massdetru,dd_massentru,dd_massdetru)
      real(kind=kind_phys) c1_max,buo_flux,pgcon,pgc,blqe
     
      real(kind=kind_phys) :: xff_mid(its:ite,2)
+!$acc declare create(xff_mid)
      integer :: iversion=1
      real(kind=kind_phys) :: denom,h_entr,umean,t_star,dq
      integer, intent(in) :: dicycle
@@ -329,32 +388,46 @@ subroutine cu_gf_deep_run(        &
                                               ,qeso_cup_bl,qo_cup_bl, heo_cup_bl,heso_cup_bl &
                                               ,gammao_cup_bl,tn_cup_bl,hco_bl,dbyo_bl
      real(kind=kind_phys), dimension(its:ite) :: xf_dicycle
+!$acc declare create(aa1_bl,hkbo_bl,tau_bl,tau_ecmwf,wmean,             &
+!$acc                tn_bl, qo_bl, qeso_bl, heo_bl, heso_bl,            &
+!$acc                qeso_cup_bl,qo_cup_bl, heo_cup_bl,heso_cup_bl,     &
+!$acc                gammao_cup_bl,tn_cup_bl,hco_bl,dbyo_bl,xf_dicycle)
      real(kind=kind_phys), intent(inout), dimension(its:ite,10) :: forcing
+!$acc declare copy(forcing)
      integer :: turn,pmin_lev(its:ite),start_level(its:ite),ktopkeep(its:ite)
      real(kind=kind_phys),    dimension (its:ite,kts:kte) :: dtempdz
      integer, dimension (its:ite,kts:kte) ::  k_inv_layers 
      real(kind=kind_phys),    dimension (its:ite) :: c0    ! HCB
+!$acc declare create(pmin_lev,start_level,ktopkeep,dtempdz,k_inv_layers,c0)
  
 ! rainevap from sas
      real(kind=kind_phys) zuh2(40)
      real(kind=kind_phys), dimension (its:ite) :: rntot,delqev,delq2,qevap,rn,qcond
+!$acc declare create(zuh2,rntot,delqev,delq2,qevap,rn,qcond)
      real(kind=kind_phys) :: rain,t1,q1,elocp,evef,el2orc,evfact,evfactl,g_rain,e_dn,c_up
      real(kind=kind_phys) :: pgeoh,dts,fp,fpi,pmin,x_add,beta,beta_u
      real(kind=kind_phys) :: cbeg,cmid,cend,const_a,const_b,const_c
 !---meltglac-------------------------------------------------
 
      real(kind=kind_phys),    dimension (its:ite,kts:kte) ::  p_liq_ice,melting_layer,melting
+!$acc declare create(p_liq_ice,melting_layer,melting)
+
+     integer :: itemp
 
 !---meltglac-------------------------------------------------
+!$acc kernels
       melting_layer(:,:)=0.
       melting(:,:)=0.
       flux_tun(:)=fluxtune
+!$acc end kernels
 !      if(imid.eq.1)flux_tun(:)=fluxtune+.5
       cumulus='deep'
       if(imid.eq.1)cumulus='mid'
       pmin=150.
       if(imid.eq.1)pmin=75.
+!$acc kernels
       ktopdby(:)=0
+!$acc end kernels
       c1_max=c1
       elocp=xlv/cp
       el2orc=xlv*xlv/(r_v*cp)
@@ -370,18 +443,21 @@ subroutine cu_gf_deep_run(        &
 !
 ! ecmwf
        pgcon=0.
+!$acc kernels
        lambau(:)=2.0
        if(imid.eq.1)lambau(:)=2.0
 ! here random must be between -1 and 1
        if(nranflag == 1)then
            lambau(:)=1.5+rand_mom(:)
        endif
+!$acc end kernels
 ! sas
 !     lambau=0.
 !     pgcon=-.55
 !
 !---------------------------------------------------- ! HCB
 ! Set cloud water to rain water conversion rate (c0)
+!$acc kernels
       c0(:)=0.004
       do i=its,itf
          xland1(i)=int(xland(i)+.0001) ! 1.
@@ -393,8 +469,10 @@ subroutine cu_gf_deep_run(        &
            c0(i)=0.002
          endif
       enddo
+!$acc end kernels
 
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!$acc kernels
       ztexec(:)     = 0.
       zqexec(:)     = 0.
       zws(:)        = 0.
@@ -419,10 +497,12 @@ subroutine cu_gf_deep_run(        &
          zws(i) = 1.2*zws(i)**.3333
          zws(i) = zws(i)*rho(i,kpbl(i)) !check if zrho is correct
       enddo
+!$acc end kernels
 !     cap_maxs=225.
 !     if(imid.eq.1)cap_maxs=150.
       cap_maxs=75. ! 150.
 !     if(imid.eq.1)cap_maxs=100.
+!$acc kernels
       do i=its,itf
         edto(i)=0.
         closure_n(i)=16.
@@ -441,14 +521,20 @@ subroutine cu_gf_deep_run(        &
             if(ztexec(i).gt.0.)cap_max(i)=cap_max(i)+25.
             if(ztexec(i).lt.0.)cap_max(i)=cap_max(i)-25.
         endif
+#ifndef _OPENACC
         ierrc(i)=" "
+#endif
 !       cap_max_increment(i)=1.
       enddo
+!$acc end kernels
       if(use_excess == 0 )then
+!$acc kernels
        ztexec(:)=0
        zqexec(:)=0
+!$acc end kernels
       endif
       if(do_capsuppress == 1) then
+!$acc kernels
          do i=its,itf
             cap_max(i)=cap_maxs
             if (abs(cap_suppress_j(i) - 1.0 ) < 0.1 ) then
@@ -457,12 +543,18 @@ subroutine cu_gf_deep_run(        &
                cap_max(i)=10.0
             endif
          enddo
+!$acc end kernels
       endif
 !
 !--- initial entrainment rate (these may be changed later on in the
 !--- program
 !
+!$acc kernels
       start_level(:)=kte
+!$acc end kernels
+
+!$acc kernels
+!$acc loop private(radius,frh)
       do i=its,ite
          c1d(i,:)= 0. !c1 ! 0. ! c1 ! max(.003,c1+float(csum(i))*.0001)
          entr_rate(i)=7.e-5 - min(20.,float(csum(i))) * 3.e-6
@@ -479,6 +571,7 @@ subroutine cu_gf_deep_run(        &
          sig(i)=(1.-frh)**2
          frh_out(i) = frh
       enddo
+!$acc end kernels
       sig_thresh = (1.-frh_thresh)**2
 
       
@@ -488,6 +581,7 @@ subroutine cu_gf_deep_run(        &
 !
 !--- initial detrainmentrates
 !
+!$acc kernels
       do k=kts,ktf
       do i=its,itf
         cnvwt(i,k)=0.
@@ -504,14 +598,17 @@ subroutine cu_gf_deep_run(        &
         dellaqc(i,k)=0.
       enddo
       enddo
+!$acc end kernels
 !
 !--- max/min allowed value for epsilon (ratio downdraft base mass flux/updraft
 !    base mass flux
 !
+!$acc kernels
       edtmax(:)=1.
       if(imid.eq.1)edtmax(:)=.15
       edtmin(:)=.1
       if(imid.eq.1)edtmin(:)=.05
+!$acc end kernels
 !
 !--- minimum depth (m), clouds must have
 !
@@ -521,6 +618,7 @@ subroutine cu_gf_deep_run(        &
 !--- maximum depth (mb) of capping 
 !--- inversion (larger cap = no convection)
 !
+!$acc kernels
       do i=its,itf
 !        if(imid.eq.0)then
 !          edtmax(i)=max(0.5,.8-float(csum(i))*.015) !.3)
@@ -533,8 +631,9 @@ subroutine cu_gf_deep_run(        &
         kstabm(i)=ktf-1
         ierr2(i)=0
         ierr3(i)=0
-        x_add=0.
       enddo
+!$acc end kernels
+      x_add=0.
 !     do i=its,itf
 !         cap_max(i)=cap_maxs
 !         cap_max3(i)=25.
@@ -559,13 +658,14 @@ subroutine cu_gf_deep_run(        &
 !
 !--- environmental conditions, first heights
 !
+!$acc kernels
       do i=its,itf
             do k=1,maxens3
                xf_ens(i,k)=0.
                pr_ens(i,k)=0.
             enddo
       enddo
-
+!$acc end kernels
 !
 !> - Call cup_env() to calculate moist static energy, heights, qes
 !
@@ -596,6 +696,7 @@ subroutine cu_gf_deep_run(        &
       call get_partition_liq_ice(ierr,tn,po_cup,p_liq_ice,melting_layer,&
                                  itf,ktf,its,ite,kts,kte,cumulus)
 !---meltglac-------------------------------------------------
+!$acc kernels
       do i=its,itf
         if(ierr(i).eq.0)then
           if(kpbl(i).gt.5 .and. imid.eq.1)cap_max(i)=po_cup(i,kpbl(i))
@@ -629,29 +730,36 @@ subroutine cu_gf_deep_run(        &
 !
         endif
       enddo
+!$acc end kernels
+
 !
 !
 !
 !> - Determine level with highest moist static energy content (\p k22)
 !
       start_k22=2
+!$acc parallel loop
        do 36 i=its,itf
          if(ierr(i).eq.0)then
             k22(i)=maxloc(heo_cup(i,start_k22:kbmax(i)+2),1)+start_k22-1
             if(k22(i).ge.kbmax(i))then
              ierr(i)=2
+#ifndef _OPENACC
              ierrc(i)="could not find k22"
+#endif
              ktop(i)=0
              k22(i)=0
              kbcon(i)=0
            endif
          endif
  36   continue
+!$acc end parallel
+
 !
 !> - call get_cloud_bc() and cup_kbcon() to determine the 
 !! level of convective cloud base (\p kbcon)
 !
-
+!$acc parallel loop private(x_add)
       do i=its,itf
        if(ierr(i).eq.0)then
          x_add = xlv*zqexec(i)+cp*ztexec(i)
@@ -659,6 +767,8 @@ subroutine cu_gf_deep_run(        &
          call get_cloud_bc(kte,heo_cup (i,1:kte),hkbo (i),k22(i),x_add)
        endif ! ierr
       enddo
+!$acc end parallel
+
       jprnt=0
       iloop=1
       if(imid.eq.1)iloop=5
@@ -674,6 +784,7 @@ subroutine cu_gf_deep_run(        &
       call cup_minimi(heso_cup,kbcon,kstabm,kstabi,ierr,                        &
            itf,ktf,                                                             &
            its,ite, kts,kte)
+!$acc parallel loop private(frh,x_add)
       do i=its,itf
          if(ierr(i) == 0)then
            frh = min(qo_cup(i,kbcon(i))/qeso_cup(i,kbcon(i)),1.)
@@ -686,6 +797,7 @@ subroutine cu_gf_deep_run(        &
 !
 !           if(imid.eq.0 .and. xland1(i).eq.0)x_add=150.
            x_add=0.
+!$acc loop seq
            do k=kbcon(i)+1,ktf
              if(po(i,kbcon(i))-po(i,k) > pmin+x_add)then
                 pmin_lev(i)=k
@@ -700,6 +812,8 @@ subroutine cu_gf_deep_run(        &
             call get_cloud_bc(kte,he_cup (i,1:kte),hkb (i),k22(i),x_add)
          endif
       enddo
+!$acc end parallel
+
 !
 !--- get inversion layers for mid level cloud tops
 !
@@ -707,6 +821,7 @@ subroutine cu_gf_deep_run(        &
       call get_inversion_layers(ierr,p_cup,t_cup,z_cup,q_cup,qes_cup,k_inv_layers, &
                                kbcon,kstabi,dtempdz,itf,ktf,its,ite, kts,kte)
       endif
+!$acc kernels
       do i=its,itf
          if(kstabi(i).lt.kbcon(i))then
            kbcon(i)=1
@@ -729,6 +844,7 @@ subroutine cu_gf_deep_run(        &
                  ktop(i)=min(kstabi(i),k_inv_layers(i,2))
                  ktopdby(i)=ktop(i)
                else
+!$acc loop seq
                  do k=kbcon(i)+1,ktf
                   if((po_cup(i,k22(i))-po_cup(i,k)).gt.500.)then
                     ktop(i)=k
@@ -741,6 +857,8 @@ subroutine cu_gf_deep_run(        &
 
           endif
       enddo
+!$acc end kernels
+
 !
 !> - Call rates_up_pdf() to get normalized mass flux, entrainment and detrainmentrates for updraft
 !
@@ -757,20 +875,24 @@ subroutine cu_gf_deep_run(        &
 !
 !
 !
+!$acc kernels
       do i=its,itf
        if(ierr(i).eq.0)then
 
          if(k22(i).gt.1)then
+!$acc loop independent
             do k=1,k22(i) -1
               zuo(i,k)=0.
               zu (i,k)=0.
               xzu(i,k)=0.
             enddo
          endif
+!$acc loop independent
          do k=k22(i),ktop(i)
           xzu(i,k)= zuo(i,k)
           zu (i,k)= zuo(i,k)
          enddo
+!$acc loop independent
          do k=ktop(i)+1,kte
            zuo(i,k)=0.
            zu (i,k)=0.
@@ -778,6 +900,7 @@ subroutine cu_gf_deep_run(        &
          enddo
         endif
       enddo
+!$acc end kernels
 !
 !> - Call get_lateral_massflux() to calculate mass entrainment and detrainment
 !
@@ -785,12 +908,12 @@ subroutine cu_gf_deep_run(        &
       call get_lateral_massflux(itf,ktf, its,ite, kts,kte                                &
                                 ,ierr,ktop,zo_cup,zuo,cd,entr_rate_2d                    &
                                 ,up_massentro, up_massdetro ,up_massentr, up_massdetr    &
-                                ,'mid',kbcon,k22,up_massentru,up_massdetru,lambau)
+                                ,3,kbcon,k22,up_massentru,up_massdetru,lambau)
      else
       call get_lateral_massflux(itf,ktf, its,ite, kts,kte                                &
                                 ,ierr,ktop,zo_cup,zuo,cd,entr_rate_2d                    &
                                 ,up_massentro, up_massdetro ,up_massentr, up_massdetr    &
-                                ,'deep',kbcon,k22,up_massentru,up_massdetru,lambau)
+                                ,1,kbcon,k22,up_massentru,up_massdetru,lambau)
      endif
 
 
@@ -798,6 +921,7 @@ subroutine cu_gf_deep_run(        &
 !   note: ktop here already includes overshooting, ktopdby is without
 !   overshooting
 !
+!$acc kernels
       do k=kts,ktf
        do i=its,itf
          uc  (i,k)=0.
@@ -823,17 +947,19 @@ subroutine cu_gf_deep_run(        &
          hco(i,k)=hkbo(i)
        endif 
       enddo
-
+!$acc end kernels
 !
 !---meltglac-------------------------------------------------
      !
      !--- 1st guess for moist static energy and dbyo (not including ice phase)
      !
+!$acc parallel loop private(denom,kk,ki)
       do i=its,itf
        ktopkeep(i)=0
        dbyt(i,:)=0.
        if(ierr(i) /= 0) cycle                 
        ktopkeep(i)=ktop(i)
+!$acc loop seq
        do k=start_level(i) +1,ktop(i)  !mass cons option
          
           denom=zuo(i,k-1)-.5*up_massdetro(i,k-1)+up_massentro(i,k-1)
@@ -847,8 +973,9 @@ subroutine cu_gf_deep_run(        &
           dbyo(i,k)=hco(i,k)-heso_cup(i,k)
        enddo
        ! for now no overshooting (only very little)
-       kk=maxloc(dbyt(i,:),1)
-       ki=maxloc(zuo(i,:),1)
+       !kk=maxloc(dbyt(i,:),1)
+       !ki=maxloc(zuo(i,:),1)
+!$acc loop seq
        do k=ktop(i)-1,kbcon(i),-1
            if(dbyo(i,k).gt.0.)then
               ktopkeep(i)=k+1
@@ -858,12 +985,16 @@ subroutine cu_gf_deep_run(        &
         !ktop(i)=ktopkeep(i)
         !if(ierr(i).eq.0)ktop(i)=ktopkeep(i)
       enddo
+!$acc end parallel
+
+!$acc kernels
       do 37 i=its,itf
          kzdown(i)=0
          if(ierr(i).eq.0)then
             zktop=(zo_cup(i,ktop(i))-z1(i))*.6
             if(imid.eq.1)zktop=(zo_cup(i,ktop(i))-z1(i))*.4
             zktop=min(zktop+z1(i),zcutdown+z1(i))
+!$acc loop seq
             do k=kts,ktf
               if(zo_cup(i,k).gt.zktop)then
                  kzdown(i)=k
@@ -873,12 +1004,15 @@ subroutine cu_gf_deep_run(        &
               enddo
          endif
  37   continue
+!$acc end kernels
+
 !
 !--- downdraft originating level - jmin
 !
       call cup_minimi(heso_cup,k22,kzdown,jmin,ierr, &
            itf,ktf, &
            its,ite, kts,kte)
+!$acc kernels
       do 100 i=its,itf
          if(ierr(i).eq.0)then
 !
@@ -899,6 +1033,7 @@ subroutine cu_gf_deep_run(        &
            hcdo(i,ki)=heso_cup(i,ki)
            dz=zo_cup(i,ki+1)-zo_cup(i,ki)
            dh=0.
+!$acc loop seq
            do k=ki-1,1,-1
              hcdo(i,k)=heso_cup(i,jmini)
              dz=zo_cup(i,k+1)-zo_cup(i,k)
@@ -909,7 +1044,9 @@ subroutine cu_gf_deep_run(        &
                  keep_going = .true.
                else
                  ierr(i) = 9
+#ifndef _OPENACC
                  ierrc(i) = "could not find jmini9"
+#endif
                  exit
                endif
              endif
@@ -918,7 +1055,9 @@ subroutine cu_gf_deep_run(        &
          jmin(i) = jmini 
          if ( jmini .le. 5 ) then
            ierr(i)=4
+#ifndef _OPENACC
            ierrc(i) = "could not find jmini4"
+#endif
          endif
        endif
 100   continue
@@ -945,12 +1084,13 @@ subroutine cu_gf_deep_run(        &
 !          endif
 !         enddo
 !         if(imid.eq.1)c1d(i,:)=0.003
-         
+!$acc loop independent
        do k=ktop(i)+1,ktf
            hco(i,k)=heso_cup(i,k)
            dbyo(i,k)=0.
        enddo
       enddo
+!$acc end kernels
      !
 !> - Call cup_up_moisture() to calculate moisture properties of updraft
      !
@@ -975,13 +1115,14 @@ subroutine cu_gf_deep_run(        &
 !                             ,itf,ktf,its,ite, kts,kte, cumulus                    )
 !---meltglac-------------------------------------------------
 
-
+!$acc kernels
       do i=its,itf
 
        ktopkeep(i)=0
        dbyt(i,:)=0.
        if(ierr(i) /= 0) cycle                 
        ktopkeep(i)=ktop(i)
+!$acc loop seq
        do k=start_level(i) +1,ktop(i)  !mass cons option
          
           denom=zuo(i,k-1)-.5*up_massdetro(i,k-1)+up_massentro(i,k-1)
@@ -1027,6 +1168,7 @@ subroutine cu_gf_deep_run(        &
 !         ierr(i)=423
 !       endif
 !
+!$acc loop seq
         do k=ktop(i)-1,kbcon(i),-1
            if(dbyo(i,k).gt.0.)then
               ktopkeep(i)=k+1
@@ -1036,7 +1178,10 @@ subroutine cu_gf_deep_run(        &
         !ktop(i)=ktopkeep(i)
         !if(ierr(i).eq.0)ktop(i)=ktopkeep(i)
       enddo
+!$acc end kernels
+
 41    continue
+!$acc kernels
       do i=its,itf
        if(ierr(i) /= 0) cycle                 
        do k=ktop(i)+1,ktf
@@ -1061,10 +1206,14 @@ subroutine cu_gf_deep_run(        &
         if(ierr(i)/=0)cycle
         if(ktop(i).lt.kbcon(i)+2)then
               ierr(i)=5
+#ifndef _OPENACC
               ierrc(i)='ktop too small deep'
+#endif
               ktop(i)=0
         endif
       enddo
+!$acc end kernels
+
 !!      do 37 i=its,itf
 !         kzdown(i)=0
 !         if(ierr(i).eq.0)then
@@ -1133,20 +1282,25 @@ subroutine cu_gf_deep_run(        &
 ! - must have at least depth_min m between cloud convective base
 !     and cloud top.
 !
+!$acc kernels
       do i=its,itf
          if(ierr(i).eq.0)then
             if ( jmin(i) - 1 .lt. kdet(i)   ) kdet(i) = jmin(i)-1
             if(-zo_cup(i,kbcon(i))+zo_cup(i,ktop(i)).lt.depth_min)then
                ierr(i)=6
+#ifndef _OPENACC
                ierrc(i)="cloud depth very shallow"
+#endif
             endif
          endif
       enddo
+!$acc end kernels
 
 !
 !--- normalized downdraft mass flux profile,also work on bottom detrainment
 !--- in this routine
 !
+!$acc kernels
       do k=kts,ktf
       do i=its,itf
        zdo(i,k)=0.
@@ -1162,6 +1316,9 @@ subroutine cu_gf_deep_run(        &
        mentrd_rate_2d(i,k)=entr_rate(i)
       enddo
       enddo
+!$acc end kernels
+
+!$acc parallel loop private(beta,itemp,dzo,h_entr)
       do i=its,itf
         if(ierr(i)/=0)cycle
         beta=max(.025,.055-float(csum(i))*.0015)  !.02
@@ -1174,7 +1331,8 @@ subroutine cu_gf_deep_run(        &
         cdd(i,jmin(i))=0.
         dd_massdetro(i,:)=0.
         dd_massentro(i,:)=0.
-        call get_zu_zd_pdf_fim(0,po_cup(i,:),rand_vmas(i),0.0_kind_phys,ipr,xland1(i),zuh2,"down",ierr(i),kdet(i),jmin(i)+1,zdo(i,:),kts,kte,ktf,beta,kpbl(i),csum(i),pmin_lev(i))
+        call get_zu_zd_pdf_fim(0,po_cup(i,:),rand_vmas(i),0.0_kind_phys,ipr,xland1(i),zuh2,4, &
+            ierr(i),kdet(i),jmin(i)+1,zdo(i,:),kts,kte,ktf,beta,kpbl(i),csum(i),pmin_lev(i))
         if(zdo(i,jmin(i)) .lt.1.e-8)then
           zdo(i,jmin(i))=0.
           jmin(i)=jmin(i)-1
@@ -1185,8 +1343,9 @@ subroutine cu_gf_deep_run(        &
              cycle
           endif
         endif
-        
-        do ki=jmin(i)  ,maxloc(zdo(i,:),1),-1
+
+        itemp = maxloc(zdo(i,:),1)
+        do ki=jmin(i)  , itemp,-1
           !=> from jmin to maximum value zd -> change entrainment
           dzo=zo_cup(i,ki+1)-zo_cup(i,ki)
           dd_massdetro(i,ki)=cdd(i,ki)*dzo*zdo(i,ki+1)
@@ -1199,7 +1358,7 @@ subroutine cu_gf_deep_run(        &
           if(zdo(i,ki+1).gt.0.)mentrd_rate_2d(i,ki)=dd_massentro(i,ki)/(dzo*zdo(i,ki+1))
         enddo
         mentrd_rate_2d(i,1)=0.
-        do ki=maxloc(zdo(i,:),1)-1,1,-1
+        do ki=itemp-1,1,-1
           !=> from maximum value zd to surface -> change detrainment
           dzo=zo_cup(i,ki+1)-zo_cup(i,ki)
           dd_massentro(i,ki)=mentrd_rate_2d(i,ki)*dzo*zdo(i,ki+1)
@@ -1244,6 +1403,7 @@ subroutine cu_gf_deep_run(        &
             dbydo(i,jmin(i))=hcdo(i,jmin(i))-heso_cup(i,jmin(i))
             bud(i)=dbydo(i,jmin(i))*(zo_cup(i,jmin(i)+1)-zo_cup(i,jmin(i)))
             ucd(i,jmin(i)+1)=.5*(uc(i,jmin(i)+1)+u_cup(i,jmin(i)+1))
+!$acc loop seq
             do ki=jmin(i)  ,1,-1
              dzo=zo_cup(i,ki+1)-zo_cup(i,ki)
              h_entr=.5*(heo(i,ki)+.5*(hco(i,ki)+hco(i,ki+1)))
@@ -1268,9 +1428,13 @@ subroutine cu_gf_deep_run(        &
 
         if(bud(i).gt.0)then
           ierr(i)=7
+#ifndef _OPENACC
           ierrc(i)='downdraft is not negatively buoyant '
+#endif
         endif
       enddo
+!$acc end parallel
+
 !
 !> - Call cup_dd_moisture() to calculate moisture properties of downdraft
 !
@@ -1299,6 +1463,7 @@ subroutine cu_gf_deep_run(        &
 !             its,ite, kts,kte)
 !      endif
 !---meltglac-------------------------------------------------
+!$acc kernels
       do i=its,itf
         if(ierr(i)/=0)cycle
         do k=kts+1,ktop(i)
@@ -1307,6 +1472,7 @@ subroutine cu_gf_deep_run(        &
           cnvwt(i,k)=zuo(i,k)*cupclw(i,k)*g/dp
         enddo
       enddo
+!$acc end kernels
 !
 !> - Call cup_up_aa0() to calculate workfunctions for updrafts
 !
@@ -1318,20 +1484,28 @@ subroutine cu_gf_deep_run(        &
            kbcon,ktop,ierr,                                                      &
            itf,ktf,                                                              &
            its,ite, kts,kte)
+
+!$acc kernels
       do i=its,itf
         if(ierr(i)/=0)cycle
            if(aa1(i).eq.0.)then
                ierr(i)=17
+#ifndef _OPENACC
                ierrc(i)="cloud work function zero"
+#endif
            endif
       enddo
+!$acc end kernels
+
 !
 !--- diurnal cycle closure 
 !
       !--- aa1 from boundary layer (bl) processes only
+!$acc kernels
       aa1_bl       (:) = 0.0
       xf_dicycle   (:) = 0.0
       tau_ecmwf    (:) = 0.
+!$acc end kernels
       !- way to calculate the fraction of cape consumed by shallow convection
       iversion=1 ! ecmwf  
       !iversion=0 ! orig    
@@ -1341,6 +1515,7 @@ subroutine cu_gf_deep_run(        &
 ! wmean is of no meaning over land....
 ! still working on replacing it over water
 !
+!$acc kernels
       do i=its,itf
             if(ierr(i).eq.0)then
                 !- mean vertical velocity 
@@ -1353,8 +1528,11 @@ subroutine cu_gf_deep_run(        &
             endif
       enddo
       tau_bl(:)     = 0.
+!$acc end kernels
+
       !
       if(dicycle == 1) then
+!$acc kernels
         do i=its,itf
             
             if(ierr(i).eq.0)then
@@ -1369,6 +1547,7 @@ subroutine cu_gf_deep_run(        &
 
             endif
         enddo
+!$acc end kernels
 
         if(iversion == 1) then 
         !-- version ecmwf
@@ -1380,7 +1559,7 @@ subroutine cu_gf_deep_run(        &
                               zo_cup,zuo,dbyo_bl,gammao_cup_bl,tn_cup_bl,        &
                               kbcon,ktop,ierr,                                   &
                               itf,ktf,its,ite, kts,kte)
-
+!$acc kernels
             do i=its,itf
 
             if(ierr(i).eq.0)then
@@ -1395,11 +1574,13 @@ subroutine cu_gf_deep_run(        &
                !endif 
             endif
             enddo
+!$acc end kernels
             
         else
         
           !- version for real cloud-work function
           
+!$acc kernels
           !-get the profiles modified only by bl tendencies
           do i=its,itf
            tn_bl(i,:)=0.;qo_bl(i,:)=0.
@@ -1412,6 +1593,7 @@ subroutine cu_gf_deep_run(        &
             qo_bl(i,kbcon(i)+1:ktf) = q(i,kbcon(i)+1:ktf)
            endif 
           enddo
+!$acc end kernels
           !--- calculate moist static energy, heights, qes, ... only by bl tendencies
           call cup_env(zo,qeso_bl,heo_bl,heso_bl,tn_bl,qo_bl,po,z1,                              &
                      psur,ierr,tcrit,-1,                                                         &
@@ -1421,6 +1603,7 @@ subroutine cu_gf_deep_run(        &
                               heo_cup_bl,heso_cup_bl,zo_cup,po_cup,gammao_cup_bl,tn_cup_bl,psur, &
                               ierr,z1,                                                           &
                               itf,ktf,its,ite, kts,kte)
+!$acc kernels
           do i=its,itf
             if(ierr(i).eq.0)then
                hkbo_bl(i)=heo_cup_bl(i,k22(i)) 
@@ -1458,12 +1641,12 @@ subroutine cu_gf_deep_run(        &
                enddo
             endif
           enddo
-        
+!$acc end kernels
           !--- calculate workfunctions for updrafts
           call cup_up_aa0(aa1_bl,zo,zuo,dbyo_bl,gammao_cup_bl,tn_cup_bl,        &
                         kbcon,ktop,ierr,                                        &
                         itf,ktf,its,ite, kts,kte)
-
+!$acc kernels
           do i=its,itf
             
             if(ierr(i).eq.0)then
@@ -1476,14 +1659,18 @@ subroutine cu_gf_deep_run(        &
                 !   !- multiply aa1_bl the "normalized time-scale" - tau_bl/ model_timestep
                    aa1_bl(i) = aa1_bl(i)* tau_bl(i)/ dtime
                 !endif 
-                print*,'aa0,aa1bl=',aa0(i),aa1_bl(i),aa0(i)-aa1_bl(i),tau_bl(i)!,dtime,xland(i)     
+#ifndef _OPENACC
+                print*,'aa0,aa1bl=',aa0(i),aa1_bl(i),aa0(i)-aa1_bl(i),tau_bl(i)!,dtime,xland(i)   
+#endif  
             endif
            enddo
+!$acc end kernels
         endif
      endif  ! version of implementation
 
-
+!$acc kernels
        axx(:)=aa1(:)
+!$acc end kernels
 
 !
 !> - Call cup_dd_edt() to determine downdraft strength in terms of windshear
@@ -1501,6 +1688,7 @@ subroutine cu_gf_deep_run(        &
      call get_melting_profile(ierr,tn_cup,po_cup, p_liq_ice,melting_layer,qrco    &
                              ,pwo,edto,pwdo,melting                                &
                              ,itf,ktf,its,ite, kts,kte, cumulus                    )
+!$acc kernels
         do k=kts,ktf
         do i=its,itf
            dellat_ens (i,k,1)=0.
@@ -1524,6 +1712,7 @@ subroutine cu_gf_deep_run(        &
         dellaqc(i,k)=0.
       enddo
       enddo
+!$acc end kernels
 !
 !----------------------------------------------  cloud level ktop
 !
@@ -1563,7 +1752,7 @@ subroutine cu_gf_deep_run(        &
 !----------------------------------------------  cloud level 2
 !
 !- - - - - - - - - - - - - - - - - - - - - - - - model level 1
-
+!$acc kernels
       do i=its,itf
         if(ierr(i)/=0)cycle
          dp=100.*(po_cup(i,1)-po_cup(i,2))
@@ -1603,8 +1792,10 @@ subroutine cu_gf_deep_run(        &
             totmas=subin-subdown+detup-entup-entdo+ &
                    detdo-entupk-entdoj+detupk+zuo(i,k+1)-zuo(i,k)
             if(abs(totmas).gt.1.e-6)then
+#ifndef _OPENACC
                write(0,123)'totmas=',k22(i),kbcon(i),k,entup,detup,edto(i),zdo(i,k+1),dd_massdetro(i,k),dd_massentro(i,k)
 123     format(a7,1x,3i3,2e12.4,1(1x,f5.2),3e12.4)
+#endif
             endif
             dp=100.*(po_cup(i,k)-po_cup(i,k+1))
              pgc=pgcon
@@ -1706,11 +1897,14 @@ subroutine cu_gf_deep_run(        &
         endif
 
       enddo
+!$acc end kernels
+
 444   format(1x,i2,1x,7e12.4) !,1x,f7.2,2x,e13.5)
 !
 !--- using dellas, calculate changed environmental profiles
 !
       mbdt=.1
+!$acc kernels
       do i=its,itf
       xaa0_ens(i,1)=0.
       enddo
@@ -1743,6 +1937,7 @@ subroutine cu_gf_deep_run(        &
       xt(i,ktf)=tn(i,ktf)
       endif
       enddo
+!$acc end kernels
 !
 !--- calculate moist static energy, heights, qes
 !
@@ -1764,12 +1959,15 @@ subroutine cu_gf_deep_run(        &
 !
 !--- moist static energy inside cloud
 !
+!$acc kernels
       do k=kts,ktf
       do i=its,itf
          xhc(i,k)=0.
          xdby(i,k)=0.
       enddo
       enddo
+!$acc end kernels
+!$acc parallel loop private(x_add,k)
       do i=its,itf
         if(ierr(i).eq.0)then
          x_add = xlv*zqexec(i)+cp*ztexec(i)
@@ -1781,10 +1979,13 @@ subroutine cu_gf_deep_run(        &
          xhc(i,k)=xhkb(i)
         endif !ierr
       enddo
+!$acc end parallel
 !
 !
+!$acc kernels
       do i=its,itf
        if(ierr(i).eq.0)then
+!$acc loop seq
         do k=start_level(i)  +1,ktop(i)
          xhc(i,k)=(xhc(i,k-1)*xzu(i,k-1)-.5*up_massdetro(i,k-1)*xhc(i,k-1)  + &
                                             up_massentro(i,k-1)*xhe(i,k-1)) / &
@@ -1800,13 +2001,14 @@ subroutine cu_gf_deep_run(        &
 
          xdby(i,k)=xhc(i,k)-xhes_cup(i,k)
         enddo
+!$acc loop independent
         do k=ktop(i)+1,ktf
            xhc (i,k)=xhes_cup(i,k)
            xdby(i,k)=0.
         enddo
        endif
       enddo
-
+!$acc end kernels
 !
 !--- workfunctions for updraft
 !
@@ -1814,10 +2016,13 @@ subroutine cu_gf_deep_run(        &
            kbcon,ktop,ierr,                              &
            itf,ktf,                                      &
            its,ite, kts,kte)
+!$acc parallel loop
       do i=its,itf 
          if(ierr(i).eq.0)then
            xaa0_ens(i,1)=xaa0(i)
+!$acc loop seq
            do k=kts,ktop(i)
+!$acc loop independent
                  do nens3=1,maxens3
                  if(nens3.eq.7)then
 !--- b=0
@@ -1839,7 +2044,9 @@ subroutine cu_gf_deep_run(        &
            enddo
          if(pr_ens(i,7).lt.1.e-6)then
             ierr(i)=18
+#ifndef _OPENACC
             ierrc(i)="total normalized condensate too small"
+#endif
             do nens3=1,maxens3
                pr_ens(i,nens3)=0.
             enddo
@@ -1851,6 +2058,7 @@ subroutine cu_gf_deep_run(        &
          enddo
          endif
       enddo
+!$acc end parallel
  200  continue
 !
 !--- large scale forcing
@@ -1860,11 +2068,13 @@ subroutine cu_gf_deep_run(        &
 !        ensemble is chosen
 !
 !
+!$acc kernels
       do i=its,itf
          ierr2(i)=ierr(i)
          ierr3(i)=ierr(i)
          k22x(i)=k22(i)
       enddo
+!$acc end kernels
         call cup_maximi(heo_cup,2,kbmax,k22x,ierr,                        &
              itf,ktf,                                                     &
              its,ite, kts,kte)
@@ -1885,15 +2095,18 @@ subroutine cu_gf_deep_run(        &
 !
 !--- calculate cloud base mass flux
 !
-
+!$acc kernels
       do i = its,itf
         mconv(i) = 0
         if(ierr(i)/=0)cycle
+!$acc loop independent
         do k=1,ktop(i)
           dq=(qo_cup(i,k+1)-qo_cup(i,k))
+!$acc atomic update
           mconv(i)=mconv(i)+omeg(i,k)*dq/g
         enddo
       enddo
+!$acc end kernels
       call cup_forcing_ens_3d(closure_n,xland1,aa0,aa1,xaa0_ens,mbdt,dtime, &
            ierr,ierr2,ierr3,xf_ens,axx,forcing,                             &
            maxens3,mconv,rand_clos,                                         &
@@ -1903,6 +2116,7 @@ subroutine cu_gf_deep_run(        &
            its,ite, kts,kte,                                                &
            dicycle,tau_ecmwf,aa1_bl,xf_dicycle)
 !
+!$acc kernels
       do k=kts,ktf
       do i=its,itf
         if(ierr(i).eq.0)then
@@ -1918,11 +2132,14 @@ subroutine cu_gf_deep_run(        &
         endif
       enddo
       enddo
+!$acc end kernels
+
  250  continue
 !
 !--- feedback
 !
        if(imid.eq.1 .and. ichoice .le.2)then
+!$acc kernels
          do i=its,itf
           !-boundary layer qe 
           xff_mid(i,1)=0.
@@ -1941,6 +2158,7 @@ subroutine cu_gf_deep_run(        &
              xff_mid(i,2)=min(0.1,.03*zws(i))
           endif
          enddo
+!$acc end kernels
        endif
        call cup_output_ens_3d(xff_mid,xf_ens,ierr,dellat_ens,dellaq_ens, &
             dellaqc_ens,outt,                                            &
@@ -1959,6 +2177,7 @@ subroutine cu_gf_deep_run(        &
            po_cup,qes_cup,pwavo,edto,pwevo,pre,outt,outq)      !,outbuoy)
 
       k=1
+!$acc kernels
       do i=its,itf
           if(ierr(i).eq.0 .and.pre(i).gt.0.) then
              pre(i)=max(pre(i),0.)
@@ -1980,9 +2199,11 @@ subroutine cu_gf_deep_run(        &
              enddo
           endif
       enddo
+!$acc end kernels
 ! rain evaporation as in sas
 !
       if(irainevap.eq.1)then
+!$acc kernels
       do i = its,itf
        rntot(i) = 0.
        delqev(i) = 0.
@@ -1991,8 +2212,10 @@ subroutine cu_gf_deep_run(        &
        rntot(i)    = 0.
        rain=0.
        if(ierr(i).eq.0)then
+!$acc loop independent
          do k = ktop(i), 1, -1
               rain =  pwo(i,k) + edto(i) * pwdo(i,k)
+!$acc atomic
               rntot(i) = rntot(i) + rain * xmb(i)* .001 * dtime
          enddo
        endif
@@ -2003,6 +2226,7 @@ subroutine cu_gf_deep_run(        &
          if(ierr(i).eq.0)then
          evef = edt(i) * evfact * sig(i)**2
          if(xland(i).gt.0.5 .and. xland(i).lt.1.5) evef = edt(i) * evfactl * sig(i)**2
+!$acc loop seq
          do k = ktop(i), 1, -1
               rain =  pwo(i,k) + edto(i) * pwdo(i,k)
               rn(i) = rn(i) + rain * xmb(i) * .001 * dtime
@@ -2037,8 +2261,10 @@ subroutine cu_gf_deep_run(        &
 !       pre(i)=1000.*rn(i)/dtime
       endif
       enddo
+!$acc end kernels
       endif
 
+!$acc kernels
       do i=its,itf
          if(ierr(i).eq.0) then
             if(aeroevap.gt.1)then
@@ -2048,9 +2274,12 @@ subroutine cu_gf_deep_run(        &
             endif
          endif
       enddo
+!$acc end kernels
+
 !
 ! since kinetic energy is being dissipated, add heating accordingly (from ecmwf)
 !
+!$acc kernels
       do i=its,itf
           if(ierr(i).eq.0) then
              dts=0.
@@ -2070,7 +2299,7 @@ subroutine cu_gf_deep_run(        &
              endif
           endif
       enddo
-
+!$acc end kernels
 
 !
 !---------------------------done------------------------------
@@ -2083,7 +2312,7 @@ end subroutine cu_gf_deep_run
 
 
    subroutine fct1d3 (ktop,n,dt,z,tracr,massflx,trflx_in,dellac,g)
-
+!$acc routine vector
 ! --- modify a 1-D array of tracer fluxes for the purpose of maintaining
 ! --- monotonicity (including positive-definiteness) in the tracer field
 ! --- during tracer transport.
@@ -2188,9 +2417,10 @@ subroutine fct1d3 (ktop,n,dt,z,tracr,massflx,trflx_in,dellac,g)
        / (1.0001*dtovdz(k)))
      clipout(k)=min(damp,(soln_lo(k)-trmin(k))/max(epsil,totlout(k))    &
        / (1.0001*dtovdz(k)))
-
+#ifndef _OPENACC
      if (NaN(clipin(k)))  print *,'(fct1d) error: clipin is NaN,  k=',k
      if (NaN(clipout(k))) print *,'(fct1d) error: clipout is NaN,  k=',k
+#endif
 
      if (clipin(k).lt.0.) then
 !       print 100,'(fct1d) error: clipin < 0 at k =',k,			&
@@ -2215,7 +2445,9 @@ subroutine fct1d3 (ktop,n,dt,z,tracr,massflx,trflx_in,dellac,g)
      end if
      trflx_out(k)=flx_lo(k)+clipped(k)
      if (NaN(trflx_out(k)))  then
+#ifndef _OPENACC
        print *,'(fct1d) error: trflx_out is NaN,  k=',k
+#endif
        error=.true.
      end if
    end do
@@ -2227,6 +2459,7 @@ subroutine fct1d3 (ktop,n,dt,z,tracr,massflx,trflx_in,dellac,g)
      !dellac(k)=soln_hi(k)
    end do
 
+#ifndef _OPENACC
    if (vrbos .or. error) then
 !     do k=2,ktop
 !       write(32,99)k,						&
@@ -2256,6 +2489,7 @@ subroutine fct1d3 (ktop,n,dt,z,tracr,massflx,trflx_in,dellac,g)
 !     end do
      if (error) stop '(fct1d error)'
    end if
+#endif
 
    return
    end subroutine fct1d3
@@ -2277,6 +2511,8 @@ subroutine rain_evap_below_cloudbase(itf,ktf, its,ite, kts,kte,ierr,    &
    real(kind=kind_phys), dimension(its:ite,kts:kte),intent(in)    :: po_cup,qo_cup,qes_cup
    real(kind=kind_phys), dimension(its:ite)        ,intent(inout) :: pre
    real(kind=kind_phys), dimension(its:ite,kts:kte),intent(inout) :: outt,outq  !,outbuoy
+!$acc declare copyin(ierr,kbcon,psur,xland,pwavo,edto,pwevo,xmb,po_cup,qo_cup,qes_cup)
+!$acc declare copy(pre,outt,outq)
 
   !real,    dimension(its:ite)        ,intent(out)      :: tot_evap_bcb
   !real,    dimension(its:ite,kts:kte),intent(out) :: evap_bcb,net_prec_bcb
@@ -2286,7 +2522,9 @@ subroutine rain_evap_below_cloudbase(itf,ktf, its,ite, kts,kte,ierr,    &
    real(kind=kind_phys) :: RH_cr , del_t,del_q,dp,q_deficit
    real(kind=kind_phys),  dimension(its:ite,kts:kte) :: evap_bcb,net_prec_bcb
    real(kind=kind_phys),  dimension(its:ite)         :: tot_evap_bcb
+!$acc declare create(evap_bcb,net_prec_bcb,tot_evap_bcb)
 
+!$acc kernels
    do i=its,itf
      evap_bcb    (i,:)= 0.0
      net_prec_bcb(i,:)= 0.0
@@ -2303,6 +2541,7 @@ subroutine rain_evap_below_cloudbase(itf,ktf, its,ite, kts,kte,ierr,    &
     !net_prec_bcb(i,k) = xmb(i)*(pwavo(i)+edto(i)*pwevo(i)) !-- pwevo<0.
      net_prec_bcb(i,k) = pre(i)
 
+!$acc loop seq
      do k=kbcon(i)-1, kts, -1
 
        q_deficit = max(0.,(RH_cr*qes_cup(i,k) -qo_cup(i,k)))
@@ -2340,6 +2579,7 @@ subroutine rain_evap_below_cloudbase(itf,ktf, its,ite, kts,kte,ierr,    &
        pre(i) = pre(i) - evap_bcb(i,k)
       enddo
     enddo
+!$acc end kernels
 
    end subroutine rain_evap_below_cloudbase
 
@@ -2384,6 +2624,8 @@ subroutine cup_dd_edt(ierr,us,vs,z,ktop,kbcon,edt,p,pwav, &
      integer, dimension (its:ite)                            &
         ,intent (inout)                   ::                 &
         ierr
+!$acc declare copyin(rho,us,vs,z,p,pw,pwav,pwev,psum2,psumh,edtmax,edtmin,ktop,kbcon)
+!$acc declare copyout(edtc,edt) copy(ccn,ierr)
 !
 !  local variables in this routine
 !
@@ -2392,6 +2634,7 @@ subroutine cup_dd_edt(ierr,us,vs,z,ktop,kbcon,edt,p,pwav, &
      real(kind=kind_phys)    einc,pef,pefb,prezk,zkbc
      real(kind=kind_phys),    dimension (its:ite)         ::                 &
       vshear,sdp,vws
+!$acc declare create(vshear,sdp,vws)
      real(kind=kind_phys) :: prop_c,aeroadd,alpha3,beta3
      prop_c=0. !10.386
      alpha3 = 0.75
@@ -2405,6 +2648,7 @@ subroutine cup_dd_edt(ierr,us,vs,z,ktop,kbcon,edt,p,pwav, &
 !
 ! */ calculate an average wind shear over the depth of the cloud
 !
+!$acc kernels
        do i=its,itf
         edt(i)=0.
         vws(i)=0.
@@ -2480,6 +2724,7 @@ subroutine cup_dd_edt(ierr,us,vs,z,ktop,kbcon,edt,p,pwav, &
                if(edtc(i,1).lt.edtmin(i))edtc(i,1)=edtmin(i)
          endif
       enddo
+!$acc end kernels
 
    end subroutine cup_dd_edt
 
@@ -2517,21 +2762,25 @@ subroutine cup_dd_moisture(ierrc,zd,hcd,hes_cup,qcd,qes_cup,  &
         ,intent (in   )                   ::            &
         zd,hes_cup,hcd,qes_cup,q_cup,z_cup,             &
         dd_massentr,dd_massdetr,gamma_cup,q,he 
+!$acc declare copyin(zd,hes_cup,hcd,qes_cup,q_cup,z_cup,dd_massentr,dd_massdetr,gamma_cup,q,he)
      integer                                            &
         ,intent (in   )                   ::            &
         iloop
      integer, dimension (its:ite)                       &
         ,intent (in   )                   ::            &
         jmin
+!$acc declare copyin(jmin)
      integer, dimension (its:ite)                       &
         ,intent (inout)                   ::            &
         ierr
+!$acc declare copy(ierr)
      real(kind=kind_phys),    dimension (its:ite,kts:kte)&
         ,intent (out  )                   ::            &
         qcd,qrcd,pwd
      real(kind=kind_phys),    dimension (its:ite)&
         ,intent (out  )                   ::            &
         pwev,bu
+!$acc declare copyout(qcd,qrcd,pwd,pwev,bu)
      character*50 :: ierrc(its:ite)
 !
 !  local variables in this routine
@@ -2542,6 +2791,7 @@ subroutine cup_dd_moisture(ierrc,zd,hcd,hes_cup,qcd,qes_cup,  &
      real(kind=kind_phys)                                 ::            &
         denom,dh,dz,dqeva
 
+!$acc kernels
       do i=its,itf
          bu(i)=0.
          pwev(i)=0.
@@ -2573,6 +2823,7 @@ subroutine cup_dd_moisture(ierrc,zd,hcd,hes_cup,qcd,qes_cup,  &
       pwev(i)=pwev(i)+pwd(i,jmin(i)) ! *dz
 !
       bu(i)=dz*dh
+!$acc loop seq
       do ki=jmin(i)-1,1,-1
          dz=z_cup(i,ki+1)-z_cup(i,ki)
 !        qcd(i,ki)=(qcd(i,ki+1)*(1.-.5*cdd(i,ki+1)*dz) &
@@ -2617,15 +2868,20 @@ subroutine cup_dd_moisture(ierrc,zd,hcd,hes_cup,qcd,qes_cup,  &
        if( (pwev(i).eq.0.) .and. (iloop.eq.1))then
 !        print *,'problem with buoy in cup_dd_moisture',i
          ierr(i)=7
+#ifndef _OPENACC
          ierrc(i)="problem with buoy in cup_dd_moisture"
+#endif
        endif
        if(bu(i).ge.0.and.iloop.eq.1)then
 !        print *,'problem with buoy in cup_dd_moisture',i
          ierr(i)=7
+#ifndef _OPENACC
          ierrc(i)="problem2 with buoy in cup_dd_moisture"
+#endif
        endif
       endif
 100    continue
+!$acc end kernels
 
    end subroutine cup_dd_moisture
 
@@ -2664,18 +2920,23 @@ subroutine cup_env(z,qes,he,hes,t,q,p,z1,                &
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                &
         ,intent (in   )                   ::             &
         p,t,q
+!$acc declare copyin(p,t,q)
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                &
         ,intent (out  )                   ::             &
         he,hes,qes
+!$acc declare copyout(he,hes,qes)
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                &
         ,intent (inout)                   ::             &
         z
+!$acc declare copy(z)
      real(kind=kind_phys),    dimension (its:ite)                        &
         ,intent (in   )                   ::             &
         psur,z1
+!$acc declare copyin(psur,z1)
      integer, dimension (its:ite)                        &
         ,intent (inout)                   ::             &
         ierr
+!$acc declare copy(ierr)
      integer                                             &
         ,intent (in   )                   ::             &
         itest
@@ -2687,6 +2948,7 @@ subroutine cup_env(z,qes,he,hes,t,q,p,z1,                &
        i,k
 !     real(kind=kind_phys), dimension (1:2) :: ae,be,ht
       real(kind=kind_phys), dimension (its:ite,kts:kte) :: tv
+!$acc declare create(tv)
       real(kind=kind_phys) :: tcrit,e,tvbar
 !     real(kind=kind_phys), external :: satvap
 !     real(kind=kind_phys) :: satvap
@@ -2698,6 +2960,7 @@ subroutine cup_env(z,qes,he,hes,t,q,p,z1,                &
 !      ae(1)=be(1)/273.+alog(610.71)
 !      be(2)=.622*ht(2)/.286
 !      ae(2)=be(2)/273.+alog(610.71)
+!$acc parallel loop collapse(2) private(e)
       do k=kts,ktf
       do i=its,itf
         if(ierr(i).eq.0)then
@@ -2717,11 +2980,13 @@ subroutine cup_env(z,qes,he,hes,t,q,p,z1,                &
         endif
       enddo
       enddo
+!$acc end parallel
 !
 !--- z's are calculated with changed h's and q's and t's
 !--- if itest=2
 !
       if(itest.eq.1 .or. itest.eq.0)then
+!$acc kernels
          do i=its,itf
            if(ierr(i).eq.0)then
              z(i,1)=max(0.,z1(i))-(log(p(i,1))- &
@@ -2730,7 +2995,9 @@ subroutine cup_env(z,qes,he,hes,t,q,p,z1,                &
          enddo
 
 ! --- calculate heights
+!$acc loop seq
          do k=kts+1,ktf
+!$acc loop private(tvbar)
          do i=its,itf
            if(ierr(i).eq.0)then
               tvbar=.5*tv(i,k)+.5*tv(i,k-1)
@@ -2739,7 +3006,9 @@ subroutine cup_env(z,qes,he,hes,t,q,p,z1,                &
            endif
          enddo
          enddo
+!$acc end kernels
       else if(itest.eq.2)then
+!$acc kernels
          do k=kts,ktf
          do i=its,itf
            if(ierr(i).eq.0)then
@@ -2748,12 +3017,14 @@ subroutine cup_env(z,qes,he,hes,t,q,p,z1,                &
            endif
          enddo
          enddo
+!$acc end kernels
       else if(itest.eq.-1)then
       endif
 !
 !--- calculate moist static energy - he
 !    saturated moist static energy - hes
 !
+!$acc kernels
        do k=kts,ktf
        do i=its,itf
          if(ierr(i).eq.0)then
@@ -2763,6 +3034,7 @@ subroutine cup_env(z,qes,he,hes,t,q,p,z1,                &
          endif
       enddo
       enddo
+!$acc end kernels
 
    end subroutine cup_env
 
@@ -2802,15 +3074,19 @@ subroutine cup_env_clev(t,qes,q,he,hes,z,p,qes_cup,q_cup,        &
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                        &
         ,intent (in   )                   ::                     &
         qes,q,he,hes,z,p,t
+!$acc declare copyin(qes,q,he,hes,z,p,t)
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                        &
         ,intent (out  )                   ::                     &
         qes_cup,q_cup,he_cup,hes_cup,z_cup,p_cup,gamma_cup,t_cup
+!$acc declare copyout(qes_cup,q_cup,he_cup,hes_cup,z_cup,p_cup,gamma_cup,t_cup)
      real(kind=kind_phys),    dimension (its:ite)                                &
         ,intent (in   )                   ::                     &
         psur,z1
+!$acc declare copyin(psur,z1)
      integer, dimension (its:ite)                                &
         ,intent (inout)                   ::                     &
         ierr
+!$acc declare copy(ierr)
 !
 !  local variables in this routine
 !
@@ -2818,7 +3094,7 @@ subroutine cup_env_clev(t,qes,q,he,hes,z,p,qes_cup,q_cup,        &
      integer                              ::                     &
        i,k
 
-
+!$acc kernels
       do k=kts,ktf
       do i=its,itf
         qes_cup(i,k)=0.
@@ -2864,7 +3140,7 @@ subroutine cup_env_clev(t,qes,q,he,hes,z,p,qes_cup,q_cup,        &
                        *t_cup(i,1)))*qes_cup(i,1)
         endif
       enddo
-
+!$acc end kernels
    end subroutine cup_env_clev
 
 !>\ingroup cu_gf_deep_group
@@ -2911,6 +3187,7 @@ subroutine cup_forcing_ens_3d(closure_n,xland,aa0,aa1,xaa0,mbdt,dtime,ierr,ierr2
      real(kind=kind_phys),    dimension (its:ite,1:maxens3)                            &
         ,intent (inout  )                 ::                           &
         xf_ens
+!$acc declare copy(pr_ens,xf_ens)
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                              &
         ,intent (in   )                   ::                           &
         zd,zu,p_cup,zdm
@@ -2929,9 +3206,11 @@ subroutine cup_forcing_ens_3d(closure_n,xland,aa0,aa1,xaa0,mbdt,dtime,ierr,ierr2
      real(kind=kind_phys),    dimension (its:ite)                                      &
         ,intent (in   )                   ::                           &
         mconv,axx
+!$acc declare copyin(zd,zu,p_cup,zdm,omeg,xaa0,rand_clos,aa1,edt,edtm,mconv,axx)
      real(kind=kind_phys),    dimension (its:ite)                                      &
         ,intent (inout)                   ::                           &
         aa0,closure_n
+!$acc declare copy(aa0,closure_n)
      real(kind=kind_phys)                                                              &
         ,intent (in   )                   ::                           &
         mbdt
@@ -2947,6 +3226,7 @@ subroutine cup_forcing_ens_3d(closure_n,xland,aa0,aa1,xaa0,mbdt,dtime,ierr,ierr2
      integer, dimension (its:ite)                                      &
         ,intent (inout)                   ::                           &
         ierr,ierr2,ierr3
+!$acc declare copy(k22,kbcon,ktop,ierr,ierr2,ierr3) copyin(xland)
      integer                                                           &
         ,intent (in   )                   ::                           &
         ichoice
@@ -2954,6 +3234,7 @@ subroutine cup_forcing_ens_3d(closure_n,xland,aa0,aa1,xaa0,mbdt,dtime,ierr,ierr2
       real(kind=kind_phys),    intent(in)   , dimension (its:ite) :: aa1_bl,tau_ecmwf
       real(kind=kind_phys),    intent(inout), dimension (its:ite) :: xf_dicycle
       real(kind=kind_phys),    intent(inout), dimension (its:ite,10) :: forcing
+!$acc declare copyin(aa1_bl,tau_ecmwf) copy(xf_dicycle,forcing)
       !- local var
       real(kind=kind_phys)  :: xff_dicycle
 !
@@ -2974,15 +3255,20 @@ subroutine cup_forcing_ens_3d(closure_n,xland,aa0,aa1,xaa0,mbdt,dtime,ierr,ierr2
        a1,a_ave,xff0,xomg!,aclim1,aclim2,aclim3,aclim4
 
      real(kind=kind_phys), dimension (its:ite) :: ens_adj
+!$acc declare create(kloc,ens_adj)
 
 
 
 !
+!$acc kernels
        ens_adj(:)=1.
+!$acc end kernels
        xff_dicycle = 0.
 
 !--- large scale forcing
 !
+!$acc kernels
+!$acc loop private(xff_ens3,xk)
        do 100 i=its,itf
           kloc(i)=1
           if(ierr(i).eq.0)then
@@ -3218,13 +3504,15 @@ subroutine cup_forcing_ens_3d(closure_n,xland,aa0,aa1,xaa0,mbdt,dtime,ierr,ierr2
              enddo
           endif ! ierror
  100   continue
+ !$acc end kernels
 
 
 !-
 !- diurnal cycle mass flux
 !-              
 if(dicycle == 1 )then
-
+!$acc kernels
+!$acc loop private(xk)
        do i=its,itf           
           xf_dicycle(i) = 0.
           if(ierr(i) /=  0)cycle
@@ -3238,9 +3526,11 @@ subroutine cup_forcing_ens_3d(closure_n,xland,aa0,aa1,xaa0,mbdt,dtime,ierr,ierr2
  
             xf_dicycle(i)= xf_ens(i,10)-xf_dicycle(i)
        enddo
+!$acc end kernels
 else
+!$acc kernels
        xf_dicycle(:) = 0.
-
+!$acc end kernels
 endif
 !---------
 
@@ -3273,24 +3563,31 @@ subroutine cup_kbcon(ierrc,cap_inc,iloop_in,k22,kbcon,he_cup,hes_cup, &
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                              &
         ,intent (in   )                   ::                           &
         he_cup,hes_cup,p_cup
+!$acc declare copyin(he_cup,hes_cup,p_cup)
      real(kind=kind_phys),    dimension (its:ite)                                      &
         ,intent (in   )                   ::                           &
         entr_rate,ztexec,zqexec,cap_inc,cap_max
+!$acc declare copyin(entr_rate,ztexec,zqexec,cap_inc,cap_max)
      real(kind=kind_phys),    dimension (its:ite)                                      &
         ,intent (inout   )                   ::                        &
         hkb !,cap_max
+!$acc declare copy(hkb)
      integer, dimension (its:ite)                                      &
         ,intent (in   )                   ::                           &
         kbmax
+!$acc declare copyin(kbmax)
      integer, dimension (its:ite)                                      &
         ,intent (inout)                   ::                           &
         kbcon,k22,ierr
+!$acc declare copy(kbcon,k22,ierr)
      integer                                                           &
         ,intent (in   )                   ::                           &
         iloop_in
      character*50 :: ierrc(its:ite)
      real(kind=kind_phys), dimension (its:ite,kts:kte),intent (in) :: z_cup,heo
+!$acc declare copyin(z_cup,heo)
      integer, dimension (its:ite)      ::     iloop,start_level
+!$acc declare create(iloop,start_level)
 !
 !  local variables in this routine
 !
@@ -3300,10 +3597,16 @@ subroutine cup_kbcon(ierrc,cap_inc,iloop_in,k22,kbcon,he_cup,hes_cup, &
      real(kind=kind_phys)                                ::                           &
         x_add,pbcdif,plus,hetest,dz
      real(kind=kind_phys), dimension (its:ite,kts:kte) ::hcot
+!$acc declare create(hcot)
+
 !
 !--- determine the level of convective cloud base  - kbcon
 !
+!$acc kernels
       iloop(:)=iloop_in
+!$acc end kernels
+
+!$acc parallel loop
        do 27 i=its,itf
       kbcon(i)=1
 !
@@ -3317,6 +3620,7 @@ subroutine cup_kbcon(ierrc,cap_inc,iloop_in,k22,kbcon,he_cup,hes_cup, &
 !      if(iloop_in.eq.5)start_level(i)=kbcon(i)
        !== including entrainment for hetest
         hcot(i,1:start_level(i)) = hkb(i)
+!$acc loop seq
         do k=start_level(i)+1,kbmax(i)+3
            dz=z_cup(i,k)-z_cup(i,k-1)
            hcot(i,k)= ( (1.-0.5*entr_rate(i)*dz)*hcot(i,k-1)   &
@@ -3331,7 +3635,9 @@ subroutine cup_kbcon(ierrc,cap_inc,iloop_in,k22,kbcon,he_cup,hes_cup, &
       if(kbcon(i).gt.kbmax(i)+2)then
          if(iloop(i).ne.4)then
                 ierr(i)=3
+#ifndef _OPENACC
                 ierrc(i)="could not find reasonable kbcon in cup_kbcon"
+#endif
          endif
         go to 27
       endif
@@ -3364,6 +3670,7 @@ subroutine cup_kbcon(ierrc,cap_inc,iloop_in,k22,kbcon,he_cup,hes_cup, &
         start_level(i)=k22(i)
 !        if(iloop_in.eq.5)start_level(i)=kbcon(i)
         hcot(i,1:start_level(i)) = hkb(i)
+!$acc loop seq
         do k=start_level(i)+1,kbmax(i)+3
            dz=z_cup(i,k)-z_cup(i,k-1)
 
@@ -3377,13 +3684,16 @@ subroutine cup_kbcon(ierrc,cap_inc,iloop_in,k22,kbcon,he_cup,hes_cup, &
         if(kbcon(i).gt.kbmax(i)+2)then
             if(iloop(i).ne.4)then
                 ierr(i)=3
+#ifndef _OPENACC
                 ierrc(i)="could not find reasonable kbcon in cup_kbcon"
+#endif
             endif
             go to 27
         endif
         go to 32
       endif
  27   continue
+ !$acc end parallel
 
    end subroutine cup_kbcon
 
@@ -3410,27 +3720,33 @@ subroutine cup_maximi(array,ks,ke,maxx,ierr,              &
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                              &
         ,intent (in   )                   ::                           &
          array
+!$acc declare copyin(array)
      integer, dimension (its:ite)                                      &
         ,intent (in   )                   ::                           &
          ierr,ke
+!$acc declare copyin(ierr,ke)
      integer                                                           &
         ,intent (in   )                   ::                           &
          ks
      integer, dimension (its:ite)                                      &
         ,intent (out  )                   ::                           &
          maxx
+!$acc declare copyout(maxx)
      real(kind=kind_phys),    dimension (its:ite)         ::                           &
          x
+!$acc declare create(x)
      real(kind=kind_phys)                                ::                           &
          xar
      integer                              ::                           &
          i,k
 
+!$acc kernels
        do 200 i=its,itf
        maxx(i)=ks
        if(ierr(i).eq.0)then
       x(i)=array(i,ks)
 !
+!$acc loop seq
        do 100 k=ks,ke(i)
          xar=array(i,k)
          if(xar.ge.x(i)) then
@@ -3440,6 +3756,7 @@ subroutine cup_maximi(array,ks,ke,maxx,ierr,              &
  100  continue
       endif
  200  continue
+ !$acc end kernels
 
    end subroutine cup_maximi
 
@@ -3466,23 +3783,29 @@ subroutine cup_minimi(array,ks,kend,kt,ierr,              &
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                    &
         ,intent (in   )                   ::                 &
          array
+!$acc declare copyin(array)
      integer, dimension (its:ite)                            &
         ,intent (in   )                   ::                 &
          ierr,ks,kend
+!$acc declare copyin(ierr,ks,kend)
      integer, dimension (its:ite)                            &
         ,intent (out  )                   ::                 &
          kt
+!$acc declare copyout(kt)
      real(kind=kind_phys),    dimension (its:ite)         ::                 &
          x
+!$acc declare create(x)
      integer                              ::                 &
          i,k,kstop
 
+!$acc kernels
        do 200 i=its,itf
       kt(i)=ks(i)
       if(ierr(i).eq.0)then
       x(i)=array(i,ks(i))
        kstop=max(ks(i)+1,kend(i))
 !
+!$acc loop seq
        do 100 k=ks(i)+1,kstop
          if(array(i,k).lt.x(i)) then
               x(i)=array(i,k)
@@ -3491,6 +3814,7 @@ subroutine cup_minimi(array,ks,kend,kt,ierr,              &
  100  continue
       endif
  200  continue
+ !$acc end kernels
 
    end subroutine cup_minimi
 
@@ -3525,6 +3849,7 @@ subroutine cup_up_aa0(aa0,z,zu,dby,gamma_cup,t_cup,       &
      integer, dimension (its:ite)                             &
         ,intent (in   )                   ::                  &
         kbcon,ktop
+!$acc declare copyin(z,zu,gamma_cup,t_cup,dby,kbcon,ktop)
 !
 ! input and output
 !
@@ -3533,9 +3858,11 @@ subroutine cup_up_aa0(aa0,z,zu,dby,gamma_cup,t_cup,       &
      integer, dimension (its:ite)                             &
         ,intent (inout)                   ::                  &
         ierr
+!$acc declare copy(ierr)
      real(kind=kind_phys),    dimension (its:ite)                             &
         ,intent (out  )                   ::                  &
         aa0
+!$acc declare copyout(aa0)
 !
 !  local variables in this routine
 !
@@ -3545,6 +3872,7 @@ subroutine cup_up_aa0(aa0,z,zu,dby,gamma_cup,t_cup,       &
      real(kind=kind_phys)                                 ::                  &
         dz,da
 !
+!$acc kernels
         do i=its,itf
          aa0(i)=0.
         enddo
@@ -3562,6 +3890,7 @@ subroutine cup_up_aa0(aa0,z,zu,dby,gamma_cup,t_cup,       &
            if(aa0(i).lt.0.)aa0(i)=0.
           enddo
         enddo
+!$acc end kernels
 
    end subroutine cup_up_aa0
 
@@ -3582,6 +3911,7 @@ subroutine neg_check(name,j,dt,q,outq,outt,outu,outv,                      &
      real(kind=kind_phys), dimension (its:ite  )                            ,                 &
       intent(inout   ) ::                                                     &
        pret
+!$acc declare copy(outq,outt,outqc,outu,outv,q,pret)
       character *(*), intent (in)         ::                                  &
        name
      real(kind=kind_phys)                                                                     &
@@ -3601,11 +3931,14 @@ subroutine neg_check(name,j,dt,q,outq,outt,outu,outv,                      &
         names=1.
       endif
       scalef=86400.
+!$acc kernels
+!$acc loop private(qmemf,qmem,icheck)
       do i=its,itf
       if(ktop(i) <= 2)cycle
       icheck=0
       qmemf=1.
       qmem=0.
+!$acc loop reduction(min:qmemf)
       do k=kts,ktop(i)
          qmem=(outt(i,k))*86400.
          if(qmem.gt.thresh)then
@@ -3633,6 +3966,7 @@ subroutine neg_check(name,j,dt,q,outq,outt,outu,outv,                      &
       enddo
       pret(i)=pret(i)*qmemf 
       enddo
+!$acc end kernels
 !      return
 !
 ! check whether routine produces negative q's. this can happen, since 
@@ -3643,9 +3977,12 @@ subroutine neg_check(name,j,dt,q,outq,outt,outu,outv,                      &
 !      return
 !      write(14,*)'return'
       thresh=1.e-32
+!$acc kernels
+!$acc loop private(qmemf,qmem,icheck)
       do i=its,itf
       if(ktop(i) <= 2)cycle
       qmemf=1.
+!$acc loop reduction(min:qmemf)
       do k=kts,ktop(i)
          qmem=outq(i,k)
          if(abs(qmem).gt.0. .and. q(i,k).gt.1.e-6)then
@@ -3670,7 +4007,7 @@ subroutine neg_check(name,j,dt,q,outq,outt,outu,outv,                      &
       enddo
       pret(i)=pret(i)*qmemf 
       enddo
-
+!$acc end kernels
    end subroutine neg_check
 
 !>\ingroup cu_gf_deep_group
@@ -3744,6 +4081,8 @@ subroutine cup_output_ens_3d(xff_mid,xf_ens,ierr,dellat,dellaq,dellaqc,  &
         ierr,ierr2,ierr3
      integer, intent(in) :: dicycle
      real(kind=kind_phys),    intent(in), dimension (its:ite) :: xf_dicycle
+!$acc declare copyin(zu,pwd,p_cup,sig,xmbm_in,xmbs_in,edt,xff_mid,dellat,dellaqc,dellaq,pw,ktop,xland1,xf_dicycle)
+!$acc declare copy(xf_ens,pr_ens,outtem,outq,outqc,pre,xmb,closure_n,ierr,ierr2,ierr3)
 !
 !  local variables in this routine
 !
@@ -3754,11 +4093,13 @@ subroutine cup_output_ens_3d(xff_mid,xf_ens,ierr,dellat,dellaq,dellaqc,  &
         clos_wei,dtt,dp,dtq,dtqc,dtpw,dtpwd
      real(kind=kind_phys),    dimension (its:ite)         ::                           &
        pre2,xmb_ave,pwtot
+!$acc declare create(pre2,xmb_ave,pwtot)
 !
       character *(*), intent (in)         ::                           &
        name
 
 !
+!$acc kernels
       do k=kts,kte
       do i=its,ite
         outtem (i,k)=0.
@@ -3779,6 +4120,7 @@ subroutine cup_output_ens_3d(xff_mid,xf_ens,ierr,dellat,dellaq,dellaqc,  &
         enddo
         endif
       enddo
+!$acc end kernels
 !
 !--- calculate ensemble average mass fluxes
 !
@@ -3788,10 +4130,12 @@ subroutine cup_output_ens_3d(xff_mid,xf_ens,ierr,dellat,dellaq,dellaqc,  &
 !
 !!!!! deep convection !!!!!!!!!!
       if(imid.eq.0)then
+!$acc kernels
       do i=its,itf
         if(ierr(i).eq.0)then
          k=0
          xmb_ave(i)=0.
+!$acc loop seq
          do n=1,maxens3
           k=k+1
           xmb_ave(i)=xmb_ave(i)+xf_ens(i,n)
@@ -3825,8 +4169,10 @@ subroutine cup_output_ens_3d(xff_mid,xf_ens,ierr,dellat,dellaq,dellaqc,  &
 
         endif
       enddo
+!$acc end kernels
 !!!!! not so deep convection !!!!!!!!!!
       else  ! imid == 1
+!$acc kernels
          do i=its,itf
          xmb_ave(i)=0.
          if(ierr(i).eq.0)then
@@ -3836,6 +4182,7 @@ subroutine cup_output_ens_3d(xff_mid,xf_ens,ierr,dellat,dellaq,dellaqc,  &
               xmb_ave(i)=sig(i)*xff_mid(i,ichoice)
            else if(ichoice.gt.2)then
               k=0
+!$acc loop seq
               do n=1,maxens3
                     k=k+1
                     xmb_ave(i)=xmb_ave(i)+xf_ens(i,n)
@@ -3856,8 +4203,10 @@ subroutine cup_output_ens_3d(xff_mid,xf_ens,ierr,dellat,dellaq,dellaqc,  &
            endif   ! dicycle=1,2
          endif     ! ierr >0
          enddo     ! i
+!$acc end kernels
       endif        ! imid=1
 
+!$acc kernels
        do i=its,itf
          if(ierr(i).eq.0)then
              dtpw=0.
@@ -3870,8 +4219,10 @@ subroutine cup_output_ens_3d(xff_mid,xf_ens,ierr,dellat,dellaq,dellaqc,  &
             PRE(I)=PRE(I)+XMB(I)*dtpw
           endif
        enddo
+!$acc end kernels
  return
 
+!$acc kernels
       do i=its,itf
         pwtot(i)=0.
         pre2(i)=0.
@@ -3907,10 +4258,12 @@ subroutine cup_output_ens_3d(xff_mid,xf_ens,ierr,dellat,dellaq,dellaqc,  &
           enddo
           pre(i)=-pre(i)+xmb(i)*pwtot(i)
         endif
+#ifndef _OPENACC
 124     format(1x,i3,4e13.4)
 125     format(1x,2e13.4)
+#endif
       enddo
-
+!$acc end kernels
 
    end subroutine cup_output_ens_3d
 !-------------------------------------------------------
@@ -3957,6 +4310,7 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
      integer, dimension (its:ite)                                 &
         ,intent (in   )                   ::                      &
         kbcon,ktop,k22,xland1
+!$acc declare copyin(p_cup,rho,q,zu,gamma_cup,qe_cup,up_massentr,up_massdetr,dby,qes_cup,z_cup,zqexec,c0,kbcon,ktop,k22,xland1)
      real(kind=kind_phys),    intent (in  ) ::                    & ! HCB
         ccnclean
 !
@@ -3968,6 +4322,7 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
      integer, dimension (its:ite)                                  &
         ,intent (inout)                   ::                       &
         ierr
+!$acc declare copy(ierr)
       character *(*), intent (in)         ::                       &
        name
    ! qc = cloud q (including liquid water) after entrainment
@@ -3980,19 +4335,25 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                          &
         ,intent (out  )                   ::                       &
         qc,qrc,pw,clw_all
+!$acc declare copy(qc,qrc,pw,clw_all)
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                          &
         ,intent (inout)                   ::                       &
         c1d
+!$acc declare copy(c1d)
      real(kind=kind_phys),    dimension (its:ite,kts:kte) ::                       &
         qch,qrcb,pwh,clw_allh,c1d_b,t
+!$acc declare create(qch,qrcb,pwh,clw_allh,c1d_b,t)
      real(kind=kind_phys),    dimension (its:ite)         ::                       &
         pwavh
+!$acc declare create(pwavh)
      real(kind=kind_phys),    dimension (its:ite)                                  &
         ,intent (out  )                   ::                       &
         pwav,psum,psumh
+!$acc declare copyout(pwav,psum,psumh)
      real(kind=kind_phys),    dimension (its:ite)                                  &
         ,intent (in  )                    ::                       &
         ccn
+!$acc declare copyin(ccn)
 !
 !  local variables in this routine
 !
@@ -4000,6 +4361,7 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
      integer                              ::                       &
         iprop,iall,i,k
      integer :: start_level(its:ite),kklev(its:ite)
+!$acc declare create(start_level,kklev)
      real(kind=kind_phys)                                 ::                       &
         prop_ave,qrcb_h,bdsp,dp,rhoc,qrch,qaver,clwdet,                   &
         dz,berryc0,q1,berryc
@@ -4007,19 +4369,30 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
         denom, c0t, c0_iceconv
      real(kind=kind_phys),    dimension (kts:kte)         ::                       &
         prop_b
+!$acc declare create(prop_b)
 !
+     real(kind=kind_phys), parameter:: zero = 0
+     logical :: is_mid, is_deep
+
+        is_mid = (name == 'mid')
+        is_deep = (name == 'deep')
+
+!$acc kernels
         prop_b(kts:kte)=0
+!$acc end kernels
         iall=0
         clwdet=0.1 !0.02
         c0_iceconv=0.01
         c1d_b=c1d
         bdsp=bdispm
+
 !
 !--- no precip for small clouds
 !
 !        if(name.eq.'shallow')then
 !            c0=0.002
 !        endif
+!$acc kernels
         do i=its,itf
           pwav(i)=0.
           pwavh(i)=0.
@@ -4039,10 +4412,13 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
           qrcb(i,k)=0.
         enddo
         enddo
+!$acc end kernels
+
+!$acc parallel loop private(start_level,qaver,k)
       do i=its,itf
       if(ierr(i).eq.0)then
          start_level=k22(i)
-         call get_cloud_bc(kte,qe_cup (i,1:kte),qaver,k22(i))
+         call get_cloud_bc(kte,qe_cup (i,1:kte),qaver,k22(i),zero)
          qaver = qaver 
          k=start_level(i)
          qc (i,k)= qaver 
@@ -4056,7 +4432,10 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
 !
       endif
       enddo
+!$acc end parallel
+
 
+!$acc kernels
        do 100 i=its,itf
          !c0=.004 HCB tuning
          if(ierr(i).eq.0)then
@@ -4064,6 +4443,7 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
 ! below lfc, but maybe above lcl
 !
 !            if(name == "deep" )then
+!$acc loop seq
             do k=k22(i)+1,kbcon(i)
               if(t(i,k) > 273.16) then
                c0t = c0(i)
@@ -4090,13 +4470,14 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
 !now do the rest
 !
             kklev(i)=maxloc(zu(i,:),1)
+!$acc loop seq
             do k=kbcon(i)+1,ktop(i)
                if(t(i,k) > 273.16) then
                   c0t = c0(i)
                else
                   c0t = c0(i) * exp(c0_iceconv * (t(i,k) - 273.16))
                endif
-               if(name == "mid")c0t=0.004
+               if(is_mid)c0t=0.004
 
                denom=zu(i,k-1)-.5*up_massdetr(i,k-1)+up_massentr(i,k-1)
                if(denom.lt.1.e-16)then
@@ -4138,7 +4519,7 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
                qrc(i,k)=max(0.,(qc(i,k)-qrch)) ! /(1.+c0(i)*dz*zu(i,k))
                clw_allh(i,k)=max(0.,qch(i,k)-qrch) 
                qrcb(i,k)=max(0.,(qch(i,k)-qrch)) ! /(1.+c0(i)*dz*zu(i,k))
-               if(name == "deep" )then
+               if(is_deep)then
                  clwdet=0.1 !0.02                 ! 05/11/2021
                  if(k.lt.kklev(i)) clwdet=0.    ! 05/05/2021
                else
@@ -4220,7 +4601,9 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
                psum(i)=psum(i)+pw(i,k) ! HCB
             enddo ! k=kbcon,ktop
 ! do not include liquid/ice in qc
+!$acc loop independent
        do k=k22(i)+1,ktop(i)
+!$acc atomic
            qc(i,k)=qc(i,k)-qrc(i,k)
        enddo
       endif ! ierr
@@ -4228,12 +4611,15 @@ subroutine cup_up_moisture(name,ierr,z_cup,qc,qrc,pw,pwav,     &
 !--- integrated normalized ondensate
 !
  100     continue
+!$acc end kernels
        prop_ave=0.
        iprop=0
+!$acc parallel loop reduction(+:prop_ave,iprop)
        do k=kts,kte
         prop_ave=prop_ave+prop_b(k)
         if(prop_b(k).gt.0)iprop=iprop+1
        enddo
+!$acc end parallel
        iprop=max(iprop,1)
 
  end subroutine cup_up_moisture
@@ -4241,6 +4627,7 @@ end subroutine cup_up_moisture
 !--------------------------------------------------------------------
 !>\ingroup cu_gf_deep_group
  real function satvap(temp2)
+!$acc routine seq
       implicit none
       real(kind=kind_phys) :: temp2, temp, toot, toto, eilog, tsot,            &
      &        ewlog, ewlog2, ewlog3, ewlog4
@@ -4266,10 +4653,11 @@ real function satvap(temp2)
 !--------------------------------------------------------------------
 !>\ingroup cu_gf_deep_group
  subroutine get_cloud_bc(mzp,array,x_aver,k22,add)
+!$acc routine seq
     implicit none
     integer, intent(in)     :: mzp,k22
-    real(kind=kind_phys)   , intent(in)     :: array(mzp)
-    real(kind=kind_phys)   , optional , intent(in)  :: add
+    real(kind=kind_phys)   , dimension(:), intent(in)     :: array
+    real(kind=kind_phys)   , intent(in)  :: add
     real(kind=kind_phys)   , intent(out)    :: x_aver
     integer :: i,local_order_aver,order_aver
 
@@ -4286,7 +4674,7 @@ subroutine get_cloud_bc(mzp,array,x_aver,k22,add)
       x_aver = x_aver + array(k22-i+1)
     enddo
       x_aver = x_aver/float(local_order_aver)
-    if(present(add)) x_aver = x_aver + add
+    x_aver = x_aver + add
 
  end subroutine get_cloud_bc
  !========================================================================================
@@ -4301,19 +4689,31 @@ subroutine rates_up_pdf(rand_vmas,ipr,name,ktop,ierr,p_cup,entr_rate_2d,hkbo,heo
      real(kind=kind_phys), dimension (its:ite),intent (in) :: hkbo,rand_vmas
      integer, dimension (its:ite),intent (in) :: kstabi,k22,kpbl,csum,xland,pmin_lev
      integer, dimension (its:ite),intent (inout) :: kbcon,ierr,ktop,ktopdby
+!$acc declare copy(entr_rate_2d,zuo,kbcon,ierr,ktop,ktopdby) &
+!$acc         copyin(p_cup, heo,heso_cup,z_cup,hkbo,rand_vmas,kstabi,k22,kpbl,csum,xland,pmin_lev)
+
      !-local vars
      real(kind=kind_phys), dimension (its:ite,kts:kte) :: hcot
+!$acc declare create(hcot)
      real(kind=kind_phys) :: entr_init,beta_u,dz,dbythresh,dzh2,zustart,zubeg,massent,massdetr
      real(kind=kind_phys) :: dby(kts:kte),dbm(kts:kte),zux(kts:kte)
      real(kind=kind_phys) zuh2(40),zh2(40)
      integer :: kklev,i,kk,kbegin,k,kfinalzu
-     integer, dimension (its:ite) :: start_level 
+     integer, dimension (its:ite) :: start_level
+!$acc declare create(start_level)
+     logical :: is_deep, is_mid, is_shallow
      !
      zustart=.1
      dbythresh= 0.8 !.0.95 ! 0.85, 0.6
      if(name == 'shallow' .or. name == 'mid') dbythresh=1.
-     dby(:)=0.
 
+     !dby(:)=0.
+
+      is_deep = (name .eq. 'deep')
+      is_mid = (name .eq. 'mid')
+      is_shallow = (name .eq. 'shallow')
+
+!$acc parallel loop private(beta_u,entr_init,dz,massent,massdetr,zubeg,kklev,kfinalzu,dby,dbm,zux,zuh2,zh2)
      do i=its,itf
       if(ierr(i) > 0 )cycle
       zux(:)=0.
@@ -4326,6 +4726,7 @@ subroutine rates_up_pdf(rand_vmas,ipr,name,ktop,ierr,p_cup,entr_rate_2d,hkbo,heo
        zuo(i,start_level(i))=zustart
         zux(start_level(i))=zustart
         entr_init=entr_rate_2d(i,kts)
+!$acc loop seq
         do k=start_level(i)+1,kbcon(i)
           dz=z_cup(i,k)-z_cup(i,k-1)
           massent=dz*entr_rate_2d(i,k-1)*zuo(i,k-1)
@@ -4335,10 +4736,11 @@ subroutine rates_up_pdf(rand_vmas,ipr,name,ktop,ierr,p_cup,entr_rate_2d,hkbo,heo
           zux(k)=zuo(i,k)
         enddo
        zubeg=zustart !zuo(i,kbcon(i))
-       if(name .eq. 'deep')then
+       if(is_deep)then
         ktop(i)=0
         hcot(i,start_level(i))=hkbo(i)
         dz=z_cup(i,start_level(i))-z_cup(i,start_level(i)-1)
+!$acc loop seq
         do k=start_level(i)+1,ktf-2
            dz=z_cup(i,k)-z_cup(i,k-1)
 
@@ -4350,6 +4752,7 @@ subroutine rates_up_pdf(rand_vmas,ipr,name,ktop,ierr,p_cup,entr_rate_2d,hkbo,heo
         enddo
         ktopdby(i)=maxloc(dby(:),1)
         kklev=maxloc(dbm(:),1)
+!$acc loop seq
         do k=maxloc(dby(:),1)+1,ktf-2
           if(dby(k).lt.dbythresh*maxval(dby))then
               kfinalzu=k  - 1
@@ -4374,38 +4777,41 @@ subroutine rates_up_pdf(rand_vmas,ipr,name,ktop,ierr,p_cup,entr_rate_2d,hkbo,heo
 !           call get_zu_zd_pdf_fim(ipr,xland(i),zuh2,"up",ierr(i),start_level(i),             &
 !           call get_zu_zd_pdf_fim(rand_vmas(i),zubeg,ipr,xland(i),zuh2,"up",ierr(i),kbcon(i), &
 !            kfinalzu,zuo(i,kts:kte),kts,kte,ktf,beta_u,kpbl(i),csum(i),pmin_lev(i))
-           call get_zu_zd_pdf_fim(kklev,p_cup(i,:),rand_vmas(i),zubeg,ipr,xland(i),zuh2,"up",ierr(i),k22(i), &
+           call get_zu_zd_pdf_fim(kklev,p_cup(i,:),rand_vmas(i),zubeg,ipr,xland(i),zuh2,1,ierr(i),k22(i), &
             kfinalzu+1,zuo(i,kts:kte),kts,kte,ktf,beta_u,kbcon(i),csum(i),pmin_lev(i))
         endif
       endif ! end deep
-      if ( name == 'mid' ) then
+      if ( is_mid ) then
        if(ktop(i) <= kbcon(i)+2)then
               ierr(i)=41
               ktop(i)= 0
        else
            kfinalzu=ktop(i)
            ktopdby(i)=ktop(i)+1
-          call get_zu_zd_pdf_fim(kklev,p_cup(i,:),rand_vmas(i),zubeg,ipr,xland(i),zuh2,"mid",ierr(i),k22(i),ktopdby(i)+1,zuo(i,kts:kte),kts,kte,ktf,beta_u,kbcon(i),csum(i),pmin_lev(i))
+          call get_zu_zd_pdf_fim(kklev,p_cup(i,:),rand_vmas(i),zubeg,ipr,xland(i),zuh2,3, &
+            ierr(i),k22(i),ktopdby(i)+1,zuo(i,kts:kte),kts,kte,ktf,beta_u,kbcon(i),csum(i),pmin_lev(i))
        endif
       endif ! mid
-      if ( name == 'shallow' ) then
+      if ( is_shallow ) then
        if(ktop(i) <= kbcon(i)+2)then
            ierr(i)=41
            ktop(i)= 0
        else
            kfinalzu=ktop(i)
            ktopdby(i)=ktop(i)+1
-           call get_zu_zd_pdf_fim(kbcon(i),p_cup(i,:),rand_vmas(i),zubeg,ipr,xland(i),zuh2,"sh2",ierr(i),k22(i), &
+           call get_zu_zd_pdf_fim(kbcon(i),p_cup(i,:),rand_vmas(i),zubeg,ipr,xland(i),zuh2,2,ierr(i),k22(i), &
              ktopdby(i)+1,zuo(i,kts:kte),kts,kte,ktf,beta_u,kbcon(i),csum(i),pmin_lev(i))
 
          endif
          endif ! shal
      enddo
+!$acc end parallel loop
 
   end subroutine rates_up_pdf
 !-------------------------------------------------------------------------
 !>\ingroup cu_gf_deep_group
  subroutine get_zu_zd_pdf_fim(kklev,p,rand_vmas,zubeg,ipr,xland,zuh2,draft,ierr,kb,kt,zu,kts,kte,ktf,max_mass,kpbli,csum,pmin_lev)
+!$acc routine vector
 
  implicit none
 ! real(kind=kind_phys), parameter :: beta_deep=1.3,g_beta_deep=0.8974707
@@ -4421,7 +4827,7 @@ subroutine get_zu_zd_pdf_fim(kklev,p,rand_vmas,zubeg,ipr,xland,zuh2,draft,ierr,k
  real(kind=kind_phys), intent(in) :: p(kts:kte)
  real(kind=kind_phys)  :: trash,beta_deep,zuh(kts:kte),zuh2(1:40)
  integer, intent(inout) :: ierr
- character*(*), intent(in) ::draft
+ integer, intent(in) ::draft
 
  !- local var
  integer :: k1,kk,k,kb_adj,kpbli_adj,kmax
@@ -4431,22 +4837,18 @@ subroutine get_zu_zd_pdf_fim(kklev,p,rand_vmas,zubeg,ipr,xland,zuh2,draft,ierr,k
 ! very simple lookup tables
 !
         real(kind=kind_phys), dimension(30) :: alpha,g_alpha
-        data   (alpha(k),k=4,27)/3.699999,                               &
+        data   (alpha(k),k=1,30)/3.699999,3.699999,3.699999,3.699999,&
                       3.024999,2.559999,2.249999,2.028571,1.862500, &
                       1.733333,1.630000,1.545454,1.475000,1.415385, &
                       1.364286,1.320000,1.281250,1.247059,1.216667, &
                       1.189474,1.165000,1.142857,1.122727,1.104348, &
-                      1.087500,1.075000,1.075000/
-        data (g_alpha(k),k=4,27)/4.170645,                               &
+                      1.087500,1.075000,1.075000,1.075000,1.075000,1.075000/
+        data (g_alpha(k),k=1,30)/4.170645,4.170645,4.170645,4.170645,    &
                       2.046925 , 1.387837, 1.133003, 1.012418,0.9494680, &
                       0.9153771,0.8972442,0.8885444,0.8856795,0.8865333, &
                       0.8897996,0.8946404,0.9005030,0.9070138,0.9139161, &
                       0.9210315,0.9282347,0.9354376,0.9425780,0.9496124, &
-                      0.9565111,0.9619183,0.9619183/
-        alpha(1:3)=alpha(4)
-        g_alpha(1:3)=g_alpha(4)
-        alpha(28:30)=alpha(27)
-        g_alpha(28:30)=g_alpha(27)
+                      0.9565111,0.9619183,0.9619183,0.9619183,0.9619183,0.9619183/
 
  !- kb cannot be at 1st level
 
@@ -4454,7 +4856,15 @@ subroutine get_zu_zd_pdf_fim(kklev,p,rand_vmas,zubeg,ipr,xland,zuh2,draft,ierr,k
  zu(:)=0.0
  zuh(:)=0.0
    kb_adj=max(kb,2)
- if(draft == "up") then
+
+! Dan: replaced draft string with integer
+!  up = 1
+!  sh2 = 2
+!  mid = 3
+!  down = 4
+!  downm = 5
+
+ if(draft == 1) then
    lev_start=min(.9,.1+csum*.013)
    kb_adj=max(kb,2)
    tunning=max(p(kklev+1),.5*(p(kpbli)+p(kt)))
@@ -4495,7 +4905,7 @@ subroutine get_zu_zd_pdf_fim(kklev,p,rand_vmas,zubeg,ipr,xland,zuh2,draft,ierr,k
 
    if(zu(kpbli).gt.0.)  &
       zu(kts:min(ktf,kt-1))= zu(kts:min(ktf,kt-1))/zu(kpbli)
-     do k=maxloc(zu(:),1),1,-1
+     do k=my_maxloc1d(zu(:),kte,1),1,-1
        if(zu(k).lt.1.e-6)then
          kb_adj=k+1
          exit
@@ -4514,9 +4924,10 @@ subroutine get_zu_zd_pdf_fim(kklev,p,rand_vmas,zubeg,ipr,xland,zuh2,draft,ierr,k
 !        if(p(kt).gt.400.)write(32,122)k,p(k),zu(k),trash
         endif
       enddo
+#ifndef _OPENACC
 122  format(1x,i4,1x,f8.1,1x,f6.2,1x,f6.2)
-
- elseif(draft == "sh2") then
+#endif
+ elseif(draft == 2) then
    k=kklev
    if(kpbli.gt.5)k=kpbli
 !new nov18
@@ -4553,7 +4964,7 @@ subroutine get_zu_zd_pdf_fim(kklev,p,rand_vmas,zubeg,ipr,xland,zuh2,draft,ierr,k
 !      zu(kts:min(ktf,kt+1))= zu(kts:min(ktf,kt+1))/maxval(zu(kts:min(ktf,kt+1)))
    if(zu(kpbli).gt.0.)  &
       zu(kts:min(ktf,kt-1))= zu(kts:min(ktf,kt-1))/zu(kpbli)
-     do k=maxloc(zu(:),1),1,-1
+     do k=my_maxloc1d(zu(:),kte,1),1,-1
        if(zu(k).lt.1.e-6)then
          kb_adj=k+1
          exit
@@ -4566,7 +4977,7 @@ subroutine get_zu_zd_pdf_fim(kklev,p,rand_vmas,zubeg,ipr,xland,zuh2,draft,ierr,k
 !       write(32,122)k,p(k),zu(k)
       enddo
 
- elseif(draft == "mid") then
+ elseif(draft == 3) then
    kb_adj=max(kb,2)
    tunning=.5*(p(kt)+p(kpbli)) !p(kt)+(p(kb_adj)-p(kt))*.9 !*.33
 !new nov18
@@ -4602,7 +5013,7 @@ subroutine get_zu_zd_pdf_fim(kklev,p,rand_vmas,zubeg,ipr,xland,zuh2,draft,ierr,k
 
    if(zu(kpbli).gt.0.)  &
       zu(kts:min(ktf,kt-1))= zu(kts:min(ktf,kt-1))/zu(kpbli)
-     do k=maxloc(zu(:),1),1,-1
+     do k=my_maxloc1d(zu(:),kte,1),1,-1
        if(zu(k).lt.1.e-6)then
          kb_adj=k+1
          exit
@@ -4619,7 +5030,7 @@ subroutine get_zu_zd_pdf_fim(kklev,p,rand_vmas,zubeg,ipr,xland,zuh2,draft,ierr,k
 !       write(33,122)k,p(k),zu(k)
       enddo
 
- elseif(draft == "down" .or. draft == "downm") then
+ elseif(draft == 4 .or. draft == 5) then
 
    tunning=p(kb)
    tunning =min(0.95, (tunning-p(1))/(p(kt)-p(1))) !=.6
@@ -4712,21 +5123,23 @@ subroutine cup_up_aa1bl(aa0,t,tn,q,qo,dtime,  &
      real(kind=kind_phys)                                 ::                           &
         dz,da
 !
+!$acc kernels
         do i=its,itf
          aa0(i)=0.
         enddo
         do i=its,itf
+!$acc loop independent
           do k=kts,kbcon(i)
             if(ierr(i).ne.0 ) cycle
 !           if(k.gt.kbcon(i)) cycle
 
             dz = (z_cup (i,k+1)-z_cup (i,k))*g
             da = dz*(tn(i,k)*(1.+0.608*qo(i,k))-t(i,k)*(1.+0.608*q(i,k)))/dtime
-
+!$acc atomic
             aa0(i)=aa0(i)+da
           enddo
         enddo
-             
+!$acc end kernels
 
  end subroutine cup_up_aa1bl
 !---------------------------------------------------------------------- 
@@ -4738,11 +5151,15 @@ subroutine get_inversion_layers(ierr,p_cup,t_cup,z_cup,qo_cup,qeso_cup,k_inv_lay
         implicit none
         integer                      ,intent (in ) :: itf,ktf,its,ite,kts,kte
         integer, dimension (its:ite) ,intent (in ) :: ierr,kstart,kend
+!$acc declare copyin(ierr,kstart,kend)
         integer, dimension (its:ite) :: kend_p3
+!$acc declare create(kend_p3)
                     
         real(kind=kind_phys),    dimension (its:ite,kts:kte), intent (in ) :: p_cup,t_cup,z_cup,qo_cup,qeso_cup                            
         real(kind=kind_phys),    dimension (its:ite,kts:kte), intent (out) :: dtempdz                    
         integer, dimension (its:ite,kts:kte), intent (out) :: k_inv_layers
+!$acc declare copyin(p_cup,t_cup,z_cup,qo_cup,qeso_cup)
+!$acc declare copyout(dtempdz,k_inv_layers)
         !-local vars
         real(kind=kind_phys)   :: dp,l_mid,l_shal,first_deriv(kts:kte),sec_deriv(kts:kte)
         integer:: ken,kadd,kj,i,k,ilev,kk,ix,k800,k550,mid,shal
@@ -4750,7 +5167,10 @@ subroutine get_inversion_layers(ierr,p_cup,t_cup,z_cup,qo_cup,qeso_cup,k_inv_lay
         !-initialize k_inv_layers as undef
         l_mid=300.
         l_shal=100.
+!$acc kernels
         k_inv_layers(:,:) = 1
+!$acc end kernels
+!$acc parallel loop private(first_deriv,sec_deriv,ilev,ix,k,kadd,ken)
          do i = its,itf
            if(ierr(i) == 0)then
            sec_deriv(:)=0.
@@ -4770,6 +5190,7 @@ subroutine get_inversion_layers(ierr,p_cup,t_cup,z_cup,qo_cup,qeso_cup,k_inv_lay
          ix=1
          k=ilev
          do while (ilev < kend_p3(i)) !(z_cup(i,ilev)<15000.)
+!$acc loop seq
            do kk=k,kend_p3(i)+2 !k,ktf-2
              
              if(sec_deriv(kk) <        sec_deriv(kk+1) .and.  &
@@ -4786,6 +5207,7 @@ subroutine get_inversion_layers(ierr,p_cup,t_cup,z_cup,qo_cup,qeso_cup,k_inv_lay
         !- 2nd criteria
          kadd=0
          ken=maxloc(k_inv_layers(i,:),1)
+!$acc loop seq
          do k=1,ken
            kk=k_inv_layers(i,k+kadd)
            if(kk.eq.1)exit
@@ -4801,8 +5223,10 @@ subroutine get_inversion_layers(ierr,p_cup,t_cup,z_cup,qo_cup,qeso_cup,k_inv_lay
          enddo
         endif
         enddo
+!$acc end parallel
 100 format(1x,16i3)        
         !- find the locations of inversions around 800 and 550 hpa
+!$acc parallel loop private(sec_deriv,shal,mid)
         do i = its,itf
          if(ierr(i) /= 0) cycle
 
@@ -4827,13 +5251,14 @@ subroutine get_inversion_layers(ierr,p_cup,t_cup,z_cup,qo_cup,qeso_cup,k_inv_lay
          k_inv_layers(i,mid )=k_inv_layers(i,k550) ! this is for mid/congestus convection
          k_inv_layers(i,mid+1:kte)=-1
         enddo
-
+!$acc end parallel
         
  end subroutine get_inversion_layers
 !-----------------------------------------------------------------------------------
 !>\ingroup cu_gf_deep_group
 !> This function calcualtes
  function deriv3(xx, xi, yi, ni, m)
+!$acc routine vector
     !============================================================================*/
     ! evaluate first- or second-order derivatives 
     ! using three-point lagrange interpolation 
@@ -4863,7 +5288,11 @@ function deriv3(xx, xi, yi, ni, m)
     ! if x is ouside the xi(1)-xi(ni) interval set deriv3=0.0
     if (xx < xi(1) .or. xx > xi(ni)) then
       deriv3 = 0.0
+#ifndef _OPENACC
       stop "problems with finding the 2nd derivative"
+#else
+      return
+#endif
     end if
 
     ! a binary (bisectional) search to find i so that xi(i-1) < x < xi(i)
@@ -4918,9 +5347,10 @@ subroutine get_lateral_massflux(itf,ktf, its,ite, kts,kte
                                   ,draft,kbcon,k22,up_massentru,up_massdetru,lambau)
 
      implicit none
-     character *(*), intent (in) :: draft
+     integer, intent (in) :: draft
      integer, intent(in):: itf,ktf, its,ite, kts,kte
      integer, intent(in)   , dimension(its:ite)         :: ierr,ktop,kbcon,k22
+!$acc declare copyin(ierr,ktop,kbcon,k22)
     !real(kind=kind_phys),    intent(in),  optional , dimension(its:ite):: lambau
      real(kind=kind_phys),    intent(inout),  optional , dimension(its:ite):: lambau
      real(kind=kind_phys),    intent(in)   , dimension(its:ite,kts:kte) :: zo_cup,zuo
@@ -4929,10 +5359,13 @@ subroutine get_lateral_massflux(itf,ktf, its,ite, kts,kte
                                                           ,up_massentr,  up_massdetr
      real(kind=kind_phys),    intent(  out), dimension(its:ite,kts:kte),  optional ::                  &
                                                           up_massentru,up_massdetru
+!$acc declare copy(lambau,cd,entr_rate_2d) copyin(zo_cup,zuo) copyout(up_massentro, up_massdetro,up_massentr,  up_massdetr)
+!$acc declare copyout(up_massentro, up_massdetro,up_massentr,  up_massdetr, up_massentru,up_massdetru)
      !-- local vars
      integer :: i,k, incr1,incr2,turn
      real(kind=kind_phys) :: dz,trash,trash2
      
+!$acc kernels
      do k=kts,kte
       do i=its,ite
          up_massentro(i,k)=0.
@@ -4941,17 +5374,22 @@ subroutine get_lateral_massflux(itf,ktf, its,ite, kts,kte
          up_massdetr (i,k)=0.
       enddo
      enddo
+!$acc end kernels
      if(present(up_massentru) .and. present(up_massdetru))then
+!$acc kernels
        do k=kts,kte
         do i=its,ite
           up_massentru(i,k)=0.
           up_massdetru(i,k)=0.
         enddo
        enddo
+!$acc end kernels
      endif
+!$acc parallel loop
      do i=its,itf
        if(ierr(i).eq.0)then
-         
+
+!$acc loop private(dz)
           do k=max(2,k22(i)+1),maxloc(zuo(i,:),1)
            !=> below maximum value zu -> change entrainment
            dz=zo_cup(i,k)-zo_cup(i,k-1)
@@ -4965,6 +5403,7 @@ subroutine get_lateral_massflux(itf,ktf, its,ite, kts,kte
            endif
            if(zuo(i,k-1).gt.0.)entr_rate_2d(i,k-1)=(up_massentro(i,k-1))/(dz*zuo(i,k-1))
          enddo
+!$acc loop private(dz)
          do k=maxloc(zuo(i,:),1)+1,ktop(i)
            !=> above maximum value zu -> change detrainment
            dz=zo_cup(i,k)-zo_cup(i,k-1)
@@ -4989,8 +5428,12 @@ subroutine get_lateral_massflux(itf,ktf, its,ite, kts,kte
          do k=2,ktf-1
            up_massentr (i,k-1)=up_massentro(i,k-1)
            up_massdetr (i,k-1)=up_massdetro(i,k-1)
-         enddo         
-         if(present(up_massentru) .and. present(up_massdetru) .and. draft == 'deep')then
+         enddo        
+! Dan: draft
+!  deep = 1
+!  shallow = 2
+!  mid = 3 
+         if(present(up_massentru) .and. present(up_massdetru) .and. draft == 1)then
           !turn=maxloc(zuo(i,:),1)
           !do k=2,turn
           ! up_massentru(i,k-1)=up_massentro(i,k-1)+.1*lambau(i)*up_massentro(i,k-1)
@@ -5001,12 +5444,12 @@ subroutine get_lateral_massflux(itf,ktf, its,ite, kts,kte
            up_massentru(i,k-1)=up_massentro(i,k-1)+lambau(i)*up_massdetro(i,k-1)
            up_massdetru(i,k-1)=up_massdetro(i,k-1)+lambau(i)*up_massdetro(i,k-1)
           enddo
-         else if(present(up_massentru) .and. present(up_massdetru) .and. draft == 'shallow')then
+         else if(present(up_massentru) .and. present(up_massdetru) .and. draft == 2)then
           do k=2,ktf-1
            up_massentru(i,k-1)=up_massentro(i,k-1)+lambau(i)*up_massdetro(i,k-1)
            up_massdetru(i,k-1)=up_massdetro(i,k-1)+lambau(i)*up_massdetro(i,k-1)
           enddo
-         else if(present(up_massentru) .and. present(up_massdetru) .and. draft == 'mid')then
+         else if(present(up_massentru) .and. present(up_massdetru) .and. draft == 3)then
           lambau(i)=0.
           do k=2,ktf-1
            up_massentru(i,k-1)=up_massentro(i,k-1)+lambau(i)*up_massdetro(i,k-1)
@@ -5025,6 +5468,7 @@ subroutine get_lateral_massflux(itf,ktf, its,ite, kts,kte
   
        endif
     enddo
+!$acc end parallel
  end subroutine get_lateral_massflux
 !---meltglac-------------------------------------------------
 !------------------------------------------------------------------------------------
@@ -5036,17 +5480,23 @@ subroutine get_partition_liq_ice(ierr,tn,po_cup, p_liq_ice,melting_layer
      integer  ,intent (in   )	                          :: itf,ktf, its,ite, kts,kte
      real(kind=kind_phys),     intent (in   ), dimension(its:ite,kts:kte) :: tn,po_cup
      real(kind=kind_phys),     intent (inout), dimension(its:ite,kts:kte) :: p_liq_ice,melting_layer
+!$acc declare copyin(tn,po_cup) copy(p_liq_ice,melting_layer)
      integer  , intent (in  ), dimension(its:ite) :: ierr
+!$acc declare copyin(ierr)
      integer :: i,k
      real(kind=kind_phys)    :: dp     
-     real(kind=kind_phys), dimension(its:ite) :: norm     
+     real(kind=kind_phys), dimension(its:ite) :: norm
+!$acc declare create(norm) 
      real(kind=kind_phys), parameter ::  t1=276.16    
      
      ! hli initialize at the very beginning
+!$acc kernels
         p_liq_ice     (:,:) = 1.
         melting_layer(:,:) = 0.
+!$acc end kernels
      !-- get function of t for partition of total condensate into liq and ice phases.     
      if(melt_glac .and. cumulus == 'deep') then
+!$acc kernels
           do i=its,itf
            if(ierr(i).eq.0)then
            do k=kts,ktf
@@ -5089,8 +5539,10 @@ subroutine get_partition_liq_ice(ierr,tn,po_cup, p_liq_ice,melting_layer
         !do k=kts,ktf
           do i=its,itf
            if(ierr(i).eq.0)then
+!$acc loop independent
            do k=kts,ktf-1
              dp = 100.*(po_cup(i,k)-po_cup(i,k+1)) 
+!$acc atomic update
              norm(i) = norm(i) + melting_layer(i,k)*dp/g
           enddo
           endif
@@ -5111,10 +5563,12 @@ subroutine get_partition_liq_ice(ierr,tn,po_cup, p_liq_ice,melting_layer
 !             !print*,"n=",i,k,norm(i)
 !          enddo
 !        enddo
-        
+!$acc end kernels    
      else
+!$acc kernels
         p_liq_ice     (:,:) = 1.
         melting_layer(:,:) = 0.
+!$acc end kernels
      endif
    end  subroutine get_partition_liq_ice
 
@@ -5131,13 +5585,15 @@ subroutine get_melting_profile(ierr,tn_cup,po_cup, p_liq_ice,melting_layer,qrco
      real(kind=kind_phys)     ,intent (in   ), dimension(its:ite,kts:kte) :: tn_cup,po_cup,qrco,pwo &
                                                             ,pwdo,p_liq_ice,melting_layer
      real(kind=kind_phys)     ,intent (inout), dimension(its:ite,kts:kte) :: melting
+!$acc declare copyin(ierr,edto,tn_cup,po_cup,qrco,pwo,pwdo,p_liq_ice,melting_layer,melting)
      integer :: i,k
      real(kind=kind_phys)    :: dp     
      real(kind=kind_phys), dimension(its:ite) :: norm,total_pwo_solid_phase
      real(kind=kind_phys), dimension(its:ite,kts:kte) :: pwo_solid_phase,pwo_eff
+!$acc declare create(norm,total_pwo_solid_phase,pwo_solid_phase,pwo_eff)
      
      if(melt_glac .and. cumulus == 'deep') then
-
+!$acc kernels
         !-- set melting mixing ratio to zero for columns that do not have deep convection
         do i=its,itf
             if(ierr(i) > 0) melting(i,:) = 0.
@@ -5185,10 +5641,12 @@ subroutine get_melting_profile(ierr,tn_cup,po_cup, p_liq_ice,melting_layer,qrco
 !         print*,"cons=",i,norm(i),total_pwo_solid_phase(i)
 !        enddo
 !--        
-        
+!$acc end kernels
      else
+!$acc kernels
         !-- no melting allowed in this run
         melting     (:,:) = 0.
+!$acc end kernels
      endif
    end  subroutine get_melting_profile
 !---meltglac-------------------------------------------------
@@ -5203,12 +5661,15 @@ subroutine get_cloud_top(name,ktop,ierr,p_cup,entr_rate_2d,hkbo,heo,heso_cup,z_c
      real(kind=kind_phys), dimension (its:ite),intent (in) :: hkbo
      integer, dimension (its:ite),intent (in) :: kstabi,k22,kbcon,kpbl,klcl
      integer, dimension (its:ite),intent (inout) :: ierr,ktop
+!$acc declare copy(entr_rate_2d,zuo,ierr,ktop) copyin(p_cup, heo,heso_cup,z_cup,hkbo,kstabi,k22,kbcon,kpbl,klcl)
      real(kind=kind_phys), dimension (its:ite,kts:kte) :: hcot
+!$acc declare create(hcot)
      character *(*), intent (in) ::    name
      real(kind=kind_phys) :: dz,dh, dbythresh
      real(kind=kind_phys) :: dby(kts:kte)
      integer :: i,k,ipr,kdefi,kstart,kbegzu,kfinalzu
      integer, dimension (its:ite) :: start_level
+!$acc declare create(start_level)
      integer,parameter :: find_ktop_option = 1 !0=original, 1=new
      
      dbythresh=0.8 !0.95  ! the range of this parameter is 0-1, higher => lower
@@ -5219,6 +5680,7 @@ subroutine get_cloud_top(name,ktop,ierr,p_cup,entr_rate_2d,hkbo,heo,heso_cup,z_c
          dbythresh=1.0
      endif
      !         print*,"================================cumulus=",name; call flush(6)
+!$acc parallel loop private(dby,kfinalzu,dz)
      do i=its,itf
        kfinalzu=ktf-2
        ktop(i)=kfinalzu
@@ -5233,7 +5695,7 @@ subroutine get_cloud_top(name,ktop,ierr,p_cup,entr_rate_2d,hkbo,heo,heso_cup,z_c
        dby(start_level(i))=(hcot(i,start_level(i))-heso_cup(i,start_level(i)))*dz
        
        !print*,'hco1=',start_level(i),kbcon(i),hcot(i,start_level(i))/heso_cup(i,start_level(i))
-       
+!$acc loop seq
        do k=start_level(i)+1,ktf-2
            dz=z_cup(i,k)-z_cup(i,k-1)
 
@@ -5273,6 +5735,7 @@ subroutine get_cloud_top(name,ktop,ierr,p_cup,entr_rate_2d,hkbo,heo,heso_cup,z_c
       !
       endif
      enddo
+!$acc end parallel
   end subroutine get_cloud_top
 !------------------------------------------------------------------------------------
 
diff --git a/physics/cu_gf_driver.F90 b/physics/cu_gf_driver.F90
index f83f673ba..43e82a745 100644
--- a/physics/cu_gf_driver.F90
+++ b/physics/cu_gf_driver.F90
@@ -7,7 +7,7 @@ module cu_gf_driver
    ! DH* TODO: replace constants with arguments to cu_gf_driver_run
    !use physcons  , g => con_g, cp => con_cp, xlv => con_hvap, r_v => con_rv
    use machine   , only: kind_phys
-   use cu_gf_deep, only: cu_gf_deep_run,neg_check,fct1d3
+   use cu_gf_deep, only: cu_gf_deep_run,neg_check,autoconv,aeroevap,fct1d3
    use cu_gf_sh  , only: cu_gf_sh_run
 
    implicit none
@@ -40,11 +40,14 @@ subroutine cu_gf_driver_init(imfshalcnv, imfshalcnv_gf, imfdeepcnv, &
          errflg = 0
 
          ! DH* temporary
-         if (mpirank==mpiroot) then
-            write(0,*) ' -----------------------------------------------------------------------------------------------------------------------------'
-            write(0,*) ' --- WARNING --- the CCPP Grell Freitas convection scheme is currently under development, use at your own risk --- WARNING ---'
-            write(0,*) ' -----------------------------------------------------------------------------------------------------------------------------'
-         end if
+         ! if (mpirank==mpiroot) then
+         !    write(0,*) ' ----------------------------------------------------------'//&
+         !                '-------------------------------------------------------------------'
+         !    write(0,*) ' --- WARNING --- the CCPP Grell Freitas convection scheme is'//&
+         !                ' currently under development, use at your own risk --- WARNING ---'
+         !    write(0,*) ' --------------------------------------------------------------------'//&
+         !                '---------------------------------------------------------'
+         ! end if
          ! *DH temporary
 
          ! Consistency checks
@@ -116,15 +119,19 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
    logical, intent(in   ) :: ldiag3d
 
    real(kind=kind_phys), intent(inout)                      :: dtend(:,:,:)
+!$acc declare copy(dtend)
    integer, intent(in)                                      :: dtidx(:,:), &
         index_of_x_wind, index_of_y_wind, index_of_temperature,            &
         index_of_process_scnv, index_of_process_dcnv, ntqv, ntcw, ntiw
-
+!$acc declare copyin(dtidx)
    real(kind=kind_phys),  dimension( : , : ), intent(in    ) :: forcet,forceqv_spechum,w,phil
    real(kind=kind_phys),  dimension( : , : ), intent(inout ) :: t,us,vs
    real(kind=kind_phys),  dimension( : , : ), intent(inout ) :: qci_conv
    real(kind=kind_phys),  dimension( : , : ), intent(out   ) :: cnvw_moist,cnvc
    real(kind=kind_phys),  dimension( : , : ), intent(inout ) :: cliw, clcw
+!$acc declare copyin(forcet,forceqv_spechum,w,phil)
+!$acc declare copy(t,us,vs,qci_conv,cliw, clcw)
+!$acc declare copyout(cnvw_moist,cnvc)
 
    real(kind=kind_phys), allocatable :: clcw_save(:,:), cliw_save(:,:)
 
@@ -132,28 +139,38 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
    real(kind=kind_phys), intent(in) :: fhour, fh_dfi_radar(:)
    integer, intent(in) :: num_dfi_radar, ix_dfi_radar(:)
    real(kind=kind_phys), intent(in) :: cap_suppress(:,:)
+!$acc declare copyin(fh_dfi_radar,ix_dfi_radar,cap_suppress)
 
    integer, dimension (:), intent(out) :: hbot,htop,kcnv
    integer, dimension (:), intent(in)  :: xland
    real(kind=kind_phys),    dimension (:), intent(in) :: pbl
+!$acc declare copyout(hbot,htop,kcnv)
+!$acc declare copyin(xland,pbl)
    integer, dimension (im) :: tropics
+!$acc declare create(tropics)
 !  ruc variable
    real(kind=kind_phys), dimension (:),   intent(in)  :: hfx2,qfx2,psuri
    real(kind=kind_phys), dimension (:,:), intent(out) :: ud_mf,dd_mf,dt_mf
    real(kind=kind_phys), dimension (:),   intent(out) :: raincv,cld1d
    real(kind=kind_phys), dimension (:,:), intent(in)  :: t2di,p2di
+!$acc declare copyin(hfx2,qfx2,psuri,t2di,p2di)
+!$acc declare copyout(ud_mf,dd_mf,dt_mf,raincv,cld1d)
    ! Specific humidity from FV3
    real(kind=kind_phys), dimension (:,:), intent(in) :: qv2di_spechum
    real(kind=kind_phys), dimension (:,:), intent(inout) :: qv_spechum
    real(kind=kind_phys), dimension (:), intent(inout) :: aod_gf
+!$acc declare copyin(qv2di_spechum) copy(qv_spechum,aod_gf)
    ! Local water vapor mixing ratios and cloud water mixing ratios
    real(kind=kind_phys), dimension (im,km) :: qv2di, qv, forceqv, cnvw
+!$acc declare create(qv2di, qv, forceqv, cnvw)
    !
    real(kind=kind_phys), dimension(:),intent(in) :: garea
+!$acc declare copyin(garea)
    real(kind=kind_phys), intent(in   ) :: dt
 
    integer, intent(in   ) :: imfshalcnv
    integer, dimension(:), intent(inout) :: cactiv,cactiv_m
+!$acc declare copy(cactiv,cactiv_m)
 
    character(len=*), intent(out) :: errmsg
    integer,          intent(out) :: errflg
@@ -182,11 +199,23 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
    integer, dimension (im) :: kbcon, ktop,ierr,ierrs,ierrm,kpbli
    integer, dimension (im) :: k22s,kbcons,ktops,k22,jmin,jminm
    integer, dimension (im) :: kbconm,ktopm,k22m
+!$acc declare create(k22_shallow,kbcon_shallow,ktop_shallow,rand_mom,rand_vmas,        &
+!$acc                rand_clos,gdc,gdc2,ht,ccn_gf,ccn_m,dx,frhm,frhd, &
+!$acc                outt,outq,outqc,phh,subm,cupclw,cupclws, &
+!$acc                dhdt,zu,zus,zd,phf,zum,zdm,outum,outvm,   &
+!$acc                outts,outqs,outqcs,outu,outv,outus,outvs, &
+!$acc                outtm,outqm,outqcm,submm,cupclwm,         &
+!$acc                cnvwt,cnvwts,cnvwtm,hco,hcdo,zdo,zdd,hcom,hcdom,zdom, &
+!$acc                tau_ecmwf,edt,edtm,edtd,ter11,aa0,xlandi, &
+!$acc                pret,prets,pretm,hexec,forcing,forcing2,  &
+!$acc                kbcon, ktop,ierr,ierrs,ierrm,kpbli, &
+!$acc                k22s,kbcons,ktops,k22,jmin,jminm,kbconm,ktopm,k22m)
 
    integer :: iens,ibeg,iend,jbeg,jend,n
    integer :: ibegh,iendh,jbegh,jendh
    integer :: ibegc,iendc,jbegc,jendc,kstop
    real(kind=kind_phys), dimension(im,km) :: rho_dryar
+!$acc declare create(rho_dryar)
    real(kind=kind_phys) :: pten,pqen,paph,zrho,pahfs,pqhfl,zkhvfl,pgeoh
    integer, parameter :: ipn = 0
 
@@ -200,6 +229,9 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
    real(kind=kind_phys), dimension (im)    :: z1,psur,cuten,cutens,cutenm
    real(kind=kind_phys), dimension (im)    :: umean,vmean,pmean
    real(kind=kind_phys), dimension (im)    :: xmbs,xmbs2,xmb,xmbm,xmb_dumm,mconv
+!$acc declare create(qcheck,zo,t2d,q2d,po,p2d,rhoi,clw_ten,tn,qo,tshall,qshall,dz8w,omeg, &
+!$acc                z1,psur,cuten,cutens,cutenm,umean,vmean,pmean,           &
+!$acc                xmbs,xmbs2,xmb,xmbm,xmb_dumm,mconv)
 
    integer :: i,j,k,icldck,ipr,jpr,jpr_deep,ipr_deep,uidx,vidx,tidx,qidx
    integer :: itf,jtf,ktf,iss,jss,nbegin,nend,cliw_idx,clcw_idx
@@ -209,6 +241,7 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
    real(kind=kind_phys), dimension(km)   :: massflx,trcflx_in1,clw_in1,po_cup
 !  real(kind=kind_phys), dimension(km)   :: trcflx_in2,clw_in2,clw_ten2
    real(kind=kind_phys), dimension (im)  :: flux_tun,tun_rad_mid,tun_rad_shall,tun_rad_deep
+!$acc declare create(flux_tun,tun_rad_mid,tun_rad_shall,tun_rad_deep)
    character*50 :: ierrc(im),ierrcm(im)
    character*50 :: ierrcs(im)
 !  ruc variable
@@ -216,13 +249,16 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
 !  qfx2 -- latent heat flux (kg/kg m/s), positive upward from sfc
 !  gf needs them in w/m2. define hfx and qfx after simple unit conversion
    real(kind=kind_phys), dimension (im)  :: hfx,qfx
+!$acc declare create(hfx,qfx)
    real(kind=kind_phys) tem,tem1,tf,tcr,tcrf
    real(kind=kind_phys) :: cliw_shal,clcw_shal,tem_shal, cliw_both, weight_sum
    real(kind=kind_phys) :: cliw_deep,clcw_deep,tem_deep, clcw_both
    integer :: cliw_deep_idx, clcw_deep_idx, cliw_shal_idx, clcw_shal_idx
 
    real(kind=kind_phys) :: cap_suppress_j(im)
+!$acc declare create(cap_suppress_j)
    integer :: itime, do_cap_suppress_here
+   logical :: exit_func
 
   !parameter (tf=243.16, tcr=270.16, tcrf=1.0/(tcr-tf)) ! FV3 original
   !parameter (tf=263.16, tcr=273.16, tcrf=1.0/(tcr-tf))
@@ -233,19 +269,25 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
      errflg = 0
 
      if(do_cap_suppress) then
+!$acc serial
        do itime=1,num_dfi_radar
          if(ix_dfi_radar(itime)<1) cycle
          if(fhour<fh_dfi_radar(itime)) cycle
          if(fhour>=fh_dfi_radar(itime+1)) cycle
          exit
        enddo
+!$acc end serial
      endif
      if(do_cap_suppress .and. itime<=num_dfi_radar) then
         do_cap_suppress_here = 1
-        cap_suppress_j = cap_suppress(:,itime)
+!$acc kernels
+        cap_suppress_j(:) = cap_suppress(:,itime)
+!$acc end kernels
      else
         do_cap_suppress_here = 0
-        cap_suppress_j = 0
+!$acc kernels
+        cap_suppress_j(:) = 0
+!$acc end kernels
      endif
 
      if(ldiag3d) then
@@ -266,14 +308,18 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
        if(cliw_deep_idx>=1 .or. clcw_deep_idx>=1 .or. &
             cliw_shal_idx>=1 .or.  clcw_shal_idx>=1) then
          allocate(clcw_save(im,km), cliw_save(im,km))
-         clcw_save=clcw
-         cliw_save=cliw
+!$acc enter data create(clcw_save,cliw_save)
+!$acc kernels
+         clcw_save(:,:)=clcw(:,:)
+         cliw_save(:,:)=cliw(:,:)
+!$acc end kernels
        endif
      endif
 
 !
 ! Scale specific humidity to dry mixing ratio
 !
+!$acc kernels
      ! state in before physics
      qv2di = qv2di_spechum/(1.0_kind_phys-qv2di_spechum)
      ! forcing by dynamics, based on state in
@@ -285,10 +331,10 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
 ! these should be coming in from outside
 !
 !    cactiv(:)      = 0
-!    cactiv_m(:)    = 0
      rand_mom(:)    = 0.
      rand_vmas(:)   = 0.
      rand_clos(:,:) = 0.
+!$acc end kernels
 !
      its=1
      ite=im
@@ -299,7 +345,8 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
      kts=1
      kte=km
      ktf=kte-1
-!
+!$acc kernels
+! 
      tropics(:)=0
 !
 !> - Set tuning constants for radiation coupling
@@ -316,6 +363,7 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
 ! dx for scale awareness
 !    dx=40075000./float(lonf)
 !    tscl_kf=dx/25000.
+!$acc end kernels
 
      if (imfshalcnv == 3) then
       ishallow_g3 = 1
@@ -342,13 +390,17 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
      ztq=0.
      hfm=0.
      qfm=0.
-     ud_mf =0.
-     dd_mf =0.
-     dt_mf =0.
+!$acc kernels
+     ud_mf(:,:) =0.
+     dd_mf(:,:) =0.
+     dt_mf(:,:) =0.
      tau_ecmwf(:)=0.
+!$acc end kernels
 !
      j=1
+!$acc kernels
      ht(:)=phil(:,1)/g
+!$acc loop private(zh)
      do i=its,ite
       cld1d(i)=0.
       zo(i,:)=phil(i,:)/g
@@ -358,6 +410,7 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
       do k=kts+1,ktf
        dz8w(i,k)=zo(i,k+1)-zo(i,k)
       enddo
+!$acc loop seq
       do k=kts+1,ktf
        zh(k)=zh(k-1)+dz8w(i,k-1)
        if(zh(k).gt.pbl(i))then
@@ -366,7 +419,9 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
        endif
       enddo
      enddo
+!$acc end kernels
 
+!$acc kernels
      do i= its,itf
       forcing(i,:)=0.
       forcing2(i,:)=0.
@@ -434,7 +489,10 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
      cuten(:)=0.
      cutenm(:)=0.
      cutens(:)=0.
+!$acc end kernels
      ierrc(:)=" "
+!$acc kernels
+     
 
      kbcon(:)=0
      kbcons(:)=0
@@ -516,7 +574,9 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
         qshall(i,k)=q2d(i,k)
       enddo
      enddo
+!$acc end kernels
 123  format(1x,i2,1x,2(1x,f8.0),1x,2(1x,f8.3),3(1x,e13.5))
+!$acc kernels
      do i=its,itf
       do k=kts,kpbli(i)
          tshall(i,k)=t(i,k)
@@ -549,12 +609,16 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
 !      qshall(i,k)=qv(i,k)
       enddo
      enddo
+!$acc loop collapse(2) independent private(dp)
      do k=  kts+1,ktf-1
       do i = its,itf
        if((p2d(i,1)-p2d(i,k)).gt.150.and.p2d(i,k).gt.300)then
          dp=-.5*(p2d(i,k+1)-p2d(i,k-1))
+!$acc atomic
          umean(i)=umean(i)+us(i,k)*dp
+!$acc atomic
          vmean(i)=vmean(i)+vs(i,k)*dp
+!$acc atomic
          pmean(i)=pmean(i)+dp
        endif
       enddo
@@ -569,15 +633,18 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
      do i = its,itf
       if(mconv(i).lt.0.)mconv(i)=0.
      enddo
+!$acc end kernels
 !
 !---- call cumulus parameterization
 !
        if(ishallow_g3.eq.1)then
 
+!$acc kernels
           do i=its,ite
            ierrs(i)=0
            ierrm(i)=0
           enddo
+!$acc end kernels
 !
 !> - Call shallow: cu_gf_sh_run()
 !
@@ -593,10 +660,11 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
 ! dimesnional variables
                          itf,ktf,its,ite, kts,kte,ipr,tropics)
 
-
+!$acc kernels
           do i=its,itf
            if(xmbs(i).gt.0.)cutens(i)=1.
           enddo
+!$acc end kernels
 !> - Call neg_check() for GF shallow convection
           call neg_check('shallow',ipn,dt,qcheck,outqs,outts,outus,outvs,   &
                                  outqcs,prets,its,ite,kts,kte,itf,ktf,ktops)
@@ -673,12 +741,13 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
               ,do_cap_suppress_here,cap_suppress_j &
               ,k22m          &
               ,jminm,tropics)
-
+!$acc kernels
             do i=its,itf
              do k=kts,ktf
               qcheck(i,k)=qv(i,k) +outqs(i,k)*dt
              enddo
             enddo
+!$acc end kernels
 !> - Call neg_check() for middle GF convection
       call neg_check('mid',ipn,dt,qcheck,outqm,outtm,outum,outvm,   &
                      outqcm,pretm,its,ite,kts,kte,itf,ktf,ktopm)
@@ -756,11 +825,13 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
               ,jmin,tropics)
           jpr=0
           ipr=0
+!$acc kernels
           do i=its,itf
            do k=kts,ktf
             qcheck(i,k)=qv(i,k) +(outqs(i,k)+outqm(i,k))*dt
            enddo
           enddo
+!$acc end kernels
 !> - Call neg_check() for deep GF convection
        call neg_check('deep',ipn,dt,qcheck,outq,outt,outu,outv,   &
                       outqc,pret,its,ite,kts,kte,itf,ktf,ktop)
@@ -785,6 +856,7 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
 !                 cutenm(i)=0.
 !              endif   ! pret > 0
 !            enddo
+!$acc kernels
             do i=its,itf
               kcnv(i)=0
               if(pretm(i).gt.0.)then
@@ -809,7 +881,9 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
                  cuten(i)=0.
               endif   ! pret > 0
             enddo
+!$acc end kernels
 !
+!$acc parallel loop private(kstop,dtime_max,massflx,trcflx_in1,clw_in1,po_cup)
             do i=its,itf
             massflx(:)=0.
             trcflx_in1(:)=0.
@@ -942,6 +1016,8 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
             if(ktop(i).gt.2 .and.pret(i).gt.0.)dt_mf(i,ktop(i)-1)=ud_mf(i,ktop(i))
             endif
             enddo
+!$acc end parallel
+!$acc kernels
             do i=its,itf
               if(pret(i).gt.0.)then
                  cactiv(i)=1
@@ -974,12 +1050,15 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
                 ccn_gf(i)=(aod_gf(i)/0.0027)**(1/0.640)
               endif
             enddo
+!$acc end kernels
  100    continue
 !
 ! Scale dry mixing ratios for water wapor and cloud water to specific humidy / moist mixing ratios
 !
+!$acc kernels
         qv_spechum = qv/(1.0_kind_phys+qv)
         cnvw_moist = cnvw/(1.0_kind_phys+qv)
+!$acc end kernels
 !
 ! Diagnostic tendency updates
 !
@@ -990,21 +1069,28 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
             tidx=dtidx(index_of_temperature,index_of_process_scnv)
             qidx=dtidx(100+ntqv,index_of_process_scnv)
             if(uidx>=1) then
+!$acc kernels
               do k=kts,ktf
                 dtend(:,k,uidx) = dtend(:,k,uidx) + cutens(:)*outus(:,k) * dt
               enddo
+!$acc end kernels
             endif
             if(vidx>=1) then
+!$acc kernels
               do k=kts,ktf
                 dtend(:,k,vidx) = dtend(:,k,vidx) + cutens(:)*outvs(:,k) * dt
               enddo
+!$acc end kernels
             endif
             if(tidx>=1) then
+!$acc kernels
               do k=kts,ktf
                 dtend(:,k,tidx) = dtend(:,k,tidx) + cutens(:)*outts(:,k) * dt
               enddo
+!$acc end kernels
             endif
             if(qidx>=1) then
+!$acc kernels
               do k=kts,ktf
                 do i=its,itf
                   tem = cutens(i)*outqs(i,k)* dt
@@ -1012,6 +1098,7 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
                   dtend(i,k,qidx) = dtend(i,k,qidx) + tem
                 enddo
               enddo
+!$acc end kernels
             endif
           endif
           if((ideep.eq.1. .or. imid_gf.eq.1) .and. .not.flag_for_dcnv_generic_tend) then
@@ -1019,23 +1106,30 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
             vidx=dtidx(index_of_y_wind,index_of_process_dcnv)
             tidx=dtidx(index_of_temperature,index_of_process_dcnv)
             if(uidx>=1) then
+!$acc kernels
               do k=kts,ktf
                 dtend(:,k,uidx) = dtend(:,k,uidx) + (cuten*outu(:,k)+cutenm*outum(:,k)) * dt
               enddo
+!$acc end kernels
             endif
             if(vidx>=1) then
+!$acc kernels
               do k=kts,ktf
                 dtend(:,k,vidx) = dtend(:,k,vidx) + (cuten*outv(:,k)+cutenm*outvm(:,k)) * dt
               enddo
+!$acc end kernels
             endif
             if(tidx>=1) then
+!$acc kernels
               do k=kts,ktf
                 dtend(:,k,tidx) = dtend(:,k,tidx) + (cuten*outt(:,k)+cutenm*outtm(:,k)) * dt
               enddo
+!$acc end kernels
             endif
 
             qidx=dtidx(100+ntqv,index_of_process_dcnv)
             if(qidx>=1) then
+!$acc kernels
               do k=kts,ktf
                 do i=its,itf
                   tem = (cuten(i)*outq(i,k) + cutenm(i)*outqm(i,k))* dt
@@ -1043,9 +1137,11 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
                   dtend(i,k,qidx) = dtend(i,k,qidx) + tem
                 enddo
               enddo
+!$acc end kernels
             endif
           endif
           if(allocated(clcw_save)) then
+!$acc parallel loop collapse(2) private(tem_shal,tem_deep,tem,tem1,weight_sum,cliw_both,clcw_both)
             do k=kts,ktf
               do i=its,itf
                 tem_shal = dt*(outqcs(i,k)*cutens(i)+outqcm(i,k)*cutenm(i))
@@ -1078,6 +1174,7 @@ subroutine cu_gf_driver_run(ntracer,garea,im,km,dt,flag_init,flag_restart,&
                 endif
               enddo
             enddo
+!$acc end parallel
           endif
         endif
    end subroutine cu_gf_driver_run
diff --git a/physics/cu_gf_driver_post.F90 b/physics/cu_gf_driver_post.F90
index eab5eefd6..b9fafc4df 100644
--- a/physics/cu_gf_driver_post.F90
+++ b/physics/cu_gf_driver_post.F90
@@ -37,6 +37,7 @@ subroutine cu_gf_driver_post_run (im, t, q, prevst, prevsq, cactiv, cactiv_m, co
       real(kind_phys),  intent(out) :: conv_act(:)
       real(kind_phys),  intent(out) :: conv_act_m(:)
       character(len=*), intent(out) :: errmsg
+!$acc declare copyin(t,q,cactiv,cactiv_m) copyout(prevst,prevsq,conv_act,conv_act_m)
       integer, intent(out)          :: errflg
 
       ! Local variables
@@ -46,6 +47,7 @@ subroutine cu_gf_driver_post_run (im, t, q, prevst, prevsq, cactiv, cactiv_m, co
       errmsg = ''
       errflg = 0
 
+!$acc kernels
       prevst(:,:) = t(:,:)
       prevsq(:,:) = q(:,:)
 
@@ -61,6 +63,7 @@ subroutine cu_gf_driver_post_run (im, t, q, prevst, prevsq, cactiv, cactiv_m, co
           conv_act_m(i)=0.0
         endif
       enddo
+!$acc end kernels
 
    end subroutine cu_gf_driver_post_run
 
diff --git a/physics/cu_gf_driver_pre.F90 b/physics/cu_gf_driver_pre.F90
index 4d4ae9162..58dc0414a 100644
--- a/physics/cu_gf_driver_pre.F90
+++ b/physics/cu_gf_driver_pre.F90
@@ -37,12 +37,15 @@ subroutine cu_gf_driver_pre_run (flag_init, flag_restart, kdt, fhour, dtp, t, q,
       real(kind_phys),  intent(in)  :: q(:,:)
       real(kind_phys),  intent(in)  :: prevst(:,:)
       real(kind_phys),  intent(in)  :: prevsq(:,:)
+!$acc declare copyin(t,q,prevst,prevsq)
       real(kind_phys),  intent(out) :: forcet(:,:)
       real(kind_phys),  intent(out) :: forceq(:,:)
       integer,          intent(out) :: cactiv(:)
       integer,          intent(out) :: cactiv_m(:)
+!$acc declare copyout(forcet,forceq,cactiv,cactiv_m)
       real(kind_phys),  intent(in)  :: conv_act(:)
       real(kind_phys),  intent(in)  :: conv_act_m(:)
+!$acc declare copyin(conv_act,conv_act_m)
       character(len=*), intent(out) :: errmsg
       integer,          intent(out) :: errflg
 
@@ -57,21 +60,29 @@ subroutine cu_gf_driver_pre_run (flag_init, flag_restart, kdt, fhour, dtp, t, q,
       ! are read from the restart files beforehand, same
       ! for conv_act.
       if(flag_init .and. .not.flag_restart) then
+!$acc kernels
         forcet(:,:)=0.0
         forceq(:,:)=0.0
+!$acc end kernels
       else
         dtdyn=3600.0*(fhour)/kdt
         if(dtp > dtdyn) then
+!$acc kernels
           forcet(:,:)=(t(:,:) - prevst(:,:))/dtp
           forceq(:,:)=(q(:,:) - prevsq(:,:))/dtp
+!$acc end kernels
         else
+!$acc kernels
           forcet(:,:)=(t(:,:) - prevst(:,:))/dtdyn
           forceq(:,:)=(q(:,:) - prevsq(:,:))/dtdyn
+!$acc end kernels
         endif
       endif
 
+!$acc kernels
       cactiv(:)=nint(conv_act(:))
       cactiv_m(:)=nint(conv_act_m(:))
+!$acc end kernels
 
    end subroutine cu_gf_driver_pre_run
 
diff --git a/physics/cu_gf_sh.F90 b/physics/cu_gf_sh.F90
index e30ca95bc..b9a723856 100644
--- a/physics/cu_gf_sh.F90
+++ b/physics/cu_gf_sh.F90
@@ -91,6 +91,7 @@ subroutine cu_gf_sh_run (                                            &
      real(kind=kind_phys),    dimension (its:ite,kts:kte)                              &
         ,intent (inout  )                 ::                           &
         cnvwt,outt,outq,outqc,cupclw,zuo,outu,outv
+!$acc declare copy(cnvwt,outt,outq,outqc,cupclw,zuo,outu,outv)
      real(kind=kind_phys),    dimension (its:ite)                                      &
         ,intent (out  )                   ::                           &
         xmb_out
@@ -103,6 +104,7 @@ subroutine cu_gf_sh_run (                                            &
      integer,    dimension (its:ite)                                   &
         ,intent (in  )                    ::                           &
         kpbl,tropics
+!$acc declare copyout(xmb_out,kbcon,ktop,k22) copyin(kpbl,tropics) copy(ierr)
   !
   ! basic environmental input includes a flag (ierr) to turn off
   ! convection for this call only and at that particular gridpoint
@@ -120,6 +122,7 @@ subroutine cu_gf_sh_run (                                            &
      real(kind=kind_phys)                                                              &
         ,intent (in   )                   ::                           &
         dtime,tcrit
+!$acc declare copyin(t,po,tn,dhdt,rho,us,vs) copy(q,qo) copyin(xland,z1,psur,hfx,qfx) copyin(dtime,tcrit)
   !
   !***************** the following are your basic environmental
   !                  variables. they carry a "_cup" if they are
@@ -179,6 +182,19 @@ subroutine cu_gf_sh_run (                                            &
 
         cd,dellah,dellaq,dellat,dellaqc,uc,vc,dellu,dellv,u_cup,v_cup
 
+!$acc declare create( &
+!$acc        entr_rate_2d,he,hes,qes,z,                                     &
+!$acc        heo,heso,qeso,zo,                                              &
+!$acc        xhe,xhes,xqes,xz,xt,xq,                                        &
+!$acc        qes_cup,q_cup,he_cup,hes_cup,z_cup,p_cup,gamma_cup,t_cup,      &
+!$acc        qeso_cup,qo_cup,heo_cup,heso_cup,zo_cup,po_cup,gammao_cup,     &
+!$acc        tn_cup,                                                        &
+!$acc        xqes_cup,xq_cup,xhe_cup,xhes_cup,xz_cup,                       &
+!$acc        xt_cup,dby,hc,zu,                                              &
+!$acc        dbyo,qco,pwo,hco,qrco,                                         &
+!$acc        dbyt,xdby,xhc,xzu,            &
+!$acc        cd,dellah,dellaq,dellat,dellaqc,uc,vc,dellu,dellv,u_cup,v_cup)
+
   ! aa0 cloud work function for downdraft
   ! aa0     = cloud work function without forcing effects
   ! aa1     = cloud work function with forcing effects
@@ -192,6 +208,13 @@ subroutine cu_gf_sh_run (                                            &
        cap_max_increment,lambau
      integer,    dimension (its:ite)      ::                           &
        kstabi,xland1,kbmax,ktopx
+!$acc declare create( &
+!$acc       zws,ztexec,zqexec,pre,aa1,aa0,xaa0,hkb,                         &
+!$acc       flux_tun,hkbo,xhkb,                                             &
+!$acc       rand_vmas,xmbmax,xmb,                                           &
+!$acc       cap_max,entr_rate,                                              &
+!$acc       cap_max_increment,lambau,                                       &
+!$acc       kstabi,xland1,kbmax,ktopx)
 
      integer                              ::                           &
        kstart,i,k,ki
@@ -205,15 +228,24 @@ subroutine cu_gf_sh_run (                                            &
      character*50 :: ierrc(its:ite)
      real(kind=kind_phys),    dimension (its:ite,kts:kte) ::                           &
        up_massentr,up_massdetr,up_massentro,up_massdetro,up_massentru,up_massdetru
+!$acc declare create(up_massentr,up_massdetr,up_massentro,up_massdetro,up_massentru,up_massdetru)
      real(kind=kind_phys) :: c_up,x_add,qaver,dts,fp,fpi
      real(kind=kind_phys),    dimension (its:ite,kts:kte) :: c1d,dtempdz
      integer, dimension (its:ite,kts:kte) ::  k_inv_layers 
      integer, dimension (its:ite) ::  start_level, pmin_lev
+!$acc declare create(c1d,dtempdz,k_inv_layers,start_level, pmin_lev)
+
+     real(kind=kind_phys), parameter :: zero = 0
+
+!$acc kernels
      start_level(:)=0
      rand_vmas(:)=0.
-     flux_tun=fluxtune
+     flux_tun(:)=fluxtune
      lambau(:)=2.
      c1d(:,:)=0.
+!$acc end kernels
+
+!$acc kernels
       do i=its,itf
         xland1(i)=int(xland(i)+.001) ! 1.
         ktopx(i)=0
@@ -224,9 +256,13 @@ subroutine cu_gf_sh_run (                                            &
         pre(i)=0.
         xmb_out(i)=0.
         cap_max_increment(i)=25.
-        ierrc(i)=" "
         entr_rate(i) = 1.e-3 !9.e-5 ! 1.75e-3 ! 1.2e-3 ! .2/50.
       enddo
+!$acc end kernels
+
+      do i=its,itf
+        ierrc(i)=" "
+      enddo
 !
 !--- initial entrainment rate (these may be changed later on in the
 !--- program
@@ -235,6 +271,7 @@ subroutine cu_gf_sh_run (                                            &
 !
 !--- initial detrainmentrates
 !
+!$acc kernels
       do k=kts,ktf
       do i=its,itf
         up_massentro(i,k)=0.
@@ -250,6 +287,7 @@ subroutine cu_gf_sh_run (                                            &
         cupclw(i,k)=0.
       enddo
       enddo
+!$acc end kernels
 !
 !--- max/min allowed value for epsilon (ratio downdraft base mass flux/updraft
 !
@@ -259,6 +297,7 @@ subroutine cu_gf_sh_run (                                            &
 !--- maximum depth (mb) of capping 
 !--- inversion (larger cap = no convection)
 !
+!$acc kernels
       cap_maxs=175.
       do i=its,itf
         kbmax(i)=1
@@ -292,7 +331,7 @@ subroutine cu_gf_sh_run (                                            &
        zws(i) = zws(i)*rho(i,kpbl(i)) !check if zrho is correct
 
       enddo
-
+!$acc end kernels
 !
 !> - Determin max height(m) above ground where updraft air can originate
 !
@@ -322,6 +361,8 @@ subroutine cu_gf_sh_run (                                            &
            ierr,z1,                                                &
            itf,ktf,                                                &
            its,ite, kts,kte)
+
+!$acc kernels
       do i=its,itf
         if(ierr(i).eq.0)then
           u_cup(i,kts)=us(i,kts)
@@ -336,6 +377,7 @@ subroutine cu_gf_sh_run (                                            &
       do i=its,itf
         if(ierr(i).eq.0)then
 !
+!$acc loop seq
       do k=kts,ktf
         if(zo_cup(i,k).gt.zkbmax+z1(i))then
           kbmax(i)=k
@@ -347,12 +389,14 @@ subroutine cu_gf_sh_run (                                            &
       kbmax(i)=min(kbmax(i),ktf/2)
       endif
       enddo
+!$acc end kernels
 
 !
 !
 !
 !> - Determine level with highest moist static energy content (\p k22)
 !
+!$acc parallel loop
        do 36 i=its,itf
          if(kpbl(i).gt.3)cap_max(i)=po_cup(i,kpbl(i))
          if(ierr(i) == 0)then
@@ -360,17 +404,21 @@ subroutine cu_gf_sh_run (                                            &
           k22(i)=max(2,k22(i))
           if(k22(i).gt.kbmax(i))then
            ierr(i)=2
+#ifndef _OPENACC
            ierrc(i)="could not find k22"
+#endif
            ktop(i)=0
            k22(i)=0
            kbcon(i)=0
          endif
          endif
  36   continue
+!$acc end parallel
 !
 !> - Call get_cloud_bc() and cup_kbcon() to determine the level of 
 !! convective cloud base (\p kbcon)
 !
+!$acc parallel loop private(x_add)
       do i=its,itf
        if(ierr(i).eq.0)then
              x_add = xlv*zqexec(i)+cp*ztexec(i)
@@ -378,13 +426,17 @@ subroutine cu_gf_sh_run (                                            &
              call get_cloud_bc(kte,heo_cup(i,1:kte),hkbo(i),k22(i),x_add)
        endif ! ierr
       enddo
+!$acc end parallel
 
 !joe-georg and saulo's new idea:
+
+!$acc kernels
       do i=its,itf
       do k=kts,ktf
           dbyo(i,k)= 0. !hkbo(i)-heso_cup(i,k)
       enddo
       enddo
+!$acc end kernels
 
 
       call cup_kbcon(ierrc,cap_max_increment,5,k22,kbcon,heo_cup,heso_cup, &
@@ -403,6 +455,7 @@ subroutine cu_gf_sh_run (                                            &
                            kbcon,kstabi,dtempdz,itf,ktf,its,ite, kts,kte)
 !
 !
+!$acc parallel loop private(frh,kstart,x_add)
       do i=its,itf
          entr_rate_2d(i,:)=entr_rate(i)
          if(ierr(i) == 0)then
@@ -438,9 +491,11 @@ subroutine cu_gf_sh_run (                                            &
             endif
          endif
       enddo
+!$acc end parallel
 !> - Call rates_up_pdf() to get normalized mass flux profile
       call rates_up_pdf(rand_vmas,ipr,'shallow',ktop,ierr,po_cup,entr_rate_2d,hkbo,heo,heso_cup,zo_cup, &
            xland1,kstabi,k22,kbcon,its,ite,itf,kts,kte,ktf,zuo,kpbl,ktopx,kbcon,pmin_lev)
+!$acc kernels
       do i=its,itf
         if(ierr(i).eq.0)then
 !           do k=maxloc(zuo(i,:),1),1,-1 ! ktop(i)-1,1,-1
@@ -451,22 +506,26 @@ subroutine cu_gf_sh_run (                                            &
 !             endif
 !           enddo
            if(k22(i).gt.1)then
+!$acc loop independent
              do k=1,k22(i)-1
               zuo(i,k)=0.
               zu (i,k)=0.
               xzu(i,k)=0.
              enddo
            endif
+!$acc loop seq
            do k=maxloc(zuo(i,:),1),ktop(i)
              if(zuo(i,k).lt.1.e-6)then
                ktop(i)=k-1
                exit
              endif
            enddo
+!$acc loop independent
            do k=k22(i),ktop(i)
              xzu(i,k)= zuo(i,k)
               zu(i,k)= zuo(i,k)
            enddo
+!$acc loop independent
            do k=ktop(i)+1,ktf
              zuo(i,k)=0.
              zu (i,k)=0.
@@ -475,14 +534,15 @@ subroutine cu_gf_sh_run (                                            &
            k22(i)=max(2,k22(i))
         endif
       enddo
+!$acc end kernels
 !
 !> - Call get_lateral_massflux() to calculate mass entrainment and detrainment
 !
       call get_lateral_massflux(itf,ktf, its,ite, kts,kte                             &
                                 ,ierr,ktop,zo_cup,zuo,cd,entr_rate_2d                 &
                                 ,up_massentro, up_massdetro ,up_massentr, up_massdetr &
-                                ,'shallow',kbcon,k22,up_massentru,up_massdetru,lambau)
-
+                                ,2,kbcon,k22,up_massentru,up_massdetru,lambau)
+!$acc kernels
       do k=kts,ktf
       do i=its,itf
          hc(i,k)=0.
@@ -507,11 +567,15 @@ subroutine cu_gf_sh_run (                                            &
          hc(i,k)=hkb(i)
          hco(i,k)=hkbo(i)
       enddo
+!$acc end kernels
 !
 !
+
+!$acc parallel loop private(ki,qaver,k,trash,trash2,dz,dp)
       do 42 i=its,itf
         dbyt(i,:)=0.
         if(ierr(i) /= 0) cycle
+!$acc loop seq
          do k=start_level(i)+1,ktop(i)
           hc(i,k)=(hc(i,k-1)*zu(i,k-1)-.5*up_massdetr(i,k-1)*hc(i,k-1)+      &
                          up_massentr(i,k-1)*he(i,k-1))   /                   &
@@ -547,16 +611,20 @@ subroutine cu_gf_sh_run (                                            &
 
          if(ktop(i).lt.kbcon(i)+1)then
             ierr(i)=5
+#ifndef _OPENACC
             ierrc(i)='ktop is less than kbcon+1'
+#endif
              go to 42
          endif
          if(ktop(i).gt.ktf-2)then
              ierr(i)=5
+#ifndef _OPENACC
              ierrc(i)="ktop is larger than ktf-2"
+#endif
              go to 42
          endif
 !
-         call get_cloud_bc(kte,qo_cup (i,1:kte),qaver,k22(i))
+         call get_cloud_bc(kte,qo_cup (i,1:kte),qaver,k22(i),zero)
          qaver = qaver + zqexec(i)
          do k=1,start_level(i)-1
            qco (i,k)= qo_cup(i,k)
@@ -564,6 +632,7 @@ subroutine cu_gf_sh_run (                                            &
          k=start_level(i)
          qco (i,k)= qaver 
 !
+!$acc loop seq
          do k=start_level(i)+1,ktop(i)
           trash=qeso_cup(i,k)+(1./xlv)*(gammao_cup(i,k)                &
                 /(1.+gammao_cup(i,k)))*dbyo(i,k)
@@ -593,15 +662,21 @@ subroutine cu_gf_sh_run (                                            &
          enddo
          trash=0.
          trash2=0.
+!$acc loop independent
          do k=k22(i)+1,ktop(i)
           dp=100.*(po_cup(i,k)-po_cup(i,k+1))
           cnvwt(i,k)=zuo(i,k)*cupclw(i,k)*g/dp
+!$acc atomic
           trash2=trash2+entr_rate_2d(i,k)
+!$acc atomic
           qco(i,k)=qco(i,k)-qrco(i,k)
          enddo
+!$acc loop independent
          do k=k22(i)+1,max(kbcon(i),k22(i)+1)
+!$acc atomic
           trash=trash+entr_rate_2d(i,k)
          enddo
+!$acc loop independent
          do k=ktop(i)+1,ktf-1
            hc  (i,k)=hes_cup (i,k)
            hco (i,k)=heso_cup(i,k)
@@ -616,6 +691,7 @@ subroutine cu_gf_sh_run (                                            &
            zuo (i,k)=0.
          enddo
  42 continue
+!$acc end parallel
 !
 !--- calculate workfunctions for updrafts
 !
@@ -626,14 +702,18 @@ subroutine cu_gf_sh_run (                                            &
         call cup_up_aa0(aa1,zo,zuo,dbyo,gammao_cup,tn_cup, &
             kbcon,ktop,ierr,                               &
             itf,ktf, its,ite, kts,kte)
+!$acc kernels
         do i=its,itf
           if(ierr(i) == 0)then
            if(aa1(i) <= 0.)then
                ierr(i)=17
+#ifndef _OPENACC
                ierrc(i)="cloud work function zero"
+#endif
            endif
          endif
        enddo
+!$acc end kernels
       endif
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -643,6 +723,7 @@ subroutine cu_gf_sh_run (                                            &
 !
 !--- 1. in bottom layer
 !
+!$acc kernels
       do k=kts,kte
        do i=its,itf
         dellah(i,k)=0.
@@ -652,6 +733,7 @@ subroutine cu_gf_sh_run (                                            &
         dellv  (i,k)=0.
        enddo
       enddo
+!$acc end kernels
 !
 !----------------------------------------------  cloud level ktop
 !
@@ -692,6 +774,8 @@ subroutine cu_gf_sh_run (                                            &
 !
 !- - - - - - - - - - - - - - - - - - - - - - - - model level 1
       trash2=0.
+!$acc kernels
+!$acc loop independent
       do i=its,itf
         if(ierr(i).eq.0)then
          dp=100.*(po_cup(i,1)-po_cup(i,2))
@@ -706,10 +790,12 @@ subroutine cu_gf_sh_run (                                            &
             entup=up_massentro(i,k)
             detup=up_massdetro(i,k)
             totmas=detup-entup+zuo(i,k+1)-zuo(i,k)
+#ifndef _OPENACC
             if(abs(totmas).gt.1.e-6)then
                write(0,*)'*********************',i,k,totmas
                write(0,*)k22(i),kbcon(i),ktop(i)
             endif
+#endif
             dp=100.*(po_cup(i,k)-po_cup(i,k+1))
             dellah(i,k) =-(zuo(i,k+1)*(hco(i,k+1)-heo_cup(i,k+1) )-     &
                            zuo(i,k  )*(hco(i,k  )-heo_cup(i,k  ) ))*g/dp
@@ -741,12 +827,13 @@ subroutine cu_gf_sh_run (                                            &
           enddo
         endif
       enddo
+!$acc end kernels
 
 !
 !--- using dellas, calculate changed environmental profiles
 !
       mbdt=.5 !3.e-4
-
+!$acc kernels
       do k=kts,ktf
        do i=its,itf
          dellat(i,k)=0.
@@ -767,6 +854,7 @@ subroutine cu_gf_sh_run (                                            &
         xt(i,ktf)=tn(i,ktf)
        endif
       enddo
+!$acc end kernels
 !
 !
      if(make_calc_for_xk) then
@@ -788,12 +876,16 @@ subroutine cu_gf_sh_run (                                            &
 !
 !
 !**************************** static control
+!$acc kernels
       do k=kts,ktf
       do i=its,itf
          xhc(i,k)=0.
          xdby(i,k)=0.
       enddo
       enddo
+!$acc end kernels
+
+!$acc parallel loop private(x_add)
       do i=its,itf
         if(ierr(i).eq.0)then
          x_add = xlv*zqexec(i)+cp*ztexec(i)
@@ -805,17 +897,21 @@ subroutine cu_gf_sh_run (                                            &
          xhc(i,k)=xhkb(i)
         endif !ierr
       enddo
+!$acc end parallel
 !
 !
+!$acc kernels
       do i=its,itf
        if(ierr(i).eq.0)then
         xzu(i,1:ktf)=zuo(i,1:ktf)
+!$acc loop seq
         do k=start_level(i)+1,ktop(i)
          xhc(i,k)=(xhc(i,k-1)*xzu(i,k-1)-.5*up_massdetro(i,k-1)*xhc(i,k-1)+ &
                           up_massentro(i,k-1)*xhe(i,k-1))   /               &
                           (xzu(i,k-1)-.5*up_massdetro(i,k-1)+up_massentro(i,k-1))
          xdby(i,k)=xhc(i,k)-xhes_cup(i,k)
         enddo
+!$acc loop independent
         do k=ktop(i)+1,ktf
            xhc (i,k)=xhes_cup(i,k)
            xdby(i,k)=0.
@@ -823,6 +919,7 @@ subroutine cu_gf_sh_run (                                            &
         enddo
        endif
       enddo
+!$acc end kernels
 
 !
 !--- workfunctions for updraft
@@ -837,6 +934,8 @@ subroutine cu_gf_sh_run (                                            &
 !
 ! now for shallow forcing
 !
+!$acc kernels
+!$acc loop private(xff_shal)
        do i=its,itf
         xmb(i)=0.
         xff_shal(1:3)=0.
@@ -870,7 +969,9 @@ subroutine cu_gf_sh_run (                                            &
           if(ichoice > 0)xmb(i)=min(xmbmax(i),xff_shal(ichoice))
           if(xmb(i) <= 0.)then
              ierr(i)=21
+#ifndef _OPENACC
              ierrc(i)="21"
+#endif
           endif
         endif
         if(ierr(i).ne.0)then
@@ -889,10 +990,12 @@ subroutine cu_gf_sh_run (                                            &
 ! final tendencies
 !
           pre(i)=0.
+!$acc loop independent
           do k=2,ktop(i)
            outt (i,k)= dellat (i,k)*xmb(i)
            outq (i,k)= dellaq (i,k)*xmb(i)
            outqc(i,k)= dellaqc(i,k)*xmb(i)
+!$acc atomic
            pre  (i)  = pre(i)+pwo(i,k)*xmb(i)
           enddo
           outt (i,1)= dellat (i,1)*xmb(i)
@@ -928,6 +1031,7 @@ subroutine cu_gf_sh_run (                                            &
              endif
           endif
       enddo
+!$acc end kernels
 !      
 ! done shallow
 !--------------------------done------------------------------