source: flexpart.git/src/mpi_mod.f90 @ 02095e3

1!**********************************************************************
2! Copyright 1998,1999,2000,2001,2002,2005,2007,2008,2009,2010         *
3! Andreas Stohl, Petra Seibert, A. Frank, Gerhard Wotawa,             *
4! Caroline Forster, Sabine Eckhardt, John Burkhart, Harald Sodemann   *
5!                                                                     *
6! This file is part of FLEXPART.                                      *
7!                                                                     *
8! FLEXPART is free software: you can redistribute it and/or modify    *
9! it under the terms of the GNU General Public License as published by*
10! the Free Software Foundation, either version 3 of the License, or   *
11! (at your option) any later version.                                 *
12!                                                                     *
13! FLEXPART is distributed in the hope that it will be useful,         *
14! but WITHOUT ANY WARRANTY; without even the implied warranty of      *
15! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the       *
16! GNU General Public License for more details.                        *
17!                                                                     *
18! You should have received a copy of the GNU General Public License   *
19! along with FLEXPART.  If not, see <http://www.gnu.org/licenses/>.   *
20!**********************************************************************
21
22module mpi_mod
23
24!*****************************************************************************
25!                                                                            *
26!  DESCRIPTION                                                               *
27!    This module contains subroutines and common variables used for the      *
28!    MPI parallelization of FLEXPART.                                        *
29!                                                                            *
30!  NOTE                                                                      *
31!    Depending on the MPI library installed on your system (e.g. mpich2,     *
32!    OpenMPI) you may need to choose below in this file between              *
33!      use mpi                                                               *
34!    (if the MPI library comes with the file 'mpi.mod'); or                  *
35!      include 'mpif.h'                                                      *
36!                                                                            *
37!                                                                            *
38!*****************************************************************************
39!                                                                            *
40! Variables:                                                                 *
41!                                                                            *
42! mp_ierr                 MPI error code                                     *
43! mp_np                   Number of MPI processes                            *
44! mp_pid                  Process ID of each MPI process                     *
45! mp_seed                 Parameter for random number seed                   *
46! read_grp_min            Minimum number of processes at which one will be   *
47!                         used as reader                                     *
48! numpart_mpi,            Number of particles per node                       *
49! maxpart_mpi                                                                *
50! mp_partid               MPI process ID for particle calculation            *
51! mp_partgroup_           Refers to the subset of processors performing      *
52!                         loops over particles. Will be all processes        *
53!                         unless a dedicated process runs getfields/readwind *
54! lmp_sync                If .false., use asynchronous MPI                   *
55! mp_cp                   Real precision to use for deposition fields        *
56!                                                                            *
57!                                                                            *
58!                                                                            *
59!                                                                            *
60!*****************************************************************************
61!                                                                            *
62! Modification by DJM, 2017-05-09 - added #ifdef USE_MPIINPLACE cpp          *
63! directive to mpif_tm_reduce_grid() to ensure that MPI_IN_PLACE is          *
64! used in the MPI_Reduce() only if specifically compiled with that           *
65! directive.                                                                 *
66!                                                                            *
67!*****************************************************************************
68
69  use mpi
70  use par_mod, only: dp,sp
71  use com_mod, only: lroot
72
73  implicit none
74
75!  include 'mpif.h'
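!  (If your MPI installation does not provide 'mpi.mod', comment out the
!  'use mpi' statement above and uncomment this include instead; see the
!  note in the module header.)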
76
77  public
78
79! Set aside a process for reading windfields if using at least this many processes
80!==================================================
81  integer, parameter, private :: read_grp_min=4
82!==================================================
83
84! Variables for each MPI process in the world group
85  integer :: mp_ierr, mp_np, mp_pid, mp_partid
86  integer, private :: world_group_id
87
88! Variables for MPI processes in the 'particle' group
89  integer, allocatable, dimension(:) :: mp_partgroup_rank
90  integer :: mp_partgroup_comm, mp_partgroup_pid, mp_partgroup_np
91
92  integer :: mp_seed=0
93  integer, parameter :: mp_sp=MPI_REAL4, mp_dp=MPI_REAL8
94  integer :: mp_cp
95  integer, parameter :: id_root=0 ! master process
96
97! MPI tags/requests for send/receive operation
98  integer :: tm1
99  integer, parameter :: nvar_async=26
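! nvar_async is used to size the asynchronous request array 'reqs' in
! mpif_init; presumably one request per asynchronously transferred field
! and process.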
100!integer, dimension(:), allocatable :: tags
101  integer, dimension(:), allocatable :: reqs
102
103! Status array used for certain MPI operations (MPI_RECV)
104  integer, dimension(MPI_STATUS_SIZE) :: mp_status
105
106
107  integer :: id_read   ! readwind/getfield process
108  integer :: numpart_mpi,maxpart_mpi ! number of particles per node
109  integer :: tot_numpart=0
110  integer :: mp_comm_used ! global or subgroup communicator
111
112  logical :: lmpreader=.false. ! is set to true for reading process(es) only.
113  logical :: lmp_use_reader=.false. ! true if separate readwind process is used
114
115! .true. if only using synchronous MPI send/recv (default)
116! If setting this to .false., numwfmem must be set to 3
117!===============================================================================
118  logical :: lmp_sync=.true.
119!===============================================================================
120
121! mp_dbg_mode       Used for debugging MPI.
122! mp_dev_mode       various checks related to debugging the parallel code
123! mp_dbg_out        write some arrays to data file for debugging
124! mp_measure_time   Measure cpu/wall time, write out at end of run
125! mp_time_barrier   Measure MPI barrier time
126! mp_exact_numpart  Use an extra MPI communication to give the exact number of particles
127!                   to standard output (this does *not* otherwise affect the simulation)
128  logical, parameter :: mp_dbg_mode = .false.
129  logical, parameter :: mp_dev_mode = .false.
130  logical, parameter :: mp_dbg_out = .false.
131  logical, parameter :: mp_time_barrier=.true.
132  logical, parameter :: mp_measure_time=.false.
133  logical, parameter :: mp_exact_numpart=.true.
134
135! for measuring CPU/Wall time
136  real(sp),private :: mp_comm_time_beg, mp_comm_time_end, mp_comm_time_total=0.
137  real(dp),private :: mp_comm_wtime_beg, mp_comm_wtime_end, mp_comm_wtime_total=0.
138  real(sp),private :: mp_root_time_beg, mp_root_time_end, mp_root_time_total=0.
139  real(dp),private :: mp_root_wtime_beg, mp_root_wtime_end, mp_root_wtime_total=0.
140  real(sp),private :: mp_barrier_time_beg, mp_barrier_time_end, mp_barrier_time_total=0.
141  real(dp),private :: mp_barrier_wtime_beg, mp_barrier_wtime_end, mp_barrier_wtime_total=0.
142  real(sp),private :: tm_nploop_beg, tm_nploop_end, tm_nploop_total=0.
143  real(sp),private :: tm_tot_beg, tm_tot_end, tm_tot_total=0.
144  real(dp),private :: mp_getfields_wtime_beg, mp_getfields_wtime_end, mp_getfields_wtime_total=0.
145  real(sp),private :: mp_getfields_time_beg, mp_getfields_time_end, mp_getfields_time_total=0.
146  real(dp),private :: mp_readwind_wtime_beg, mp_readwind_wtime_end, mp_readwind_wtime_total=0.
147  real(sp),private :: mp_readwind_time_beg, mp_readwind_time_end, mp_readwind_time_total=0.
148  real(dp),private :: mp_io_wtime_beg, mp_io_wtime_end, mp_io_wtime_total=0.
149  real(sp),private :: mp_io_time_beg, mp_io_time_end, mp_io_time_total=0.
150  real(dp),private :: mp_wetdepo_wtime_beg, mp_wetdepo_wtime_end, mp_wetdepo_wtime_total=0.
151  real(sp),private :: mp_wetdepo_time_beg, mp_wetdepo_time_end, mp_wetdepo_time_total=0.
152  real(dp),private :: mp_advance_wtime_beg, mp_advance_wtime_end, mp_advance_wtime_total=0.
153  real(dp),private :: mp_conccalc_time_beg, mp_conccalc_time_end, mp_conccalc_time_total=0.
154  real(dp),private :: mp_total_wtime_beg, mp_total_wtime_end, mp_total_wtime_total=0.
155  real(dp),private :: mp_vt_wtime_beg, mp_vt_wtime_end, mp_vt_wtime_total
156  real(sp),private :: mp_vt_time_beg, mp_vt_time_end, mp_vt_time_total
157
158! dat_lun           logical unit number for i/o
159  integer, private :: dat_lun
160
161! Temporary arrays for particles (allocated and deallocated as needed)
162  integer, allocatable, dimension(:) :: nclass_tmp, npoint_tmp, itra1_tmp, idt_tmp, &
163       & itramem_tmp, itrasplit_tmp
164  real(kind=dp), allocatable, dimension(:) :: xtra1_tmp, ytra1_tmp
165  real, allocatable, dimension(:) :: ztra1_tmp
166  real, allocatable, dimension(:,:) :: xmass1_tmp
167
168! mp_redist_fract        Exchange particles between processes if relative difference
169!                        is larger. A good value is between 0.0 and 0.5
170! mp_maxpart_factor      Allocate more memory per process than strictly needed
171!                        (safety factor). Recommended value between 1.5 and 2.5
172! mp_min_redist          Do not redistribute particles if below this limit
173  real, parameter :: mp_redist_fract=0.2, mp_maxpart_factor=1.5
174  integer,parameter :: mp_min_redist=100000
175
176
177contains
178
179  subroutine mpif_init
180!***********************************************************************
181! DESCRIPTION
182!   Initialize MPI.
183!   
184!   Create the global communicator MPI_COMM_WORLD
185!   If using a separate MPI process for getfields/readwind, a subgroup
186!   is created for the other processes.
187!
188! VARIABLES
189!   mpi_mode    default 0, set to 2/3 if running MPI version
190!   mp_np       number of running processes, decided at run-time
191!***********************************************************************
192    use par_mod, only: maxpart, numwfmem, dep_prec
193    use com_mod, only: mpi_mode, verbosity
194
195    implicit none
196
197    integer :: i,j,s,addmaxpart=0
198
199! Each process gets an ID (mp_pid) in the range 0,..,mp_np-1
200    call MPI_INIT(mp_ierr)
201    if (mp_ierr /= 0) goto 100
202    call MPI_COMM_RANK(MPI_COMM_WORLD, mp_pid, mp_ierr)
203    if (mp_ierr /= 0) goto 100
204    call MPI_COMM_SIZE(MPI_COMM_WORLD, mp_np, mp_ierr)
205    if (mp_ierr /= 0) goto 100
206
207
208! Variable mpi_mode is used to handle subroutines common to parallel/serial version
209    if (lmp_sync) then
210      mpi_mode=2 ! hold 2 windfields in memory
211    else
212      mpi_mode=3 ! hold 3 windfields in memory
213    end if
214
215    if (mp_pid.ne.0) then
216      lroot = .false.
217    end if
218
219! Set MPI precision to use for transferring deposition fields
220!************************************************************
221    if (dep_prec==dp) then
222      mp_cp = MPI_REAL8
223! TODO: write info message for serial version as well
224      if (lroot.and.verbosity>0) write(*,*) 'Using double precision for deposition fields'
225    else if (dep_prec==sp) then
226      mp_cp = MPI_REAL4
227      if (lroot.and.verbosity>0) write(*,*) 'Using single precision for deposition fields'
228    else
229      write(*,*) 'ERROR: something went wrong setting MPI real precision'
230      stop
231    end if
232
233! Check for sensible combination of parameters
234!*********************************************
235
236    if (.not.lmp_sync.and.numwfmem.ne.3) then
237      if (lroot) then
238        write(*,FMT='(80("#"))')
239        write(*,*) '#### mpi_mod::mpif_init> ERROR: ', &
240             & 'numwfmem must be set to 3 for asynchronous reading ####'
241        write(*,FMT='(80("#"))')
242      end if
243      call MPI_FINALIZE(mp_ierr)
244      stop
245    else if (lmp_sync.and.numwfmem.ne.2.and.lroot) then
246      write(*,FMT='(80("#"))')
247      write(*,*) '#### mpi_mod::mpif_init> WARNING: ', &
248           & 'numwfmem should be set to 2 for synchronous'
249      write(*,*) ' reading. Results will still be valid, but unnecessary memory &
250           &is allocated.'
251      write(*,FMT='(80("#"))')
252! Force "synchronized" version if all processes will call getfields
253    else if ((.not.lmp_sync.and.mp_np.lt.read_grp_min).or.(mp_np.eq.1)) then
254      if (lroot) then
255        write(*,FMT='(80("#"))')
256        write(*,*) '#### mpi_mod::mpif_init> WARNING: ', &
257             & 'all procs call getfields. Setting lmp_sync=.true.'
258        write(*,FMT='(80("#"))')
259      end if
260      lmp_sync=.true.
261    end if
262
263! TODO: Add more warnings for unimplemented flexpart features
264
265! Set ID of process that calls getfield/readwind.
266! Using the last process in the group ensures statistically identical results
267! to running with one process fewer and no separate read process
268!**********************************************************************
269
270!    id_read = min(mp_np-1, 1)
271    id_read = mp_np-1
272
273    if (mp_pid.eq.id_read) lmpreader=.true.
274
275    call MPI_Comm_group (MPI_COMM_WORLD, world_group_id, mp_ierr)
276
277! Create a MPI group/communicator that will calculate trajectories.
278! Skip this step if program is run with only a few processes
279!************************************************************************
280    allocate(mp_partgroup_rank(0:mp_np-2))
281
282! This allows checking for allocation of arrays when no subgroup is used
283    mp_partgroup_pid=0
284
285    if (read_grp_min.lt.2) then
286      write(*,*) '#### mpi_mod::mpif_init> ERROR ####', &
287           & 'read_grp_min should be at least 2. Exiting'
288      stop
289    end if
290
291    if (mp_np.ge.read_grp_min) then
292      lmp_use_reader = .true.
293
294! Map the subgroup IDs 0,1,2,...,mp_np-2 to world ranks, skipping the reader process
295      j=-1
296      do i=0, mp_np-2 !loop over all (subgroup) IDs
297        j=j+1
298        if (i.eq.id_read) then
299          j=j+1
300        end if
301        mp_partgroup_rank(i) = j
302      end do
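! Example: with mp_np=4 and id_read=3 (the default), the loop above gives
! mp_partgroup_rank = (/0,1,2/), i.e. world ranks 0-2 form the particle group.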
303
304      call MPI_Group_incl (world_group_id, mp_np-1, mp_partgroup_rank, &
305           &mp_partgroup_pid, mp_ierr)
306      if (mp_ierr /= 0) goto 100
307      call MPI_Comm_create (MPI_COMM_WORLD, mp_partgroup_pid, mp_partgroup_comm, mp_ierr)
308      if (mp_ierr /= 0) goto 100
309
310      if (mp_pid.ne.id_read) then
311        call MPI_Comm_rank (mp_partgroup_comm, mp_partgroup_pid, mp_ierr)
312        if (mp_ierr /= 0) goto 100
313
314! Get size of new group (=mp_np-1)
315        call MPI_COMM_SIZE(mp_partgroup_comm, mp_partgroup_np, mp_ierr)
316        if (mp_ierr /= 0) goto 100
317        if (mp_partgroup_np.ne.mp_np-1) then
318          write(*,*)  '#### mpi_mod:: mpif_init> ERROR ####&
319               & mp_partgroup_np.ne.mp_np-1'
320          stop
321        endif
322
323      else
324        mp_partgroup_pid = -1
325      end if
326    end if
327
328    if (lmp_use_reader) then
329      mp_comm_used = mp_partgroup_comm
330      mp_partid = mp_partgroup_pid
331      mp_partgroup_np=mp_np-1
332    else
333      mp_comm_used = MPI_COMM_WORLD
334      mp_partgroup_np = mp_np
335      mp_partid = mp_pid
336    end if
337
338! Set maxpart per process
339! eso 08/2016: Increase maxpart per process, in case of unbalanced distribution
340    maxpart_mpi=int(mp_maxpart_factor*real(maxpart)/real(mp_partgroup_np))
341    if (mp_np == 1) maxpart_mpi = maxpart
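! Illustration: maxpart=10000000 distributed over mp_partgroup_np=4 with
! mp_maxpart_factor=1.5 gives maxpart_mpi=3750000 per process.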
342
343! Do not allocate particle data arrays for readwind process
344    if (lmpreader.and.lmp_use_reader) then
345      maxpart_mpi=0
346    end if
347
348! Set random seed for each non-root process
349    if (mp_pid.gt.0) then
350      s = 244
351      mp_seed = -abs(mod((s*181)*((mp_pid-83)*359), 104729))
352    end if
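! The expression above gives each non-root rank a reproducible, rank-dependent
! (non-positive) seed for the random number generator.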
353    if (mp_dbg_mode) then
354      mp_seed=0
355      if (lroot) write(*,*) 'MPI: setting seed=0'
356    end if
357
358! Allocate request array for asynchronous MPI
359    if (.not.lmp_sync) then
360      allocate(reqs(0:nvar_async*mp_np-1))
361      reqs(:)=MPI_REQUEST_NULL
362    else
363      allocate(reqs(0:1))
364      reqs(:)=MPI_REQUEST_NULL
365    end if
366
367    goto 101
368
369100 write(*,*) '#### mpi_mod::mpif_init> ERROR ####', mp_ierr
370    stop
371
372101 end subroutine mpif_init
373
374
375  subroutine mpif_mpi_barrier
376!***********************************************************************
377! Set MPI barrier (all processes are synchronized here), with the
378! possible exception of a separate 'readwind' process.
379! Optionally measure cpu/wall time.
380!
381!***********************************************************************
382    implicit none
383
384    if (mp_time_barrier) then
385      call cpu_time(mp_barrier_time_beg)
386      mp_barrier_wtime_beg = mpi_wtime()
387    endif
388
389    call MPI_BARRIER(mp_comm_used, mp_ierr)
390    if (mp_ierr /= 0) goto 100
391
392    if (mp_time_barrier) then
393      call cpu_time(mp_barrier_time_end)
394      mp_barrier_wtime_end = mpi_wtime()
395
396      mp_barrier_time_total=mp_barrier_time_total+&
397           &(mp_barrier_time_end-mp_barrier_time_beg)
398      mp_barrier_wtime_total=mp_barrier_wtime_total+&
399           &(mp_barrier_wtime_end-mp_barrier_wtime_beg)
400    end if
401
402    goto 101
403
404100 write(*,*) '#### mpi_mod::mpif_mpi_barrier> ERROR ####', mp_ierr
405    stop
406
407101 end subroutine mpif_mpi_barrier
408
409
410  subroutine mpif_com_mod_allocate
411!*******************************************************************************   
412! Dynamic allocation of arrays (in serial code these have size maxpart)
413!
414! Not in use anymore, moved to com_mod for interoperability with serial version
415!
416! TODO: remove
417!*******************************************************************************
418    use com_mod
419
420    implicit none
421
422    integer :: array_size
423
424    array_size = maxpart_mpi
425
426    allocate(itra1(array_size),npoint(array_size),&
427         & nclass(array_size),idt(array_size),&
428         & itramem(array_size),itrasplit(array_size),&
429         & xtra1(array_size),ytra1(array_size),&
430         & ztra1(array_size),xmass1(array_size, maxspec))
431
432    allocate(uap(array_size),ucp(array_size),&
433         & uzp(array_size),us(array_size),&
434         & vs(array_size),&
435         & ws(array_size),cbt(array_size))
436
437
438  end subroutine mpif_com_mod_allocate
439
440
441  subroutine mpif_tm_send_dims
442!***********************************************************************
443! Distribute array dimensions from pid 0 to all processes.
444! numpart_mpi/numpart is sent to allow dynamic allocation
445!
446! Currently not in use.
447!
448! Variables of similar type (integer, real) are placed in an array
449! and sent collectively, to avoid the overhead associated with individual
450! MPI send/receives
451!
452!
453!***********************************************************************
454    use com_mod
455
456    implicit none
457
458    integer :: add_part=0
459
460    call MPI_Bcast(numpart, 1, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
461
462! MPI subgroup does the particle-loop
463    if (lmp_use_reader) then
464      if (mod(numpart,mp_partgroup_np).ne.0) add_part=1
465      numpart_mpi=int(numpart/mp_partgroup_np)+add_part
466    else
467      if (mod(numpart,mp_np).ne.0) add_part=1
468      numpart_mpi=int(numpart/mp_np)+add_part
469    end if
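! Example: numpart=1000003 divided over 4 particle processes gives
! numpart_mpi = 1000003/4 + 1 = 250001 on each process (add_part=1 because
! the division leaves a remainder).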
470
471
472! redefine numpart as 'numpart per process' throughout the code
473!**************************************************************
474    numpart = numpart_mpi
475
476  end subroutine mpif_tm_send_dims
477
478
479  subroutine mpif_send_part_properties(num_part)
480!***********************************************************************
481! Distribute particle properties from root to all processes.
482
483! Used by init_domainfill_mpi
484!
485! Variables:
486!
487! num_part        input, number of particles per process (rounded up)
488!
489!***********************************************************************
490    use com_mod
491
492    implicit none
493
494    integer,intent(in) :: num_part
495    integer :: i,jj, addone
496
497! Exit if too many particles
498    if (num_part.gt.maxpart_mpi) then
499      write(*,*) '#####################################################'
500      write(*,*) '#### ERROR - TOTAL NUMBER OF PARTICLES REQUIRED  ####'
501      write(*,*) '#### EXCEEDS THE MAXIMUM ALLOWED NUMBER. REDUCE  ####'
502      write(*,*) '#### EITHER NUMBER OF PARTICLES PER RELEASE POINT####'
503      write(*,*) '#### OR INCREASE MAXPART.                        ####'
504      write(*,*) '#####################################################'
505!      call MPI_FINALIZE(mp_ierr)
506      stop
507    end if
508
509
510! Time for MPI communications
511!****************************
512    if (mp_measure_time) call mpif_mtime('commtime',0)
513
514! Distribute variables, send from pid 0 to other processes (including itself)
515!****************************************************************************
516
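! Note: with MPI_SCATTER, the root process must hold num_part elements per
! process of mp_comm_used in each *_tmp send buffer.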
517    call MPI_SCATTER(nclass_tmp,num_part,MPI_INTEGER,nclass,&
518         &num_part,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
519    if (mp_ierr /= 0) goto 600
520    call MPI_SCATTER(npoint_tmp,num_part,MPI_INTEGER,npoint,&
521         &num_part,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
522    if (mp_ierr /= 0) goto 600
523    call MPI_SCATTER(itra1_tmp,num_part,MPI_INTEGER,itra1,&
524         &num_part,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
525    if (mp_ierr /= 0) goto 600
526    call MPI_SCATTER(idt_tmp,num_part,MPI_INTEGER,idt,&
527         &num_part,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
528    if (mp_ierr /= 0) goto 600
529    call MPI_SCATTER(itramem_tmp,num_part,MPI_INTEGER,itramem,&
530         &num_part,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
531    if (mp_ierr /= 0) goto 600
532    call MPI_SCATTER(itrasplit_tmp,num_part,MPI_INTEGER,itrasplit,&
533         &num_part,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
534    if (mp_ierr /= 0) goto 600
535    call MPI_SCATTER(xtra1_tmp,num_part,mp_dp,xtra1,&
536         &num_part,mp_dp,id_root,mp_comm_used,mp_ierr)
537    if (mp_ierr /= 0) goto 600
538    call MPI_SCATTER(ytra1_tmp,num_part,mp_dp,ytra1,&
539         &num_part,mp_dp,id_root,mp_comm_used,mp_ierr)
540    if (mp_ierr /= 0) goto 600
541    call MPI_SCATTER(ztra1_tmp,num_part,mp_sp,ztra1,&
542         &num_part,mp_sp,id_root,mp_comm_used,mp_ierr)
543    if (mp_ierr /= 0) goto 600
544    do i=1,nspec
545      call MPI_SCATTER(xmass1_tmp(:,i),num_part,mp_sp,xmass1(:,i),&
546           &num_part,mp_sp,id_root,mp_comm_used,mp_ierr)
547      if (mp_ierr /= 0) goto 600
548    end do
549
550    if (mp_measure_time) call mpif_mtime('commtime',1)
551
552    goto 601
553
554600 write(*,*) "mpi_mod> mp_ierr /= 0", mp_ierr
555    stop
556
557! After the transfer of particles it is possible that the value of
558! "numpart" is larger than the actual number in use, so we reduce it if there are
559! invalid particles at the end of the arrays
560
561601 do i=num_part, 1, -1
562      if (itra1(i).eq.-999999999) then
563        numpart=numpart-1
564      else
565        exit
566      end if
567    end do
568
569
570  end subroutine mpif_send_part_properties
571
572
573  subroutine mpif_calculate_part_redist(itime)
574!***********************************************************************
575! Calculate number of particles to redistribute between processes. This routine
576! can be called at regular time intervals to keep the number of
577! particles on each process balanced.
578!
579! First, all processes report their local "numpart" to each other, which is
580! stored in the array "numparticles_mpi(0:np-1)". This array is sorted from low to
581! high values, along with a corresponding process ID array "idx_arr(0:np-1)".
582! If the relative difference between the highest and lowest values
583! is above a threshold, particles are transferred from process idx_arr(np-1)
584! to process idx_arr(0). This is repeated for the pair idx_arr(np-1-i), idx_arr(i)
585! for all valid i.
586! Note: If np is an odd number, the process with the median
587! number of particles is left unchanged
588!
589! VARIABLES
590! itime        input, current time
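!
! Illustrative example (assuming mp_redist_fract=0.2, mp_min_redist=100000):
!   numparticles_mpi = (/ 90000, 50000, 150000, 110000 /)
!   sorted (low to high): 50000, 90000, 110000, 150000
!   pair (150000, 50000): sender exceeds mp_min_redist and
!     (150000-50000)/150000 = 0.67 > 0.2, so 50000 particles are transferred
!   pair (110000, 90000): (110000-90000)/110000 = 0.18 < 0.2, no transfer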
591!***********************************************************************
592    use com_mod
593
594    implicit none
595   
596    integer, intent(in) :: itime
597    real :: pmin,z
598    integer :: i,jj,nn, num_part=1,m,imin, num_trans
599    logical :: first_iter
600    integer,allocatable,dimension(:) :: numparticles_mpi, idx_arr
601    real,allocatable,dimension(:) :: sorted ! TODO: we don't really need this
602
603! Exit if running with only 1 process
604!************************************************************************
605    if (mp_np.eq.1) return
606
607! All processes exchange information on number of particles
608!****************************************************************************
609    allocate(numparticles_mpi(0:mp_partgroup_np-1), &
610         &idx_arr(0:mp_partgroup_np-1), sorted(0:mp_partgroup_np-1))
611
612    call MPI_Allgather(numpart, 1, MPI_INTEGER, numparticles_mpi, &
613         & 1, MPI_INTEGER, mp_comm_used, mp_ierr)
614
615
616! Sort from lowest to highest
617! Initial guess: correct order
618    sorted(:) = numparticles_mpi(:)
619    do i=0, mp_partgroup_np-1
620      idx_arr(i) = i
621    end do
622
623! For each successive element in index array, see if a lower value exists
624    do i=0, mp_partgroup_np-2
625      pmin=sorted(i)
626      imin=idx_arr(i)
627      m=i+1
628      do jj=m, mp_partgroup_np-1
629        if (pmin.le.sorted(jj)) cycle
630        z=pmin
631        pmin=sorted(jj)
632        sorted(jj)=z
633
634        nn=imin
635        imin=idx_arr(jj)
636        idx_arr(jj)=nn
637      end do
638      sorted(i)=pmin
639      idx_arr(i)=imin
640    end do
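! (The loop above is a simple selection sort on a copy of numparticles_mpi;
!  inexpensive since the number of processes is typically small.)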
641
642! For each pair of processes, transfer particles if the difference is above a
643! limit and numpart at the sending process is large enough
644
645    m=mp_partgroup_np-1 ! index for last sorted process (most particles)
646    do i=0,mp_partgroup_np/2-1
647      num_trans = numparticles_mpi(idx_arr(m)) - numparticles_mpi(idx_arr(i))
648      if (mp_partid.eq.idx_arr(m).or.mp_partid.eq.idx_arr(i)) then
649        if ( numparticles_mpi(idx_arr(m)).gt.mp_min_redist.and.&
650             & real(num_trans)/real(numparticles_mpi(idx_arr(m))).gt.mp_redist_fract) then
651! DBG
652          ! write(*,*) 'mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, numparticles_mpi', &
653          !      &mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, numparticles_mpi
654! DBG
655          call mpif_redist_part(itime, idx_arr(m), idx_arr(i), num_trans/2)
656        end if
657      end if
658      m=m-1
659    end do
660
661    deallocate(numparticles_mpi, idx_arr, sorted)
662
663  end subroutine mpif_calculate_part_redist
664
665
666  subroutine mpif_redist_part(itime, src_proc, dest_proc, num_trans)
667!***********************************************************************
668! Transfer particle properties between two arbitrary processes.
669!
670! VARIABLES
671!
672! itime           input, current time
673! src_proc        input, ID of source process
674! dest_proc       input, ID of destination process
675! num_trans       input, number of particles to transfer
676!
677!************************************************************************
678    use com_mod !TODO:  ,only: nclass etc
679
680    implicit none
681
682    integer, intent(in) :: itime, src_proc, dest_proc, num_trans
683    integer :: ll, ul ! lower and upper indices in arrays
684    integer :: arr_sz ! size of temporary arrays
685    integer :: mtag   ! MPI message tag
686    integer :: i, j, minpart, ipart, maxnumpart
687 
688! Check for invalid input arguments
689!**********************************
690    if (src_proc.eq.dest_proc) then
691      write(*,*) '<mpi_mod::mpif_redist_part>: Error: &
692           &src_proc.eq.dest_proc'
693      stop
694    end if
695
696! Measure time for MPI communications
697!************************************
698    if (mp_measure_time) call mpif_mtime('commtime',0)
699
700! Send particles
701!***************
702    if (mp_partid.eq.src_proc) then
703      mtag = 2000
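! Message tags mtag+1 ... mtag+9+nspec below identify the individual
! particle arrays (one tag per array, plus one per species for xmass1).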
704
705! Array indices for the transferred particles
706      ll=numpart-num_trans+1
707      ul=numpart
708
709      if (mp_dev_mode) then
710        write(*,FMT='(72("#"))')
711        write(*,*) "Sending ", num_trans, "particles (from/to)", src_proc, dest_proc
712        write(*,*) "numpart before: ", numpart
713      end if
714
715      call MPI_SEND(nclass(ll:ul),num_trans,&
716           & MPI_INTEGER,dest_proc,mtag+1,mp_comm_used,mp_ierr)
717
718      call MPI_SEND(npoint(ll:ul),num_trans,&
719           & MPI_INTEGER,dest_proc,mtag+2,mp_comm_used,mp_ierr)
720
721      call MPI_SEND(itra1(ll:ul),num_trans, &
722           & MPI_INTEGER,dest_proc,mtag+3,mp_comm_used,mp_ierr)
723
724      call MPI_SEND(idt(ll:ul),num_trans, &
725           & MPI_INTEGER,dest_proc,mtag+4,mp_comm_used,mp_ierr)
726
727      call MPI_SEND(itramem(ll:ul),num_trans, &
728           & MPI_INTEGER,dest_proc,mtag+5,mp_comm_used,mp_ierr)
729
730      call MPI_SEND(itrasplit(ll:ul),num_trans,&
731           & MPI_INTEGER,dest_proc,mtag+6,mp_comm_used,mp_ierr)
732
733      call MPI_SEND(xtra1(ll:ul),num_trans, &
734           & mp_dp,dest_proc,mtag+7,mp_comm_used,mp_ierr)
735
736      call MPI_SEND(ytra1(ll:ul),num_trans,&
737           & mp_dp,dest_proc,mtag+8,mp_comm_used,mp_ierr)
738
739      call MPI_SEND(ztra1(ll:ul),num_trans,&
740           & mp_sp,dest_proc,mtag+9,mp_comm_used,mp_ierr)
741
742      do j=1,nspec
743        call MPI_SEND(xmass1(ll:ul,j),num_trans,&
744             & mp_sp,dest_proc,mtag+(9+j),mp_comm_used,mp_ierr)
745      end do
746
747! Terminate transferred particles, update numpart
748      itra1(ll:ul) = -999999999
749
750      numpart = numpart-num_trans
751
752      if (mp_dev_mode) then
753        write(*,*) "numpart after: ", numpart
754        write(*,FMT='(72("#"))')
755      end if
756
757    else if (mp_partid.eq.dest_proc) then
758
759! Receive particles
760!******************
761      mtag = 2000
762      ! Array indices for the transferred particles
763      ll=numpart+1
764      ul=numpart+num_trans
765
766      if (mp_dev_mode) then
767        write(*,FMT='(72("#"))')
768        write(*,*) "Receiving ", num_trans, "particles (from/to)", src_proc, dest_proc
769        write(*,*) "numpart before: ", numpart
770      end if
771
772! We could receive the data directly at nclass(ll:ul) etc., but this leaves
773! vacant spaces at lower indices. Using temporary arrays instead.
774      arr_sz = ul-ll+1
775      allocate(itra1_tmp(arr_sz),npoint_tmp(arr_sz),nclass_tmp(arr_sz),&
776           & idt_tmp(arr_sz),itramem_tmp(arr_sz),itrasplit_tmp(arr_sz),&
777           & xtra1_tmp(arr_sz),ytra1_tmp(arr_sz),ztra1_tmp(arr_sz),&
778           & xmass1_tmp(arr_sz, maxspec))
779     
780      call MPI_RECV(nclass_tmp,num_trans,&
781           & MPI_INTEGER,src_proc,mtag+1,mp_comm_used,mp_status,mp_ierr)
782
783      call MPI_RECV(npoint_tmp,num_trans,&
784           & MPI_INTEGER,src_proc,mtag+2,mp_comm_used,mp_status,mp_ierr)
785
786      call MPI_RECV(itra1_tmp,num_trans, &
787           & MPI_INTEGER,src_proc,mtag+3,mp_comm_used,mp_status,mp_ierr)
788
789      call MPI_RECV(idt_tmp,num_trans, &
790           & MPI_INTEGER,src_proc,mtag+4,mp_comm_used,mp_status,mp_ierr)
791
792      call MPI_RECV(itramem_tmp,num_trans, &
793           & MPI_INTEGER,src_proc,mtag+5,mp_comm_used,mp_status,mp_ierr)
794
795      call MPI_RECV(itrasplit_tmp,num_trans,&
796           & MPI_INTEGER,src_proc,mtag+6,mp_comm_used,mp_status,mp_ierr)
797
798      call MPI_RECV(xtra1_tmp,num_trans, &
799           & mp_dp,src_proc,mtag+7,mp_comm_used,mp_status,mp_ierr)
800
801      call MPI_RECV(ytra1_tmp,num_trans,&
802           & mp_dp,src_proc,mtag+8,mp_comm_used,mp_status,mp_ierr)
803
804      call MPI_RECV(ztra1_tmp,num_trans,&
805           & mp_sp,src_proc,mtag+9,mp_comm_used,mp_status,mp_ierr)
806
807      do j=1,nspec
808        call MPI_RECV(xmass1_tmp(:,j),num_trans,&
809             & mp_sp,src_proc,mtag+(9+j),mp_comm_used,mp_status,mp_ierr)
810      end do
811
812! This is the maximum value numpart can possibly have after the transfer
813      maxnumpart=numpart+num_trans
814
815! Search for vacant space and copy from temporary storage
816!********************************************************
817      minpart=1
818      do i=1, num_trans
819! Take into account that we may have transferred invalid particles
820        if (itra1_tmp(i).ne.itime) cycle
821        do ipart=minpart,maxnumpart
822          if (itra1(ipart).ne.itime) then
823            itra1(ipart) = itra1_tmp(i)
824            npoint(ipart) = npoint_tmp(i)
825            nclass(ipart) = nclass_tmp(i)
826            idt(ipart) = idt_tmp(i)
827            itramem(ipart) = itramem_tmp(i)
828            itrasplit(ipart) = itrasplit_tmp(i)
829            xtra1(ipart) = xtra1_tmp(i)
830            ytra1(ipart) = ytra1_tmp(i)
831            ztra1(ipart) = ztra1_tmp(i)
832            xmass1(ipart,:) = xmass1_tmp(i,:)
833            goto 200 ! Storage space has been found, stop searching
834          end if
835! :TODO: add a check for whether too many particles are required
836        end do
837200     minpart=ipart+1
838      end do
839! Increase numpart, if necessary
840      numpart=max(numpart,ipart)
841
842      deallocate(itra1_tmp,npoint_tmp,nclass_tmp,idt_tmp,itramem_tmp,&
843           &itrasplit_tmp,xtra1_tmp,ytra1_tmp,ztra1_tmp,xmass1_tmp)
844
845      if (mp_dev_mode) then
846        write(*,*) "numpart after: ", numpart
847        write(*,FMT='(72("#"))')
848      end if
849
850    else
851! This routine should only be called by the two participating processes
852      write(*,*) "ERROR: wrong process has entered mpi_mod::mpif_redist_part"
853      stop
854      return
855    end if
856
857! Measure time for MPI communications
858!************************************
859    if (mp_measure_time) call mpif_mtime('commtime',1)
860
861  end subroutine mpif_redist_part
862
863
864  subroutine mpif_tm_send_vars
865!***********************************************************************
866! Distribute particle variables from pid 0 to all processes.
867! Called from timemanager
868! *NOT IN USE* at the moment, but can be useful for debugging
869!
870!***********************************************************************
871    use com_mod
872
873    implicit none
874
875    integer :: i
876
877! Time for MPI communications
878!****************************
879    if (mp_measure_time) call mpif_mtime('commtime',0)
880
881! Distribute variables, send from pid 0 to other processes
882!**********************************************************************
883
884! integers
885    if (lroot) then
886      call MPI_SCATTER(npoint,numpart_mpi,MPI_INTEGER,MPI_IN_PLACE,&
887           & numpart_mpi,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
888      if (mp_ierr /= 0) goto 600
889      call MPI_SCATTER(idt,numpart_mpi,MPI_INTEGER,MPI_IN_PLACE,&
890           & numpart_mpi,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
891      if (mp_ierr /= 0) goto 600
892      call MPI_SCATTER(itra1,numpart_mpi,MPI_INTEGER,MPI_IN_PLACE,&
893           & numpart_mpi,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
894      if (mp_ierr /= 0) goto 600
895      call MPI_SCATTER(nclass,numpart_mpi,MPI_INTEGER,MPI_IN_PLACE,&
896           & numpart_mpi,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
897      if (mp_ierr /= 0) goto 600
898      call MPI_SCATTER(itramem,numpart_mpi,MPI_INTEGER,MPI_IN_PLACE,&
899           & numpart_mpi,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
900      if (mp_ierr /= 0) goto 600
901
902! int2
903      call MPI_SCATTER(cbt,numpart_mpi,MPI_INTEGER2,MPI_IN_PLACE,&
904           & numpart_mpi,MPI_INTEGER2,id_root,mp_comm_used,mp_ierr)
905      if (mp_ierr /= 0) goto 600
906
907! real
908      call MPI_SCATTER(uap,numpart_mpi,mp_sp,MPI_IN_PLACE,&
909           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
910      if (mp_ierr /= 0) goto 600
911      call MPI_SCATTER(ucp,numpart_mpi,mp_sp,MPI_IN_PLACE,&
912           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
913      if (mp_ierr /= 0) goto 600
914      call MPI_SCATTER(uzp,numpart_mpi,mp_sp,MPI_IN_PLACE,&
915           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
916      if (mp_ierr /= 0) goto 600
917
918      call MPI_SCATTER(us,numpart_mpi,mp_sp,MPI_IN_PLACE,&
919           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
920      if (mp_ierr /= 0) goto 600
921      call MPI_SCATTER(vs,numpart_mpi,mp_sp,MPI_IN_PLACE,&
922           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
923      if (mp_ierr /= 0) goto 600
924      call MPI_SCATTER(ws,numpart_mpi,mp_sp,MPI_IN_PLACE,&
925           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
926      if (mp_ierr /= 0) goto 600
927
928      call MPI_SCATTER(xtra1,numpart_mpi,mp_dp,MPI_IN_PLACE,&
929           & numpart_mpi,mp_dp,id_root,mp_comm_used,mp_ierr)
930      if (mp_ierr /= 0) goto 600
931      call MPI_SCATTER(ytra1,numpart_mpi,mp_dp,MPI_IN_PLACE,&
932           & numpart_mpi,mp_dp,id_root,mp_comm_used,mp_ierr)
933      if (mp_ierr /= 0) goto 600
934      call MPI_SCATTER(ztra1,numpart_mpi,mp_sp,MPI_IN_PLACE,&
935           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
936      if (mp_ierr /= 0) goto 600
937
938      do i=1,nspec
939        call MPI_SCATTER(xmass1(:,i),numpart_mpi,mp_sp,MPI_IN_PLACE,&
940             & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
941        if (mp_ierr /= 0) goto 600
942      end do
943
944    else ! (mp_pid >= 1)
945! integers
946      call MPI_SCATTER(npoint,numpart_mpi,MPI_INTEGER,npoint,&
947           & numpart_mpi,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
948      if (mp_ierr /= 0) goto 600
949      call MPI_SCATTER(idt,numpart_mpi,MPI_INTEGER,idt,&
950           & numpart_mpi,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
951      if (mp_ierr /= 0) goto 600
952      call MPI_SCATTER(itra1,numpart_mpi,MPI_INTEGER,itra1,&
953           & numpart_mpi,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
954      if (mp_ierr /= 0) goto 600
955      call MPI_SCATTER(nclass,numpart_mpi,MPI_INTEGER,nclass,&
956           & numpart_mpi,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
957      if (mp_ierr /= 0) goto 600
958      call MPI_SCATTER(itramem,numpart_mpi,MPI_INTEGER,itramem,&
959           & numpart_mpi,MPI_INTEGER,id_root,mp_comm_used,mp_ierr)
960      if (mp_ierr /= 0) goto 600
961
962! int2
963      call MPI_SCATTER(cbt,numpart_mpi,MPI_INTEGER2,cbt,&
964           & numpart_mpi,MPI_INTEGER2,id_root,mp_comm_used,mp_ierr)
965      if (mp_ierr /= 0) goto 600
966
967! reals
968      call MPI_SCATTER(uap,numpart_mpi,mp_sp,uap,&
969           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
970      if (mp_ierr /= 0) goto 600
971      call MPI_SCATTER(ucp,numpart_mpi,mp_sp,ucp,&
972           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
973      if (mp_ierr /= 0) goto 600
974      call MPI_SCATTER(uzp,numpart_mpi,mp_sp,uzp,&
975           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
976      if (mp_ierr /= 0) goto 600
977
978      call MPI_SCATTER(us,numpart_mpi,mp_sp,us,&
979           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
980      if (mp_ierr /= 0) goto 600
981      call MPI_SCATTER(vs,numpart_mpi,mp_sp,vs,&
982           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
983      if (mp_ierr /= 0) goto 600
984      call MPI_SCATTER(ws,numpart_mpi,mp_sp,ws,&
985           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
986      if (mp_ierr /= 0) goto 600
987
988      call MPI_SCATTER(xtra1,numpart_mpi,mp_dp,xtra1,&
989           & numpart_mpi,mp_dp,id_root,mp_comm_used,mp_ierr)
990      if (mp_ierr /= 0) goto 600
991      call MPI_SCATTER(ytra1,numpart_mpi,mp_dp,ytra1,&
992           & numpart_mpi,mp_dp,id_root,mp_comm_used,mp_ierr)
993      if (mp_ierr /= 0) goto 600
994      call MPI_SCATTER(ztra1,numpart_mpi,mp_sp,ztra1,&
995           & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
996      if (mp_ierr /= 0) goto 600
997
998      do i=1,nspec
999        call MPI_SCATTER(xmass1(:,i),numpart_mpi,mp_sp,xmass1(:,i),&
1000             & numpart_mpi,mp_sp,id_root,mp_comm_used,mp_ierr)
1001        if (mp_ierr /= 0) goto 600
1002      end do
1003
1004    end if !(lroot)
1005
1006    if (mp_measure_time) call mpif_mtime('commtime',1)
1007
1008    goto 601
1009
1010600 write(*,*) "mpi_mod> mp_ierr /= 0", mp_ierr
1011    stop
1012601 end subroutine mpif_tm_send_vars
1013
1014
1015  subroutine mpif_tm_collect_vars
1016!*******************************************************************************
1017! Collect particle data to PID 0 from all processes.                           *
1018!                                                                              *
1019! This can be called from timemanager_mpi, before calculating                  *
1020! concentrations/writing output grids.                                         *
1021!                                                                              *
1022! Currently *not in use*, as each process calculates concentrations            *
1023! separately, but can be useful for debugging                                  *
1024!                                                                              *
1025! To use this routine (together with mpif_tm_send_vars) to transfer data       *
1026! to the MPI root process (useful for debugging), insert code like this        *
1027! (in timemanager_mpi.f90)                                                     *
1028!                                                                              *
1029!      if (lroot) tot_numpart=numpart ! root stores total numpart              *
1030!      call mpif_tm_send_dims                                                  *
1031!      if (numpart>1) then                                                     *
1032!        call mpif_tm_send_vars                                                *
1033!      end if                                                                  *
1034!                                                                              *
1035!    To collect data from all processes to root process after a parallel       *
1036!    region:                                                                   *
1037!                                                                              *
1038!      if (numpart>0) then                                                     *
1039!          if (lroot) numpart = tot_numpart                                    *
1040!       call mpif_tm_collect_vars                                              *
1041!      end if                                                                  *
1042!                                                                              *
1043! Then a section that begins with "if (lroot) ..." will behave like            *
1044! serial flexpart                                                              *
1045!                                                                              *
1046!*******************************************************************************
1047    use com_mod !, only: numpart, nspec, itra1, npoint, nclass
1048
1049    implicit none
1050
1051    integer :: i
1052
1053    logical :: use_mp_vars = .false.! use mp_... type allocated vars
1054    logical :: use_in_place = .true.! use MPI_IN_PLACE to collect vars.
1055
1056
1057! Time for MPI communications
1058    if (mp_measure_time) call mpif_mtime('commtime',0)
1059
1060! Collect variables: send from all processes to pid 0 (root)
1061!**********************************************************************
1062
1063    if (.not. use_mp_vars.and..not.use_in_place) then
1064
1065! Integers:
1066      call MPI_GATHER(npoint, numpart_mpi, MPI_INTEGER, npoint, &
1067           &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1068      if (mp_ierr /= 0) goto 600
1069      call MPI_GATHER(idt, numpart_mpi, MPI_INTEGER, idt, &
1070           &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1071      if (mp_ierr /= 0) goto 600
1072      call MPI_GATHER(itra1, numpart_mpi, MPI_INTEGER, itra1, &
1073           &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1074      if (mp_ierr /= 0) goto 600
1075      call MPI_GATHER(nclass, numpart_mpi, MPI_INTEGER, nclass, &
1076           &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1077      if (mp_ierr /= 0) goto 600
1078      call MPI_GATHER(itramem, numpart_mpi, MPI_INTEGER, itramem, &
1079           &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1080      if (mp_ierr /= 0) goto 600
1081
1082! int2
1083      call MPI_GATHER(cbt, numpart_mpi, MPI_INTEGER2, cbt, &
1084           &numpart_mpi, MPI_INTEGER2, id_root, mp_comm_used, mp_ierr)
1085      if (mp_ierr /= 0) goto 600
1086
1087! Reals:
1088      call MPI_GATHER(uap, numpart_mpi, mp_sp, uap, &
1089           &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1090      if (mp_ierr /= 0) goto 600
1091      call MPI_GATHER(ucp, numpart_mpi, mp_sp, ucp, &
1092           &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1093      if (mp_ierr /= 0) goto 600
1094      call MPI_GATHER(uzp, numpart_mpi, mp_sp, uzp, &
1095           &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1096      if (mp_ierr /= 0) goto 600
1097
1098      call MPI_GATHER(us, numpart_mpi, mp_sp, us, &
1099           &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1100      if (mp_ierr /= 0) goto 600
1101      call MPI_GATHER(vs, numpart_mpi, mp_sp, vs, &
1102           &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1103      if (mp_ierr /= 0) goto 600
1104      call MPI_GATHER(ws, numpart_mpi, mp_sp, ws, &
1105           &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1106      if (mp_ierr /= 0) goto 600
1107
1108
1109      call MPI_GATHER(xtra1, numpart_mpi, mp_dp, xtra1, &
1110           &numpart_mpi, mp_dp, id_root, mp_comm_used, mp_ierr)
1111      if (mp_ierr /= 0) goto 600
1112      call MPI_GATHER(ytra1, numpart_mpi, mp_dp, ytra1, &
1113           &numpart_mpi, mp_dp, id_root, mp_comm_used, mp_ierr)
1114      if (mp_ierr /= 0) goto 600
1115      call MPI_GATHER(ztra1, numpart_mpi, mp_sp, ztra1, &
1116           &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1117      if (mp_ierr /= 0) goto 600
1118
1119      do i=1, nspec
1120        call MPI_GATHER(xmass1(:,i), numpart_mpi, mp_sp, xmass1(:,i), &
1121             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1122        if (mp_ierr /= 0) goto 600
1123
1124      end do
1125
1126! Use variables npoint etc directly for communications
1127!***********************************************************************
1128    else if (use_in_place.and..not.use_mp_vars) then
1129      if (lroot) then
1130! Integers:
1131        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, MPI_INTEGER, npoint, &
1132             &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1133        if (mp_ierr /= 0) goto 600
1134        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, MPI_INTEGER, idt, &
1135             &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1136        if (mp_ierr /= 0) goto 600
1137        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, MPI_INTEGER, itra1, &
1138             &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1139        if (mp_ierr /= 0) goto 600
1140        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, MPI_INTEGER, nclass, &
1141             &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1142        if (mp_ierr /= 0) goto 600
1143        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, MPI_INTEGER, itramem, &
1144             &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1145        if (mp_ierr /= 0) goto 600
1146
1147! int2
1148        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, MPI_INTEGER2, cbt, &
1149             &numpart_mpi, MPI_INTEGER2, id_root, mp_comm_used, mp_ierr)
1150        if (mp_ierr /= 0) goto 600
1151
1152! Reals:
1153        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, mp_sp, uap, &
1154             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1155        if (mp_ierr /= 0) goto 600
1156        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, mp_sp, ucp, &
1157             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1158        if (mp_ierr /= 0) goto 600
1159        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, mp_sp, uzp, &
1160             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1161        if (mp_ierr /= 0) goto 600
1162
1163        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, mp_sp, us, &
1164             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1165        if (mp_ierr /= 0) goto 600
1166        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, mp_sp, vs, &
1167             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1168        if (mp_ierr /= 0) goto 600
1169        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, mp_sp, ws, &
1170             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1171        if (mp_ierr /= 0) goto 600
1172
1173
1174        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, mp_dp, xtra1, &
1175             &numpart_mpi, mp_dp, id_root, mp_comm_used, mp_ierr)
1176        if (mp_ierr /= 0) goto 600
1177        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, mp_dp, ytra1, &
1178             &numpart_mpi, mp_dp, id_root, mp_comm_used, mp_ierr)
1179        if (mp_ierr /= 0) goto 600
1180        call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, mp_sp, ztra1, &
1181             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1182        if (mp_ierr /= 0) goto 600
1183
1184        do i=1, nspec
1185          call MPI_GATHER(MPI_IN_PLACE, numpart_mpi, mp_sp, xmass1(:,i), &
1186               &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1187          if (mp_ierr /= 0) goto 600
1188        end do
1189
1190      else ! (for mp_pid >= 1)
1191
1192! Integers:
1193        call MPI_GATHER(npoint, numpart_mpi, MPI_INTEGER, npoint, &
1194             &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1195        if (mp_ierr /= 0) goto 600
1196        call MPI_GATHER(idt, numpart_mpi, MPI_INTEGER, idt, &
1197             &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1198        if (mp_ierr /= 0) goto 600
1199        call MPI_GATHER(itra1, numpart_mpi, MPI_INTEGER, itra1, &
1200             &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1201        if (mp_ierr /= 0) goto 600
1202        call MPI_GATHER(nclass, numpart_mpi, MPI_INTEGER, nclass, &
1203             &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1204        if (mp_ierr /= 0) goto 600
1205        call MPI_GATHER(itramem, numpart_mpi, MPI_INTEGER, itramem, &
1206             &numpart_mpi, MPI_INTEGER, id_root, mp_comm_used, mp_ierr)
1207        if (mp_ierr /= 0) goto 600
1208
1209! int2
1210        call MPI_GATHER(cbt, numpart_mpi, MPI_INTEGER2, cbt, &
1211             &numpart_mpi, MPI_INTEGER2, id_root, mp_comm_used, mp_ierr)
1212        if (mp_ierr /= 0) goto 600
1213
1214! Reals:
1215        call MPI_GATHER(uap, numpart_mpi, mp_sp, uap, &
1216             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1217        if (mp_ierr /= 0) goto 600
1218        call MPI_GATHER(ucp, numpart_mpi, mp_sp, ucp, &
1219             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1220        if (mp_ierr /= 0) goto 600
1221        call MPI_GATHER(uzp, numpart_mpi, mp_sp, uzp, &
1222             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1223        if (mp_ierr /= 0) goto 600
1224
1225        call MPI_GATHER(us, numpart_mpi, mp_sp, us, &
1226             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1227        if (mp_ierr /= 0) goto 600
1228        call MPI_GATHER(vs, numpart_mpi, mp_sp, vs, &
1229             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1230        if (mp_ierr /= 0) goto 600
1231        call MPI_GATHER(ws, numpart_mpi, mp_sp, ws, &
1232             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1233        if (mp_ierr /= 0) goto 600
1234
1235
1236        call MPI_GATHER(xtra1, numpart_mpi, mp_dp, xtra1, &
1237             &numpart_mpi, mp_dp, id_root, mp_comm_used, mp_ierr)
1238        if (mp_ierr /= 0) goto 600
1239        call MPI_GATHER(ytra1, numpart_mpi, mp_dp, ytra1, &
1240             &numpart_mpi, mp_dp, id_root, mp_comm_used, mp_ierr)
1241        if (mp_ierr /= 0) goto 600
1242        call MPI_GATHER(ztra1, numpart_mpi, mp_sp, ztra1, &
1243             &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1244        if (mp_ierr /= 0) goto 600
1245
1246        do i=1, nspec
1247          call MPI_GATHER(xmass1(:,i), numpart_mpi, mp_sp, xmass1(:,i), &
1248               &numpart_mpi, mp_sp, id_root, mp_comm_used, mp_ierr)
1249          if (mp_ierr /= 0) goto 600
1250        end do
1251      end if ! (lroot)
1252    end if !  (use_in_place)
1253
1254    if (mp_measure_time) call mpif_mtime('commtime',1)
1255
1256    goto 601
1257
1258600 write(*,*) "mpi_mod> mp_ierr /= 0", mp_ierr
1259    stop
1260601 end subroutine mpif_tm_collect_vars
1261
1262
1263  subroutine mpif_gf_send_vars(memstat)
1264!*******************************************************************************
1265! DESCRIPTION
1266!   Distribute 'getfield' variables from reader process
1267!
1268!   Called from timemanager
1269!
1270! NOTE
1271!   This subroutine distributes windfields read from the reader process to
1272!   all other processes. Usually one set of fields are transfered, but at
1273!   first timestep when there are no fields in memory, both are transfered.
1274!   MPI_Bcast is used, so implicitly all processes are synchronized at this
1275!   step
1276!
1277!
1278!*******************************************************************************
1279    use com_mod
1280    use par_mod,only: numwfmem
1281
1282    implicit none
1283
1284    integer, intent(in) :: memstat
1285
1286! Common array sizes used for communications
1287    integer, parameter :: d3_size1 = nxmax*nymax*nzmax
1288    integer, parameter :: d3_size2 = nxmax*nymax*nuvzmax
1289    integer, parameter :: d2_size1 = nxmax*nymax
1290    integer, parameter :: d2_size2 = nxmax*nymax*maxspec
1291    integer, parameter :: d2_size3 = nxmax*nymax
1292    integer, parameter :: d1_size1 = maxwf
1293
1294    integer :: d3s1,d3s2,d2s1,d2s2
1295
1296! li,ui    lower,upper indices of fields to be transferred (1-3, ui>=li)
1297    integer :: li,ui
1298
1299! The first time the routine is called, the unchanging (static) fields are transferred
1300    logical :: first_call=.true.
1301
1302!**********************************************************************
1303
1304! Sizes of arrays transferred
1305    d3s1=d3_size1
1306    d3s2=d3_size2
1307    d2s1=d2_size1
1308    d2s2=d2_size2
1309
1310! Decide which field(s) need to be transferred
1311    if (memstat.ge.32) then ! distribute 2 fields, to 1st/2nd indices
1312      li=1; ui=2
1313      d3s1=2*d3_size1
1314      d3s2=2*d3_size2
1315      d2s1=2*d2_size1
1316      d2s2=2*d2_size2
1317    else if (memstat.eq.17) then ! distribute 1 field, place on 1st index
1318      li=1; ui=1
1319    else if (memstat.eq.18) then ! distribute 1 field, place on 2nd index
1320      li=2; ui=2
1321    else if (memstat.eq.19) then ! distribute 1 field, place on 3rd index
1322      li=3; ui=3
1323    else
1324      write(*,*) '#### mpi_mod::mpif_gf_send_vars> ERROR: ', &
1325           & 'wrong value of memstat, exiting ####', memstat
1326      stop
1327    end if
1328
1329
1330! Time for MPI communications
1331    if (mp_measure_time) call mpif_mtime('commtime',0)
1332
1333! Send variables from getfield process (id_read) to other processes
1334!**********************************************************************
1335
1336! The non-reader processes need to know if cloud water was read.
1337    call MPI_Bcast(readclouds,1,MPI_LOGICAL,id_read,MPI_COMM_WORLD,mp_ierr)
1338    if (mp_ierr /= 0) goto 600
1339
1340! Static fields/variables sent only at startup
1341    if (first_call) then
1342      call MPI_Bcast(oro(:,:),d2_size3,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1343      if (mp_ierr /= 0) goto 600
1344      call MPI_Bcast(excessoro(:,:),d2_size3,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1345      if (mp_ierr /= 0) goto 600
1346      call MPI_Bcast(lsm(:,:),d2_size3,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1347      if (mp_ierr /= 0) goto 600
1348      call MPI_Bcast(xlanduse(:,:,:),d2_size3*numclass,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1349      if (mp_ierr /= 0) goto 600
1350      call MPI_Bcast(wftime,d1_size1,MPI_INTEGER,id_read,MPI_COMM_WORLD,mp_ierr)
1351      if (mp_ierr /= 0) goto 600
1352      call MPI_Bcast(numbwf,1,MPI_INTEGER,id_read,MPI_COMM_WORLD,mp_ierr)
1353      if (mp_ierr /= 0) goto 600
1354      call MPI_Bcast(nmixz,1,MPI_INTEGER,id_read,MPI_COMM_WORLD,mp_ierr)
1355      if (mp_ierr /= 0) goto 600
1356      call MPI_Bcast(height,nzmax,MPI_INTEGER,id_read,MPI_COMM_WORLD,mp_ierr)
1357      if (mp_ierr /= 0) goto 600
1358
1359      first_call=.false.
1360    endif
1361
1362    call MPI_Bcast(uu(:,:,:,li:ui),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1363    if (mp_ierr /= 0) goto 600
1364    call MPI_Bcast(vv(:,:,:,li:ui),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1365    if (mp_ierr /= 0) goto 600
1366    call MPI_Bcast(uupol(:,:,:,li:ui),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1367    if (mp_ierr /= 0) goto 600
1368    call MPI_Bcast(vvpol(:,:,:,li:ui),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1369    if (mp_ierr /= 0) goto 600
1370    call MPI_Bcast(ww(:,:,:,li:ui),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1371    if (mp_ierr /= 0) goto 600
1372    call MPI_Bcast(tt(:,:,:,li:ui),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1373    if (mp_ierr /= 0) goto 600
1374    call MPI_Bcast(rho(:,:,:,li:ui),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1375    if (mp_ierr /= 0) goto 600
1376    call MPI_Bcast(drhodz(:,:,:,li:ui),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1377    if (mp_ierr /= 0) goto 600
1378    call MPI_Bcast(tth(:,:,:,li:ui),d3s2,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1379    if (mp_ierr /= 0) goto 600
1380    call MPI_Bcast(qvh(:,:,:,li:ui),d3s2,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1381    if (mp_ierr /= 0) goto 600
1382    call MPI_Bcast(qv(:,:,:,li:ui),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1383    if (mp_ierr /= 0) goto 600
1384    call MPI_Bcast(pv(:,:,:,li:ui),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1385    if (mp_ierr /= 0) goto 600
1386    call MPI_Bcast(clouds(:,:,:,li:ui),d3s1,MPI_INTEGER1,id_read,MPI_COMM_WORLD,mp_ierr)
1387    if (mp_ierr /= 0) goto 600
1388
1389! cloud water/ice:
1390    if (readclouds) then
1391      call MPI_Bcast(ctwc(:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1392      if (mp_ierr /= 0) goto 600
1393    end if
1394
1395! 2D fields
1396    call MPI_Bcast(cloudsh(:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1397    if (mp_ierr /= 0) goto 600
1398    call MPI_Bcast(vdep(:,:,:,li:ui),d2s2,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1399    if (mp_ierr /= 0) goto 600
1400    call MPI_Bcast(ps(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1401    if (mp_ierr /= 0) goto 600
1402    call MPI_Bcast(sd(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1403    if (mp_ierr /= 0) goto 600
1404    call MPI_Bcast(tcc(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1405    if (mp_ierr /= 0) goto 600
1406    call MPI_Bcast(tt2(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1407    if (mp_ierr /= 0) goto 600
1408    call MPI_Bcast(td2(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1409    if (mp_ierr /= 0) goto 600
1410    call MPI_Bcast(lsprec(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1411    if (mp_ierr /= 0) goto 600
1412    call MPI_Bcast(convprec(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1413    if (mp_ierr /= 0) goto 600
1414    call MPI_Bcast(ustar(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1415    if (mp_ierr /= 0) goto 600
1416    call MPI_Bcast(wstar(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1417    if (mp_ierr /= 0) goto 600
1418    call MPI_Bcast(hmix(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1419    if (mp_ierr /= 0) goto 600
1420    call MPI_Bcast(tropopause(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1421    if (mp_ierr /= 0) goto 600
1422    call MPI_Bcast(oli(:,:,:,li:ui),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1423    if (mp_ierr /= 0) goto 600
1424
1425    if (mp_measure_time) call mpif_mtime('commtime',1)
1426
1427    goto 601
1428
1429600 write(*,*) "mpi_mod> mp_ierr \= 0", mp_ierr
1430    stop
1431
1432601 end subroutine mpif_gf_send_vars
1433
1434
1435  subroutine mpif_gf_send_vars_nest(memstat)
1436!***********************************************************************
1437! DESCRIPTION
1438!   Distribute 'getfield' variables from reader process to all processes.
1439!   For nested fields
1440!
1441!   Called from timemanager
1442!
1443! NOTE
1444!   This subroutine distributes the nested wind fields from the reader process to
1445!   all other processes. Usually one set of fields is transferred, but at the
1446!   first timestep, when there are no fields in memory yet, both are transferred.
1447!   MPI_Bcast is used, so implicitly all processes are synchronized at this
1448!   step
1449!
1450!
1451!***********************************************************************
1452    use com_mod
1453    use par_mod,only: numwfmem
1454
1455    implicit none
1456
1457    integer, intent(in) :: memstat
1458    integer :: i
1459! li,ui    lower,upper indices of fields to be transferred (1-3, ui>=li)
1460    integer :: li,ui
1461
1462! Common array sizes used for communications
1463    integer :: d3_size1 = nxmaxn*nymaxn*nzmax
1464    integer :: d3_size2 = nxmaxn*nymaxn*nuvzmax
1465    integer :: d2_size1 = nxmaxn*nymaxn
1466    integer :: d2_size2 = nxmaxn*nymaxn*maxspec
1467    integer :: d2_size3 = nxmaxn*nymaxn
1468
1469    integer :: d3s1,d3s2,d2s1,d2s2
1470
1471! The first time this routine is called, the unchangeable (static) fields are transferred
1472    logical :: first_call=.true.
1473
1474!**********************************************************************
1475
1476! Sizes of arrays transferred
1477    d3s1=d3_size1
1478    d3s2=d3_size2
1479    d2s1=d2_size1
1480    d2s2=d2_size2
1481
1482! Decide which field(s) need to be transferred
1483    if (memstat.ge.32) then ! distribute 2 fields
1484      li=1; ui=2
1485      d3s1=2*d3_size1
1486      d3s2=2*d3_size2
1487      d2s1=2*d2_size1
1488      d2s2=2*d2_size2
1489    else if (memstat.eq.17) then ! distribute 1 field, on 1st index
1490      li=1; ui=1
1491    else if (memstat.eq.18) then ! distribute 1 field, on 2nd index
1492      li=2; ui=2
1493    else if (memstat.eq.19) then ! distribute 1 field, on 3rd index
1494      li=3; ui=3
1495    else
1496      write(*,*) '#### mpi_mod::mpif_gf_send_vars_nest> ERROR: ', &
1497           & 'wrong value of memstat, exiting ####', memstat
1498      stop
1499    end if
1500
1501
1502! Time for MPI communications
1503    if (mp_measure_time) call mpif_mtime('commtime',0)
1504
1505! Distribute variables, send from getfield process (id_read) to other
1506! processes
1507!**********************************************************************
1508
1509! The non-reader processes need to know if cloud water was read.
1510    call MPI_Bcast(readclouds_nest,maxnests,MPI_LOGICAL,id_read,MPI_COMM_WORLD,mp_ierr)
1511    if (mp_ierr /= 0) goto 600
1512
1513! Static fields/variables sent only at startup
1514    if (first_call) then
1515      call MPI_Bcast(oron(:,:,:),d2_size3,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1516      if (mp_ierr /= 0) goto 600
1517      call MPI_Bcast(excessoron(:,:,:),d2_size3,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1518      if (mp_ierr /= 0) goto 600
1519      call MPI_Bcast(lsmn(:,:,:),d2_size3,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1520      if (mp_ierr /= 0) goto 600
1521      call MPI_Bcast(xlandusen(:,:,:,:),d2_size3*numclass,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1522      if (mp_ierr /= 0) goto 600
1523      first_call=.false.
1524    end if
1525
1526! MPI prefers contiguous arrays for sending (otherwise a temporary copy is made),
1527! hence the loop over nests (see the commented sketch below)
1528!**********************************************************************
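! Commented-out illustration of the contiguity point above, using a
! hypothetical array 'a' with the same dimension ordering as the nested
! wind fields: fixing the (last) nest index gives one contiguous block,
! whereas slicing across all nests would generally give a strided section
! that the compiler must first copy into a temporary buffer.
!
!    real :: a(nxmaxn,nymaxn,nzmax,numwfmem,maxnests)
!    ! contiguous (nest index fixed):
!    ! call MPI_Bcast(a(:,:,:,li:ui,i),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
!    ! generally strided (all nests at once), avoided here:
!    ! call MPI_Bcast(a(:,:,:,li:ui,:),d3s1*numbnests,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)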
1529    do i=1, numbnests
1530! 3D fields
1531      call MPI_Bcast(uun(:,:,:,li:ui,i),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1532      if (mp_ierr /= 0) goto 600
1533      call MPI_Bcast(vvn(:,:,:,li:ui,i),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1534      if (mp_ierr /= 0) goto 600
1535      call MPI_Bcast(wwn(:,:,:,li:ui,i),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1536      if (mp_ierr /= 0) goto 600
1537      call MPI_Bcast(ttn(:,:,:,li:ui,i),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1538      if (mp_ierr /= 0) goto 600
1539      call MPI_Bcast(rhon(:,:,:,li:ui,i),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1540      if (mp_ierr /= 0) goto 600
1541      call MPI_Bcast(drhodzn(:,:,:,li:ui,i),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1542      if (mp_ierr /= 0) goto 600
1543      call MPI_Bcast(tthn(:,:,:,li:ui,i),d3s2,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1544      if (mp_ierr /= 0) goto 600
1545      call MPI_Bcast(qvhn(:,:,:,li:ui,i),d3s2,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1546      if (mp_ierr /= 0) goto 600
1547      call MPI_Bcast(qvn(:,:,:,li:ui,i),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1548      if (mp_ierr /= 0) goto 600
1549      call MPI_Bcast(pvn(:,:,:,li:ui,i),d3s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1550      if (mp_ierr /= 0) goto 600
1551      call MPI_Bcast(cloudsn(:,:,:,li:ui,i),d3s1,MPI_INTEGER1,id_read,MPI_COMM_WORLD,mp_ierr)
1552      if (mp_ierr /= 0) goto 600
1553
1554! cloud water/ice:
1555      if (readclouds_nest(i)) then
1556! call MPI_Bcast(icloud_stats(:,:,:,li:ui),d2s1*5,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1557! if (mp_ierr /= 0) goto 600
1558        call MPI_Bcast(ctwcn(:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1559        if (mp_ierr /= 0) goto 600
1560      end if
1561
1562! 2D fields
1563      call MPI_Bcast(cloudshn(:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1564      if (mp_ierr /= 0) goto 600
1565      call MPI_Bcast(vdepn(:,:,:,li:ui,i),d2s2,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1566      if (mp_ierr /= 0) goto 600
1567      call MPI_Bcast(psn(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1568      if (mp_ierr /= 0) goto 600
1569      call MPI_Bcast(sdn(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1570      if (mp_ierr /= 0) goto 600
1571      call MPI_Bcast(tccn(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1572      if (mp_ierr /= 0) goto 600
1573      call MPI_Bcast(tt2n(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1574      if (mp_ierr /= 0) goto 600
1575      call MPI_Bcast(td2n(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1576      if (mp_ierr /= 0) goto 600
1577      call MPI_Bcast(lsprecn(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1578      if (mp_ierr /= 0) goto 600
1579      call MPI_Bcast(convprecn(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1580      if (mp_ierr /= 0) goto 600
1581      call MPI_Bcast(ustarn(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1582      if (mp_ierr /= 0) goto 600
1583      call MPI_Bcast(wstarn(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1584      if (mp_ierr /= 0) goto 600
1585      call MPI_Bcast(olin(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1586      if (mp_ierr /= 0) goto 600
1587      call MPI_Bcast(hmixn(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1588      if (mp_ierr /= 0) goto 600
1589      call MPI_Bcast(tropopausen(:,:,:,li:ui,i),d2s1,mp_sp,id_read,MPI_COMM_WORLD,mp_ierr)
1590      if (mp_ierr /= 0) goto 600
1591    end do
1592
1593    if (mp_measure_time) call mpif_mtime('commtime',1)
1594
1595    goto 601
1596
1597600 write(*,*) "mpi_mod> mp_ierr \= 0",mp_ierr
1598    stop
1599
1600601 end subroutine mpif_gf_send_vars_nest
1601
1602
1603  subroutine mpif_gf_send_vars_async(memstat)
1604!*******************************************************************************
1605! DESCRIPTION
1606!   Distribute 'getfield' variables from reader process to all processes.
1607!   Called from timemanager by reader process only
1608!
1609! NOTE
1610!   This version uses asynchronous sends. The newest fields are sent in the
1611!   background, so calculations can continue while the MPI communications
1612!   are performed.
1613!
1614!   The way the MPI tags/requests are sequenced means that this routine must
1615!   exactly match the posting order in routine 'mpif_gf_recv_vars_async'
1616!
1617! VARIABLES
1618!   memstat -- input, for resolving pointer to windfield index being read
1619!   mind    -- index where to place new fields
1620!
1621!
1622!*******************************************************************************
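! A stripped-down commented-out sketch of the non-blocking pattern used in
! this routine: each destination rank owns a block of nvar_async slots in the
! request array 'reqs', one MPI_Isend per field is posted into consecutive
! slots, and completion is checked later in mpif_gf_request with MPI_Waitall.
! 'field_a' and 'field_b' are hypothetical stand-ins for the wind-field arrays.
!
!    i=dest*nvar_async
!    call MPI_Isend(field_a,d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
!    i=i+1
!    call MPI_Isend(field_b,d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
!    ! ... one slot per variable ...
!    call MPI_Waitall(nvar_async*mp_np,reqs,MPI_STATUSES_IGNORE,mp_ierr)   ! in mpif_gf_request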
1623    use com_mod
1624
1625    implicit none
1626
1627    integer, intent(in) :: memstat
1628    integer :: mind
1629    integer :: dest,i
1630
1631! Common array sizes used for communications
1632    integer :: d3s1 = nxmax*nymax*nzmax
1633    integer :: d3s2 = nxmax*nymax*nuvzmax
1634    integer :: d2s1 = nxmax*nymax
1635    integer :: d2s2 = nxmax*nymax*maxspec
1636!    integer :: d1s1 = maxwf
1637
1638!*******************************************************************************
1639
1640! At the time the send is posted, the reader process is one step further
1641! in the permutation of memstat compared with the receiving processes
1642
1643    if (memstat.ge.32) then
1644! last read was synchronous, to indices 1 and 2, 3 is free
1645      write(*,*) "#### mpi_mod::mpif_gf_send_vars_async> ERROR: &
1646           & memstat>=32 should never happen here."
1647      stop
1648    else if (memstat.eq.17) then
1649! old fields on 1,2, send 3
1650      mind=3
1651    else if (memstat.eq.18) then
1652! old fields on 2,3, send 1
1653      mind=1
1654    else if (memstat.eq.19) then
1655! old fields on 3,1, send 2
1656      mind=2
1657    else
1658      write(*,*) "#### mpi_mod::mpif_gf_send_vars_async> ERROR: &
1659           & invalid memstat"
1660      mind=-1
1661      stop
1662    end if
1663
1664    if (mp_dev_mode) then
1665      if (mp_pid.ne.id_read) then
1666        write(*,*) 'MPI_DEV_MODE: error calling mpif_gf_send_vars_async'
1667      end if
1668    end if
1669
1670    if (mp_dev_mode) write(*,*) '## in mpif_gf_send_vars_async, memstat:', memstat
1671
1672! Time for MPI communications
1673    if (mp_measure_time) call mpif_mtime('commtime',0)
1674
1675! Loop over receiving processes, initiate data sending
1676!*****************************************************
1677
1678    do dest=0,mp_np-1 ! mp_np-2 will also work if last proc reserved for reading
1679! TODO: use mp_partgroup_np here
1680      if (dest.eq.id_read) cycle
1681      i=dest*nvar_async
1682      call MPI_Isend(uu(:,:,:,mind),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1683      if (mp_ierr /= 0) goto 600
1684      i=i+1
1685      call MPI_Isend(vv(:,:,:,mind),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1686      if (mp_ierr /= 0) goto 600
1687      i=i+1
1688      call MPI_Isend(uupol(:,:,:,mind),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1689      if (mp_ierr /= 0) goto 600
1690      i=i+1
1691      call MPI_Isend(vvpol(:,:,:,mind),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1692      if (mp_ierr /= 0) goto 600
1693      i=i+1
1694      call MPI_Isend(ww(:,:,:,mind),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1695      if (mp_ierr /= 0) goto 600
1696      i=i+1
1697      call MPI_Isend(tt(:,:,:,mind),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1698      if (mp_ierr /= 0) goto 600
1699      i=i+1
1700      call MPI_Isend(rho(:,:,:,mind),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1701      if (mp_ierr /= 0) goto 600
1702      i=i+1
1703      call MPI_Isend(drhodz(:,:,:,mind),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1704      if (mp_ierr /= 0) goto 600
1705      i=i+1
1706      call MPI_Isend(tth(:,:,:,mind),d3s2,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1707      if (mp_ierr /= 0) goto 600
1708      i=i+1
1709      call MPI_Isend(qvh(:,:,:,mind),d3s2,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1710      if (mp_ierr /= 0) goto 600
1711      i=i+1
1712      call MPI_Isend(qv(:,:,:,mind),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1713      if (mp_ierr /= 0) goto 600
1714      i=i+1
1715      call MPI_Isend(pv(:,:,:,mind),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1716      if (mp_ierr /= 0) goto 600
1717      i=i+1
1718      call MPI_Isend(clouds(:,:,:,mind),d3s1,MPI_INTEGER1,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1719      if (mp_ierr /= 0) goto 600
1720      i=i+1
1721      call MPI_Isend(cloudsh(:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1722      if (mp_ierr /= 0) goto 600
1723      i=i+1
1724      call MPI_Isend(vdep(:,:,:,mind),d2s2,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1725      if (mp_ierr /= 0) goto 600
1726      i=i+1
1727! 15
1728      call MPI_Isend(ps(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1729      if (mp_ierr /= 0) goto 600
1730      i=i+1
1731      call MPI_Isend(sd(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1732      if (mp_ierr /= 0) goto 600
1733      i=i+1
1734      call MPI_Isend(tcc(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1735      if (mp_ierr /= 0) goto 600
1736      i=i+1
1737      call MPI_Isend(tt2(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1738      if (mp_ierr /= 0) goto 600
1739      i=i+1
1740      call MPI_Isend(td2(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1741      if (mp_ierr /= 0) goto 600
1742      i=i+1
1743      call MPI_Isend(lsprec(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1744      if (mp_ierr /= 0) goto 600
1745      i=i+1
1746      call MPI_Isend(convprec(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1747      if (mp_ierr /= 0) goto 600
1748      i=i+1
1749      call MPI_Isend(ustar(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1750      if (mp_ierr /= 0) goto 600
1751      i=i+1
1752      call MPI_Isend(wstar(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1753      if (mp_ierr /= 0) goto 600
1754      i=i+1
1755      call MPI_Isend(hmix(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1756      if (mp_ierr /= 0) goto 600
1757      i=i+1
1758! 25
1759      call MPI_Isend(tropopause(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1760      if (mp_ierr /= 0) goto 600
1761      i=i+1
1762      call MPI_Isend(oli(:,:,:,mind),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
1763      if (mp_ierr /= 0) goto 600
1764
1765! Send cloud water if it exists. The request counter is incremented inside the if-block, matching the receiving end
1766      if (readclouds) then
1767        i=i+1
1768        call MPI_Isend(ctwc(:,:,mind),d2s1,mp_sp,dest,tm1,&
1769             &MPI_COMM_WORLD,reqs(i),mp_ierr)
1770        if (mp_ierr /= 0) goto 600
1771      end if
1772    end do
1773
1774    if (mp_measure_time) call mpif_mtime('commtime',1)
1775
1776    goto 601
1777
1778600 write(*,*) "#### mpi_mod::mpif_gf_send_vars_async> mp_ierr \= 0", mp_ierr
1779    stop
1780
1781601 end subroutine mpif_gf_send_vars_async
1782
1783
1784  subroutine mpif_gf_recv_vars_async(memstat)
1785!*******************************************************************************
1786! DESCRIPTION
1787!   Receive 'getfield' variables from reader process.
1788!   Called from timemanager by all processes except reader
1789!
1790! NOTE
1791!   This version uses asynchronous communications.
1792!
1793! VARIABLES
1794!   memstat -- input, used to resolve windfield index being received
1795!
1796!
1797!*******************************************************************************
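! Note on message matching (illustration only): all sends in
! 'mpif_gf_send_vars_async' use the same tag (tm1) and the receives below use
! MPI_ANY_TAG, so pairing relies on MPI's ordering rule that point-to-point
! messages between one sender/receiver pair on the same communicator are
! matched in the order they were posted. Commented-out two-field sketch
! ('field_a'/'field_b' are hypothetical):
!
!    ! reader process:                        ! this process:
!    ! call MPI_Isend(field_a,...)            ! call MPI_Irecv(field_a,...)
!    ! call MPI_Isend(field_b,...)            ! call MPI_Irecv(field_b,...)
!    ! field_a matches the first receive, field_b the second.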
1798    use com_mod
1799
1800    implicit none
1801
1802    integer, intent(in) :: memstat
1803    integer :: mind,j
1804
1805! Common array sizes used for communications
1806    integer :: d3s1 = nxmax*nymax*nzmax
1807    integer :: d3s2 = nxmax*nymax*nuvzmax
1808    integer :: d2s1 = nxmax*nymax
1809    integer :: d2s2 = nxmax*nymax*maxspec
1810!integer :: d1_size1 = maxwf
1811
1812!    integer :: d3s1,d3s2,d2s1,d2s2
1813!*******************************************************************************
1814
1815! At the time this immediate receive is posted, memstat is the state of
1816! windfield indices at the previous/current time. From this, the future
1817! state is deduced.
1818    if (memstat.eq.32) then
1819! last read was synchronous to indices 1 and 2, 3 is free
1820      mind=3
1821    else if (memstat.eq.17) then
1822! last read was asynchronous to index 3, 1 is free
1823      mind=1
1824    else if (memstat.eq.18) then
1825! last read was asynchronous to index 1, 2 is free
1826      mind=2
1827    else if (memstat.eq.19) then
1828! last read was asynchronous to index 2, 3 is free
1829      mind=3
1830    else
1831! illegal state
1832      write(*,*) 'mpi_mod> FLEXPART ERROR: Illegal memstat value. Exiting.'
1833      stop
1834    end if
1835
1836    if (mp_dev_mode) then
1837      if (mp_pid.eq.id_read) then
1838        write(*,*) 'MPI_DEV_MODE: error calling mpif_gf_recv_vars_async'
1839      end if
1840    end if
1841
1842! Time for MPI communications
1843    if (mp_measure_time) call mpif_mtime('commtime',0)
1844
1845    if (mp_dev_mode) write(*,*) '## in mpif_gf_recv_vars_async, memstat:', memstat
1846
1847! Initiate receiving of data
1848!***************************
1849
1850! Get MPI tags/requests for communications
1851    j=mp_pid*nvar_async
1852    call MPI_Irecv(uu(:,:,:,mind),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
1853         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1854    if (mp_ierr /= 0) goto 600
1855    j=j+1
1856    call MPI_Irecv(vv(:,:,:,mind),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
1857         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1858    if (mp_ierr /= 0) goto 600
1859    j=j+1
1860    call MPI_Irecv(uupol(:,:,:,mind),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
1861         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1862    if (mp_ierr /= 0) goto 600
1863    j=j+1
1864    call MPI_Irecv(vvpol(:,:,:,mind),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
1865         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1866    if (mp_ierr /= 0) goto 600
1867    j=j+1
1868    call MPI_Irecv(ww(:,:,:,mind),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
1869         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1870    if (mp_ierr /= 0) goto 600
1871    j=j+1
1872    call MPI_Irecv(tt(:,:,:,mind),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
1873         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1874    if (mp_ierr /= 0) goto 600
1875    j=j+1
1876    call MPI_Irecv(rho(:,:,:,mind),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
1877         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1878    if (mp_ierr /= 0) goto 600
1879    j=j+1
1880    call MPI_Irecv(drhodz(:,:,:,mind),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
1881         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1882    if (mp_ierr /= 0) goto 600
1883    j=j+1
1884    call MPI_Irecv(tth(:,:,:,mind),d3s2,mp_sp,id_read,MPI_ANY_TAG,&
1885         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1886    if (mp_ierr /= 0) goto 600
1887    j=j+1
1888    call MPI_Irecv(qvh(:,:,:,mind),d3s2,mp_sp,id_read,MPI_ANY_TAG,&
1889         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1890    if (mp_ierr /= 0) goto 600
1891    j=j+1
1892    call MPI_Irecv(qv(:,:,:,mind),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
1893         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1894    if (mp_ierr /= 0) goto 600
1895    j=j+1
1896    call MPI_Irecv(pv(:,:,:,mind),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
1897         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1898    if (mp_ierr /= 0) goto 600
1899    j=j+1
1900    call MPI_Irecv(clouds(:,:,:,mind),d3s1,MPI_INTEGER1,id_read,MPI_ANY_TAG,&
1901         &MPI_COMM_WORLD,reqs(j),mp_ierr)   
1902    if (mp_ierr /= 0) goto 600
1903    j=j+1
1904    call MPI_Irecv(cloudsh(:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1905         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1906    if (mp_ierr /= 0) goto 600
1907    j=j+1
1908    call MPI_Irecv(vdep(:,:,:,mind),d2s2,mp_sp,id_read,MPI_ANY_TAG,&
1909         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1910    if (mp_ierr /= 0) goto 600
1911    j=j+1
1912    call MPI_Irecv(ps(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1913         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1914    if (mp_ierr /= 0) goto 600
1915    j=j+1
1916    call MPI_Irecv(sd(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1917         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1918    if (mp_ierr /= 0) goto 600
1919    j=j+1
1920    call MPI_Irecv(tcc(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1921         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1922    if (mp_ierr /= 0) goto 600
1923    j=j+1
1924    call MPI_Irecv(tt2(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1925         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1926    if (mp_ierr /= 0) goto 600
1927    j=j+1
1928    call MPI_Irecv(td2(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1929         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1930    if (mp_ierr /= 0) goto 600
1931    j=j+1
1932    call MPI_Irecv(lsprec(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1933         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1934    if (mp_ierr /= 0) goto 600
1935    j=j+1
1936    call MPI_Irecv(convprec(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1937         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1938    if (mp_ierr /= 0) goto 600
        j=j+1
1939    call MPI_Irecv(ustar(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1940         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1941    if (mp_ierr /= 0) goto 600
1942    j=j+1
1943    call MPI_Irecv(wstar(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1944         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1945    if (mp_ierr /= 0) goto 600
1946    j=j+1
1947    call MPI_Irecv(hmix(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1948         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1949    if (mp_ierr /= 0) goto 600
1950    j=j+1
1951    call MPI_Irecv(tropopause(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1952         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1953    if (mp_ierr /= 0) goto 600
1954    j=j+1
1955    call MPI_Irecv(oli(:,:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
1956         &MPI_COMM_WORLD,reqs(j),mp_ierr)
1957    if (mp_ierr /= 0) goto 600
1958
1959! Post the receive request for cloud water (clwc). These data are possibly not sent, in which case the request must be cancelled.
1960! For now, assume that either all time steps carry cloud water or none do.
1961    if (readclouds) then
1962      j=j+1
1963      call MPI_Irecv(ctwc(:,:,mind),d2s1*5,mp_sp,id_read,MPI_ANY_TAG,&
1964           &MPI_COMM_WORLD,reqs(j),mp_ierr)
1965      if (mp_ierr /= 0) goto 600
1966    end if
1967
1968
1969    if (mp_measure_time) call mpif_mtime('commtime',1)
1970
1971    goto 601
1972
1973600 write(*,*) "#### mpi_mod::mpif_gf_recv_vars_async> MPI ERROR ", mp_ierr
1974    stop
1975
1976601 end subroutine mpif_gf_recv_vars_async
1977
1978
1979  subroutine mpif_gf_send_vars_nest_async(memstat)
1980!*******************************************************************************
1981! DESCRIPTION
1982!   Distribute 'getfield' variables from reader process to all processes.
1983!   Called from timemanager by reader process only.
1984!   Version for nested wind fields
1985!
1986! NOTE
1987!   This version uses asynchronous sends. The newest fields are sent in the
1988!   background, so calculations can continue while the MPI communications
1989!   are performed.
1990!
1991!   The way the MPI tags/requests are sequenced means that this routine must
1992!   exactly match the posting order in routine 'mpif_gf_recv_vars_nest_async'
1993!
1994! VARIABLES
1995!   memstat -- input, for resolving pointer to windfield index being read
1996!   mind    -- index where to place new fields
1997!
1998! TODO
1999!   Some unused arrays are currently sent (uupoln,..)
2000!*******************************************************************************
2001    use com_mod
2002
2003    implicit none
2004
2005    integer, intent(in) :: memstat
2006    integer :: mind
2007    integer :: dest,i,k
2008
2009! Common array sizes used for communications
2010    integer :: d3s1 = nxmaxn*nymaxn*nzmax
2011    integer :: d3s2 = nxmaxn*nymaxn*nuvzmax
2012    integer :: d2s1 = nxmaxn*nymaxn
2013    integer :: d2s2 = nxmaxn*nymaxn*maxspec
2014
2015!*******************************************************************************
2016
2017! At the time the send is posted, the reader process is one step further
2018! in the permutation of memstat compared with the receiving processes
2019
2020    if (memstat.ge.32) then
2021! last read was synchronous, to indices 1 and 2, 3 is free
2022      write(*,*) "#### mpi_mod::mpif_gf_send_vars_nest_async> ERROR: &
2023           & memstat>=32 should never happen here."
2024      stop
2025    else if (memstat.eq.17) then
2026! old fields on 1,2, send 3
2027      mind=3
2028    else if (memstat.eq.18) then
2029! old fields on 2,3, send 1
2030      mind=1
2031    else if (memstat.eq.19) then
2032! old fields on 3,1, send 2
2033      mind=2
2034    else
2035      write(*,*) "#### mpi_mod::mpif_gf_send_vars_nest_async> ERROR: &
2036           & invalid memstat"
2037      mind=-1
2038      stop
2039    end if
2040
2041    if (mp_dev_mode) then
2042      if (mp_pid.ne.id_read) then
2043        write(*,*) 'MPI_DEV_MODE: error calling mpif_gf_send_vars_nest_async'
2044      end if
2045    end if
2046
2047    if (mp_dev_mode) write(*,*) '## in mpif_gf_send_vars_nest_async, memstat:', memstat
2048
2049! Time for MPI communications
2050    if (mp_measure_time) call mpif_mtime('commtime',0)
2051
2052! Loop over receiving processes, initiate data sending
2053!*****************************************************
2054
2055    do dest=0,mp_np-1 ! mp_np-2 will also work if last proc reserved for reading
2056! TODO: use mp_partgroup_np here
2057      if (dest.eq.id_read) cycle
2058      do k=1, numbnests
2059        i=dest*nvar_async
2060        call MPI_Isend(uun(:,:,:,mind,k),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2061        if (mp_ierr /= 0) goto 600
2062        i=i+1
2063        call MPI_Isend(vvn(:,:,:,mind,k),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2064        if (mp_ierr /= 0) goto 600
2065        i=i+1
2066        call MPI_Isend(wwn(:,:,:,mind,k),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2067        if (mp_ierr /= 0) goto 600
2068        i=i+1
2069        call MPI_Isend(ttn(:,:,:,mind,k),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2070        if (mp_ierr /= 0) goto 600
2071        i=i+1
2072        call MPI_Isend(rhon(:,:,:,mind,k),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2073        if (mp_ierr /= 0) goto 600
2074        i=i+1
2075        call MPI_Isend(drhodzn(:,:,:,mind,k),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2076        if (mp_ierr /= 0) goto 600
2077        i=i+1
2078        call MPI_Isend(tthn(:,:,:,mind,k),d3s2,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2079        if (mp_ierr /= 0) goto 600
2080        i=i+1
2081        call MPI_Isend(qvhn(:,:,:,mind,k),d3s2,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2082        if (mp_ierr /= 0) goto 600
2083        i=i+1
2084        call MPI_Isend(qvn(:,:,:,mind,k),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2085        if (mp_ierr /= 0) goto 600
2086        i=i+1
2087        call MPI_Isend(pvn(:,:,:,mind,k),d3s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2088        if (mp_ierr /= 0) goto 600
2089        i=i+1
2090        call MPI_Isend(cloudsn(:,:,:,mind,k),d3s1,MPI_INTEGER1,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2091        if (mp_ierr /= 0) goto 600
2092        i=i+1
2093        call MPI_Isend(cloudshn(:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2094        if (mp_ierr /= 0) goto 600
2095        i=i+1
2096        call MPI_Isend(vdepn(:,:,:,mind,k),d2s2,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2097        if (mp_ierr /= 0) goto 600
2098        i=i+1
2099        call MPI_Isend(psn(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2100        if (mp_ierr /= 0) goto 600
2101        i=i+1
2102        call MPI_Isend(sdn(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2103        if (mp_ierr /= 0) goto 600
2104        i=i+1
2105! 15
2106        call MPI_Isend(tccn(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2107        if (mp_ierr /= 0) goto 600
2108        i=i+1
2109        call MPI_Isend(tt2n(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2110        if (mp_ierr /= 0) goto 600
2111        i=i+1
2112        call MPI_Isend(td2n(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2113        if (mp_ierr /= 0) goto 600
2114        i=i+1
2115        call MPI_Isend(lsprecn(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2116        if (mp_ierr /= 0) goto 600
2117        i=i+1
2118        call MPI_Isend(convprecn(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2119        if (mp_ierr /= 0) goto 600
2120        i=i+1
2121        call MPI_Isend(ustarn(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2122        if (mp_ierr /= 0) goto 600
2123        i=i+1
2124        call MPI_Isend(wstarn(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2125        if (mp_ierr /= 0) goto 600
2126        i=i+1
2127        call MPI_Isend(hmixn(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2128        if (mp_ierr /= 0) goto 600
2129        i=i+1
2130        call MPI_Isend(tropopausen(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2131        if (mp_ierr /= 0) goto 600
2132        i=i+1
2133        call MPI_Isend(olin(:,:,:,mind,k),d2s1,mp_sp,dest,tm1,MPI_COMM_WORLD,reqs(i),mp_ierr)
2134        if (mp_ierr /= 0) goto 600
2135! 25
2136
2137! Send cloud water if it exists. The request counter is incremented inside the if-block, matching the receiving end
2138        if (readclouds) then
2139          i=i+1
2140          call MPI_Isend(ctwcn(:,:,mind,k),d2s1,mp_sp,dest,tm1,&
2141               &MPI_COMM_WORLD,reqs(i),mp_ierr)
2142          if (mp_ierr /= 0) goto 600
2143        end if
2144      end do
2145    end do
2146
2147    if (mp_measure_time) call mpif_mtime('commtime',1)
2148
2149    goto 601
2150
2151600 write(*,*) "#### mpi_mod::mpif_gf_send_vars_nest_async> mp_ierr \= 0", mp_ierr
2152    stop
2153
2154601 end subroutine mpif_gf_send_vars_nest_async
2155
2156
2157  subroutine mpif_gf_recv_vars_nest_async(memstat)
2158!*******************************************************************************
2159! DESCRIPTION
2160!   Receive 'getfield' variables from reader process.
2161!   Called from timemanager by all processes except reader
2162!   Version for nested wind fields
2163!
2164! NOTE
2165!   This version uses asynchronous communications.
2166!
2167! VARIABLES
2168!   memstat -- input, used to resolve windfield index being received
2169!
2170!
2171!*******************************************************************************
2172    use com_mod
2173
2174    implicit none
2175
2176    integer, intent(in) :: memstat
2177    integer :: mind,j,k
2178
2179! Common array sizes used for communications
2180    integer :: d3s1 = nxmaxn*nymaxn*nzmax
2181    integer :: d3s2 = nxmaxn*nymaxn*nuvzmax
2182    integer :: d2s1 = nxmaxn*nymaxn
2183    integer :: d2s2 = nxmaxn*nymaxn*maxspec
2184
2185!*******************************************************************************
2186
2187! At the time this immediate receive is posted, memstat is the state of
2188! windfield indices at the previous/current time. From this, the future
2189! state is deduced.
2190    if (memstat.eq.32) then
2191! last read was synchronous to indices 1 and 2, 3 is free
2192      mind=3
2193    else if (memstat.eq.17) then
2194! last read was asynchronous to index 3, 1 is free
2195      mind=1
2196    else if (memstat.eq.18) then
2197! last read was asynchronous to index 1, 2 is free
2198      mind=2
2199    else if (memstat.eq.19) then
2200! last read was asynchronous to index 2, 3 is free
2201      mind=3
2202    else
2203! illegal state
2204      write(*,*) 'mpi_mod> FLEXPART ERROR: Illegal memstat value. Exiting.'
2205      stop
2206    end if
2207
2208    if (mp_dev_mode) then
2209      if (mp_pid.eq.id_read) then
2210        write(*,*) 'MPI_DEV_MODE: error calling mpif_gf_recv_vars_nest_async'
2211      end if
2212    end if
2213
2214! Time for MPI communications
2215    if (mp_measure_time) call mpif_mtime('commtime',0)
2216
2217    if (mp_dev_mode) write(*,*) '## in mpif_gf_recv_vars_nest_async, memstat:', memstat
2218
2219! Initiate receiving of data
2220!***************************
2221
2222    do k=1, numbnests
2223! Get MPI tags/requests for communications
2224      j=mp_pid*nvar_async
2225      call MPI_Irecv(uun(:,:,:,mind,k),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
2226           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2227      if (mp_ierr /= 0) goto 600
2228      j=j+1
2229      call MPI_Irecv(vvn(:,:,:,mind,k),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
2230           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2231      if (mp_ierr /= 0) goto 600
2232      j=j+1
2233      call MPI_Irecv(wwn(:,:,:,mind,k),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
2234           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2235      if (mp_ierr /= 0) goto 600
2236      j=j+1
2237      call MPI_Irecv(ttn(:,:,:,mind,k),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
2238           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2239      if (mp_ierr /= 0) goto 600
2240      j=j+1
2241      call MPI_Irecv(rhon(:,:,:,mind,k),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
2242           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2243      if (mp_ierr /= 0) goto 600
2244      j=j+1
2245      call MPI_Irecv(drhodzn(:,:,:,mind,k),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
2246           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2247      if (mp_ierr /= 0) goto 600
2248      j=j+1
2249      call MPI_Irecv(tthn(:,:,:,mind,k),d3s2,mp_sp,id_read,MPI_ANY_TAG,&
2250           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2251      if (mp_ierr /= 0) goto 600
2252      j=j+1
2253      call MPI_Irecv(qvhn(:,:,:,mind,k),d3s2,mp_sp,id_read,MPI_ANY_TAG,&
2254           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2255      if (mp_ierr /= 0) goto 600
2256      j=j+1
2257      call MPI_Irecv(qvn(:,:,:,mind,k),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
2258           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2259      if (mp_ierr /= 0) goto 600
2260      j=j+1
2261      call MPI_Irecv(pvn(:,:,:,mind,k),d3s1,mp_sp,id_read,MPI_ANY_TAG,&
2262           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2263      if (mp_ierr /= 0) goto 600
2264      j=j+1
2265      call MPI_Irecv(cloudsn(:,:,:,mind,k),d3s1,MPI_INTEGER1,id_read,MPI_ANY_TAG,&
2266           &MPI_COMM_WORLD,reqs(j),mp_ierr)   
2267      if (mp_ierr /= 0) goto 600
2268      j=j+1
2269      call MPI_Irecv(cloudshn(:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2270           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2271      if (mp_ierr /= 0) goto 600
2272      j=j+1
2273      call MPI_Irecv(vdepn(:,:,:,mind,k),d2s2,mp_sp,id_read,MPI_ANY_TAG,&
2274           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2275      if (mp_ierr /= 0) goto 600
2276      j=j+1
2277      call MPI_Irecv(psn(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2278           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2279      if (mp_ierr /= 0) goto 600
2280      j=j+1
2281      call MPI_Irecv(sdn(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2282           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2283      if (mp_ierr /= 0) goto 600
2284      j=j+1
2285      call MPI_Irecv(tccn(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2286           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2287      if (mp_ierr /= 0) goto 600
2288      j=j+1
2289      call MPI_Irecv(tt2n(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2290           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2291      if (mp_ierr /= 0) goto 600
2292      j=j+1
2293      call MPI_Irecv(td2n(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2294           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2295      if (mp_ierr /= 0) goto 600
2296      j=j+1
2297      call MPI_Irecv(lsprecn(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2298           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2299      if (mp_ierr /= 0) goto 600
2300      j=j+1
2301      call MPI_Irecv(convprecn(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2302           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2303      if (mp_ierr /= 0) goto 600
          j=j+1
2304      call MPI_Irecv(ustarn(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2305           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2306      if (mp_ierr /= 0) goto 600
2307      j=j+1
2308      call MPI_Irecv(wstarn(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2309           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2310      if (mp_ierr /= 0) goto 600
2311      j=j+1
2312      call MPI_Irecv(hmixn(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2313           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2314      if (mp_ierr /= 0) goto 600
2315      j=j+1
2316      call MPI_Irecv(tropopausen(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2317           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2318      if (mp_ierr /= 0) goto 600
2319      j=j+1
2320      call MPI_Irecv(olin(:,:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
2321           &MPI_COMM_WORLD,reqs(j),mp_ierr)
2322      if (mp_ierr /= 0) goto 600
2323
2324! Post the receive request for cloud water (clwc). These data are possibly not sent, in which case the request must be cancelled.
2325! For now, assume that either all time steps carry cloud water or none do.
2326      if (readclouds) then
2327        j=j+1
2328        call MPI_Irecv(ctwcn(:,:,mind,k),d2s1*5,mp_sp,id_read,MPI_ANY_TAG,&
2329             &MPI_COMM_WORLD,reqs(j),mp_ierr)
2330        if (mp_ierr /= 0) goto 600
2331      end if
2332    end do
2333
2334    if (mp_measure_time) call mpif_mtime('commtime',1)
2335
2336    goto 601
2337
2338600 write(*,*) "#### mpi_mod::mpif_gf_recv_vars_nest_async> MPI ERROR ", mp_ierr
2339    stop
2340
2341601 end subroutine mpif_gf_recv_vars_nest_async
2342
2343
2344  subroutine mpif_gf_request
2345!*******************************************************************************
2346! DESCRIPTION
2347!   Check for completion of MPI_Isend/MPI_Irecv data transfer.
2348!   
2349!   
2350! NOTE
2351!   implicit synchronisation between all processes takes place here
2352!
2353! TODO
2354!   NB: take into account nested wind fields by using a separate
2355!   subroutine mpif_gf_request_nest (and separate request arrays for the
2356!   nested variables)
2357!
2358!*******************************************************************************
2359!    use com_mod,only: readclouds
2360
2361    implicit none
2362
2363
2364    integer :: n_req !,i
2365
2366!***********************************************************************
2367
2368    n_req=nvar_async*mp_np
2369
2370    if (mp_measure_time) call mpif_mtime('commtime',0)
2371
2372!    call MPI_Wait(rm1,MPI_STATUS_IGNORE,mp_ierr)
2373
2374! TODO: cancel recv request if readclouds=.false.
2375!    if (readclouds) then
2376    call MPI_Waitall(n_req,reqs,MPI_STATUSES_IGNORE,mp_ierr)
2377!    endif
2378! else
2379!   do i = 0, nvar_async*mp_np-1
2380!     if (mod(i,27).eq.0 .or. mod(i,28).eq.0) then
2381!       call MPI_Cancel(reqs(i),mp_ierr)
2382!       cycle
2383!     end if
2384!     call MPI_Wait(reqs(i),MPI_STATUS_IGNORE,mp_ierr)
2385!   end do
2386! end if
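! A possible commented-out sketch for the cancellation mentioned in the TODO
! above: a posted receive that will never be matched can be cancelled, but the
! request must still be completed so the handle is freed. 'ireq' (the index of
! the cloud-water receive request), 'mp_status' and 'l_cancelled' are
! hypothetical local variables, not part of this module.
!
!    ! integer :: mp_status(MPI_STATUS_SIZE)
!    ! logical :: l_cancelled
!    ! call MPI_Cancel(reqs(ireq),mp_ierr)
!    ! call MPI_Wait(reqs(ireq),mp_status,mp_ierr)
!    ! call MPI_Test_cancelled(mp_status,l_cancelled,mp_ierr)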
2387
2388    if (mp_ierr /= 0) goto 600
2389
2390    if (mp_measure_time) call mpif_mtime('commtime',1)
2391
2392    goto 601
2393
2394600 write(*,*) "#### mpi_mod::mpif_gf_request> MPI ERROR ", mp_ierr
2395    stop
2396
2397601 end subroutine mpif_gf_request
2398
2399
2400  subroutine mpif_tm_reduce_grid
2401!***********************************************************************
2402! Collect grid variable to PID 0, adding from all processes.
2403!
2404! NOTE
2405!   - Older MPI implementations may lack the MPI_IN_PLACE operation.
2406!     If so use 1) below and change unc_mod
2407!
2408!***********************************************************************
2409    use com_mod
2410    use unc_mod
2411    use par_mod
2412
2413    implicit none
2414
2415    integer :: grid_size2d,grid_size3d
2416    integer, parameter :: rcpt_size=maxreceptor*maxspec
2417
2418!**********************************************************************
2419    grid_size2d=numxgrid*numygrid*maxspec* &
2420         & maxpointspec_act*nclassunc*maxageclass
2421    grid_size3d=numxgrid*numygrid*numzgrid*maxspec* &
2422         & maxpointspec_act*nclassunc*maxageclass
2423
2424
2425! Time for MPI communications
2426    if (mp_measure_time) call mpif_mtime('commtime',0)
2427
2428! 1) Using a separate grid (gridunc0) for received values
2429! call MPI_Reduce(gridunc, gridunc0, grid_size3d, mp_sp, MPI_SUM, id_root, &
2430!      & mp_comm_used, mp_ierr)
2431! if (mp_ierr /= 0) goto 600
2432
2433! 2) Using in-place reduction
2434
2435!!!--------------------------------------------------------------------
2436!!! DJM - 2017-05-09 change - MPI_IN_PLACE option for MPI_Reduce() causes
2437!!! severe numerical problems in some cases.  I'm guessing there are memory
2438!!! overrun issues in this complex code, but have so far failed to identify
2439!!! a specific problem.  And/or, when one searches the Internet for this
2440!!! issue, there is "some" hint that the implementation may be buggy. 
2441!!!
2442!!! At this point, with the following CPP USE_MPIINPLACE directive, the
2443!!! default behaviour will be to not use the MPI_IN_PLACE option.
2444!!! Users will have to compile with -DUSE_MPIINPLACE if they want that option.
2445!!! Introduction of the CPP directives also requires that the code be compiled
2446!!! with the "-x f95-cpp-input" option.
2447!!!
2448!!! Modification of this section requires the addition of array gridunc0, which
2449!!! requires an allocation in outgrid_init.f90 and initial declaration in
2450!!! unc_mod.f90.
2451!!!--------------------------------------------------------------------
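! Commented-out toy sketch of the two reduction variants discussed above, for
! a hypothetical real array 'g' (with receive copy 'g0') of length n:
!
!    ! (a) separate receive buffer on root (default branch below):
!    ! call MPI_Reduce(g, g0, n, mp_sp, MPI_SUM, id_root, mp_comm_used, mp_ierr)
!    ! if (lroot) g = g0
!    !
!    ! (b) in-place on root (USE_MPIINPLACE branch below); non-root processes
!    !     still pass their send buffer and a dummy receive argument:
!    ! if (lroot) then
!    !   call MPI_Reduce(MPI_IN_PLACE, g, n, mp_sp, MPI_SUM, id_root, mp_comm_used, mp_ierr)
!    ! else
!    !   call MPI_Reduce(g, 0, n, mp_sp, MPI_SUM, id_root, mp_comm_used, mp_ierr)
!    ! end if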
2452
2453#ifdef USE_MPIINPLACE
2454
2455    if (lroot) then
2456      call MPI_Reduce(MPI_IN_PLACE, gridunc, grid_size3d, mp_sp, MPI_SUM, id_root, &
2457           & mp_comm_used, mp_ierr)
2458      if (mp_ierr /= 0) goto 600
2459    else
2460      call MPI_Reduce(gridunc, 0, grid_size3d, mp_sp, MPI_SUM, id_root, &
2461           & mp_comm_used, mp_ierr)
2462    end if
2463
2464#else
2465
2466    call MPI_Reduce(gridunc, gridunc0, grid_size3d, mp_sp, MPI_SUM, id_root, &
2467         & mp_comm_used, mp_ierr)
        if (mp_ierr /= 0) goto 600
2468    if (lroot) gridunc = gridunc0
2469
2470#endif
2471
2472    if ((WETDEP).and.(ldirect.gt.0)) then
2473      call MPI_Reduce(wetgridunc, wetgridunc0, grid_size2d, mp_cp, MPI_SUM, id_root, &
2474           & mp_comm_used, mp_ierr)
2475      if (mp_ierr /= 0) goto 600
2476    end if
2477
2478    if ((DRYDEP).and.(ldirect.gt.0)) then
2479      call MPI_Reduce(drygridunc, drygridunc0, grid_size2d, mp_cp, MPI_SUM, id_root, &
2480           & mp_comm_used, mp_ierr)
2481      if (mp_ierr /= 0) goto 600
2482    end if
2483
2484! Receptor concentrations   
2485    if (lroot) then
2486      call MPI_Reduce(MPI_IN_PLACE,creceptor,rcpt_size,mp_sp,MPI_SUM,id_root, &
2487           & mp_comm_used,mp_ierr)
2488      if (mp_ierr /= 0) goto 600
2489    else
2490      call MPI_Reduce(creceptor,0,rcpt_size,mp_sp,MPI_SUM,id_root, &
2491           & mp_comm_used,mp_ierr)
2492    end if
2493
2494    if (mp_measure_time) call mpif_mtime('commtime',1)
2495
2496    goto 601
2497
2498600 write(*,*) "mpi_mod> mp_ierr \= 0", mp_ierr
2499    stop
2500
2501601 end subroutine mpif_tm_reduce_grid
2502
2503
2504  subroutine mpif_tm_reduce_grid_nest
2505!***********************************************************************
2506! Collect nested grids to PID 0, adding from all processes.
2507!
2508! NOTE
2509!   - Compiling with 'fcheck=all' (gfortran) will cause a run-time error here,
2510!     as wetgriduncn0 is only allocated on the root process. This is likely valid
2511!     MPI (the receive buffer is ignored on non-root), but gfortran's run-time
!     checking rejects it (a possible workaround is sketched below).
2512!
2513!***********************************************************************
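! One possible commented-out workaround for the 'fcheck=all' issue noted above
! (not applied here): give the receive-copy arrays a minimal dummy allocation
! on non-root processes, since MPI ignores the receive buffer there but the
! compiler's run-time argument checking does not. Hypothetical sketch for the
! allocation site:
!
!    ! if (lroot) then
!    !   allocate(wetgriduncn0(0:numxgridn-1,0:numygridn-1,maxspec, &
!    !        &maxpointspec_act,nclassunc,maxageclass))
!    ! else
!    !   allocate(wetgriduncn0(1,1,1,1,1,1))
!    ! end if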
2514    use com_mod
2515    use unc_mod
2516    use par_mod
2517
2518    implicit none
2519
2520    integer :: grid_size2d,grid_size3d
2521
2522!**********************************************************************
2523
2524    grid_size3d=numxgridn*numygridn*numzgrid*maxspec* &
2525         & maxpointspec_act*nclassunc*maxageclass
2526    grid_size2d=numxgridn*numygridn*maxspec* &
2527         & maxpointspec_act*nclassunc*maxageclass
2528
2529
2530! Time for MPI communications
2531    if (mp_measure_time) call mpif_mtime('commtime',0)
2532
2533! Using a separate grid (gridunc0) for received values, for debugging
2534! call MPI_Reduce(griduncn, griduncn0, grid_size3d, mp_sp, MPI_SUM, id_root, &
2535!      & mp_comm_used, mp_ierr)
2536! if (mp_ierr /= 0) goto 600
2537
2538! Using in-place reduction
2539    if (lroot) then
2540      call MPI_Reduce(MPI_IN_PLACE, griduncn, grid_size3d, mp_sp, MPI_SUM, id_root, &
2541           & mp_comm_used, mp_ierr)
2542      if (mp_ierr /= 0) goto 600
2543    else
2544      call MPI_Reduce(griduncn, 0, grid_size3d, mp_sp, MPI_SUM, id_root, &
2545           & mp_comm_used, mp_ierr)
2546    end if
2547
2548    if ((WETDEP).and.(ldirect.gt.0)) then
2549      call MPI_Reduce(wetgriduncn, wetgriduncn0, grid_size2d, mp_cp, MPI_SUM, id_root, &
2550           & mp_comm_used, mp_ierr)
2551      if (mp_ierr /= 0) goto 600
2552    end if
2553
2554    if ((DRYDEP).and.(ldirect.gt.0)) then
2555      call MPI_Reduce(drygriduncn, drygriduncn0, grid_size2d, mp_cp, MPI_SUM, id_root, &
2556           & mp_comm_used, mp_ierr)
2557      if (mp_ierr /= 0) goto 600
2558    end if
2559
2560    if (mp_measure_time) call mpif_mtime('commtime',1)
2561
2562    goto 601
2563
2564600 write(*,*) "mpi_mod> mp_ierr \= 0", mp_ierr
2565    stop
2566
2567601 end subroutine mpif_tm_reduce_grid_nest
2568
2569
2570  subroutine mpif_mtime(ident,imode)
2571!***********************************************************************
2572! Measure CPU/Wall time in various parts of the code
2573!
2574! VARIABLES
2575!   ident        character, identifies routine to measure
2576!   imode        integer, 0:start clock(s)  1: stop clock(s)
2577!
2578!***********************************************************************
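! For reference: the two clocks used below measure different things.
! cpu_time returns processor time consumed by the calling process, while
! mpi_wtime returns elapsed wall-clock time, so a section that mostly waits
! inside MPI calls accumulates wall time but little CPU time. Minimal
! commented-out usage sketch (local hypothetical variables):
!
!    ! real :: t_cpu0, t_cpu1
!    ! double precision :: t_wall0, t_wall1
!    ! call cpu_time(t_cpu0); t_wall0 = mpi_wtime()
!    ! ! ... timed section ...
!    ! call cpu_time(t_cpu1); t_wall1 = mpi_wtime()
!    ! write(*,*) 'cpu:', t_cpu1-t_cpu0, ' wall:', t_wall1-t_wall0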
2579    implicit none
2580
2581    character(LEN=*), intent(in) :: ident
2582    integer, intent(in) :: imode
2583
2584!***********************************************************************
2585
2586    select case(ident)
2587
2588    case ('timemanager')
2589      if (imode.eq.0) then
2590        call cpu_time(tm_tot_beg)
2591      else
2592        call cpu_time(tm_tot_end)
2593        tm_tot_total = tm_tot_total + (tm_tot_end-tm_tot_beg)
2594      end if
2595
2596    case ('wetdepo')
2597      if (imode.eq.0) then
2598        mp_wetdepo_wtime_beg = mpi_wtime()
2599        call cpu_time(mp_wetdepo_time_beg)
2600      else
2601        mp_wetdepo_wtime_end = mpi_wtime()
2602        call cpu_time(mp_wetdepo_time_end)
2603
2604        mp_wetdepo_wtime_total = mp_wetdepo_wtime_total + &
2605             &(mp_wetdepo_wtime_end - mp_wetdepo_wtime_beg)
2606        mp_wetdepo_time_total = mp_wetdepo_time_total + &
2607             &(mp_wetdepo_time_end - mp_wetdepo_time_beg)
2608      end if
2609
2610    case ('advance')
2611      if (imode.eq.0) then
2612        mp_advance_wtime_beg = mpi_wtime()
2613      else
2614        mp_advance_wtime_end = mpi_wtime()
2615
2616        mp_advance_wtime_total = mp_advance_wtime_total + &
2617             &(mp_advance_wtime_end - mp_advance_wtime_beg)
2618      end if
2619
2620    case ('getfields')
2621      if (imode.eq.0) then
2622        mp_getfields_wtime_beg = mpi_wtime()
2623        call cpu_time(mp_getfields_time_beg)
2624      else
2625        mp_getfields_wtime_end = mpi_wtime()
2626        call cpu_time(mp_getfields_time_end)
2627
2628        mp_getfields_wtime_total = mp_getfields_wtime_total + &
2629             &(mp_getfields_wtime_end - mp_getfields_wtime_beg)
2630        mp_getfields_time_total = mp_getfields_time_total + &
2631             &(mp_getfields_time_end - mp_getfields_time_beg)
2632      end if
2633
2634    case ('partloop1')
2635      if (imode.eq.0) then
2636        call cpu_time(tm_nploop_beg)
2637      else
2638        call cpu_time(tm_nploop_end)
2639        tm_nploop_total = tm_nploop_total + (tm_nploop_end - tm_nploop_beg)
2640      end if
2641
2642    case ('conccalc')
2643      if (imode.eq.0) then
2644        call cpu_time(mp_conccalc_time_beg)
2645      else
2646        call cpu_time(mp_conccalc_time_end)
2647        mp_conccalc_time_total = mp_conccalc_time_total + mp_conccalc_time_end - &
2648             &mp_conccalc_time_beg
2649      end if
2650
2651    case ('rootonly')
2652      if (imode.eq.0) then
2653        call cpu_time(mp_root_time_beg)
2654        mp_root_wtime_beg = mpi_wtime()
2655      else
2656        call cpu_time(mp_root_time_end)
2657        mp_root_wtime_end = mpi_wtime()
2658        mp_root_time_total = mp_root_time_total + &
2659             &(mp_root_time_end - mp_root_time_beg)
2660        mp_root_wtime_total = mp_root_wtime_total + &
2661             &(mp_root_wtime_end - mp_root_wtime_beg)
2662      end if
2663
2664    case ('iotime')
2665      if (imode.eq.0) then
2666        mp_io_wtime_beg = mpi_wtime()
2667        call cpu_time(mp_io_time_beg)
2668      else
2669        mp_io_wtime_end = mpi_wtime()
2670        call cpu_time(mp_io_time_end)
2671
2672        mp_io_wtime_total = mp_io_wtime_total + (mp_io_wtime_end - &
2673             & mp_io_wtime_beg)
2674        mp_io_time_total = mp_io_time_total + (mp_io_time_end - &
2675             & mp_io_time_beg)
2676      end if
2677
2678    case ('verttransform')
2679      if (imode.eq.0) then
2680        mp_vt_wtime_beg = mpi_wtime()
2681        call cpu_time(mp_vt_time_beg)
2682      else
2683        mp_vt_wtime_end = mpi_wtime()
2684        call cpu_time(mp_vt_time_end)
2685
2686        mp_vt_wtime_total = mp_vt_wtime_total + (mp_vt_wtime_end - &
2687             & mp_vt_wtime_beg)
2688        mp_vt_time_total = mp_vt_time_total + (mp_vt_time_end - &
2689             & mp_vt_time_beg)
2690      end if
2691
2692    case ('readwind')
2693      if (imode.eq.0) then
2694        call cpu_time(mp_readwind_time_beg)
2695        mp_readwind_wtime_beg = mpi_wtime()
2696      else
2697        call cpu_time(mp_readwind_time_end)
2698        mp_readwind_wtime_end = mpi_wtime()
2699
2700        mp_readwind_time_total = mp_readwind_time_total + &
2701             &(mp_readwind_time_end - mp_readwind_time_beg)
2702        mp_readwind_wtime_total = mp_readwind_wtime_total + &
2703             &(mp_readwind_wtime_end - mp_readwind_wtime_beg)
2704      end if
2705
2706    case ('commtime')
2707      if (imode.eq.0) then
2708        call cpu_time(mp_comm_time_beg)
2709        mp_comm_wtime_beg = mpi_wtime()
2710      else
2711        call cpu_time(mp_comm_time_end)
2712        mp_comm_wtime_end = mpi_wtime()
2713        mp_comm_time_total = mp_comm_time_total + &
2714             &(mp_comm_time_end - mp_comm_time_beg)
2715        mp_comm_wtime_total = mp_comm_wtime_total + &
2716             &(mp_comm_wtime_end - mp_comm_wtime_beg)
2717      end if
2718
2719    case ('flexpart')
2720      if (imode.eq.0) then
2721        mp_total_wtime_beg=mpi_wtime()
2722      else
2723        mp_total_wtime_end=mpi_wtime()
2724        mp_total_wtime_total = mp_total_wtime_end-mp_total_wtime_beg
2725      end if
2726
2727    case default
2728      write(*,*) 'mpi_mod::mpif_mtime> WARNING: unknown case identifier'
2729
2730    end select
2731
2732
2733  end subroutine mpif_mtime
2734
2735
2736  subroutine mpif_finalize
2737!***********************************************************************
2738! Finalize MPI
2739! Optionally print detailed time diagnostics
2740!***********************************************************************
2741    implicit none
2742
2743    integer :: ip !,j,r
2744
2745!***********************************************************************
2746
2747    if (mp_measure_time) then
2748      do ip=0, mp_np-1
2749        call MPI_BARRIER(MPI_COMM_WORLD, mp_ierr)
2750
2751        if (ip == mp_pid) then
2752          write(*,FMT='(72("#"))')
2753          write(*,FMT='(A60,I3)') 'STATISTICS FOR MPI PROCESS:', mp_pid
2754          if (ip == 0) then
2755            write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR ROOT-ONLY (PID 0) &
2756                 &SECTIONS: ', mp_root_time_total
2757            write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR ROOT-ONLY (PID 0) &
2758                 &SECTIONS:', mp_root_wtime_total
2759          end if
2760          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR FLEXPART: ' &
2761               &, mp_total_wtime_total
2762          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR MPI COMMUNICATIONS:',&
2763               & mp_comm_time_total
2764          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR MPI COMMUNICATIONS:',&
2765               & mp_comm_wtime_total
2766          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR MPI BARRIERS:',&
2767               & mp_barrier_time_total
2768          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR MPI BARRIERS:',&
2769               & mp_barrier_wtime_total
2770          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR TIMEMANAGER:',&
2771               & tm_tot_total
2772          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR TIMEMANAGER/NUMPART LOOP:',&
2773               & tm_nploop_total
2774          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR ADVANCE:',&
2775               & mp_advance_wtime_total
2776          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR GETFIELDS:',&
2777               & mp_getfields_wtime_total
2778          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR GETFIELDS:',&
2779               & mp_getfields_time_total
2780          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR READWIND:',&
2781               & mp_readwind_wtime_total
2782          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR READWIND:',&
2783               & mp_readwind_time_total
2784          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR FILE IO:',&
2785               & mp_io_wtime_total
2786          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR FILE IO:',&
2787               & mp_io_time_total
2788          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR WETDEPO:',&
2789               & mp_wetdepo_wtime_total
2790          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR WETDEPO:',&
2791               & mp_wetdepo_time_total
2792          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR CONCCALC:',&
2793               & mp_conccalc_time_total
2794! write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR VERTTRANSFORM:',&
2795!      & mp_vt_wtime_total
2796! write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR VERTTRANSFORM:',&
2797!      & mp_vt_time_total
2798! NB: 'flush' without a unit argument is a compiler-specific (e.g. gfortran)
2799! extension; uncomment the call below only if your compiler supports it
2800!          call flush()
2801        end if
2802      end do
2803    end if
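! Illustrative note on the output layout above: the (A60,TR1,F9.2) edit
! descriptor prints each label right-justified in a 60-character field,
! followed by one blank and the accumulated time in seconds with two
! decimals, e.g. (values invented for illustration):
!
!                     TOTAL WALL TIME FOR MPI COMMUNICATIONS:     12.34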
2804
2805! This barrier call keeps the per-process statistics output above correctly ordered
2806    call MPI_BARRIER(MPI_COMM_WORLD, mp_ierr)
2807
2808    if (lroot.and.mp_measure_time) then
2809      write(*,FMT='(72("#"))')
2810      write(*,*) "To turn off output of time measurements, set "
2811      write(*,*) "    mp_measure_time=.false."
2812      write(*,*) "in file mpi_mod.f90"
2813      write(*,FMT='(72("#"))')
2814    end if
2815
2816! j=mp_pid*nvar_async
2817! In the implementation with 3 fields, the processes may have posted
2818! MPI_Irecv requests that should be cancelled here
2819! if (.not.lmp_sync) then
2820!   r=mp_pid*nvar_async
2821!   do j=r,r+nvar_async-1
2822!     call MPI_Cancel(j,mp_ierr)
2823!     if (mp_ierr /= 0) write(*,*) '#### mpif_finalize::MPI_Cancel> ERROR ####'
2824!   end do
2825! end if
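! Sketch of such a cancellation (illustrative; assumes the outstanding
! receive requests are kept in an array of request handles, here called
! 'reqs': MPI_Cancel takes a request handle, which should afterwards be
! released, e.g. with MPI_Request_free):
!
!   r=mp_pid*nvar_async
!   do j=r,r+nvar_async-1
!     call MPI_Cancel(reqs(j), mp_ierr)
!     if (mp_ierr /= 0) write(*,*) '#### mpif_finalize::MPI_Cancel> ERROR ####'
!     call MPI_Request_free(reqs(j), mp_ierr)
!   end do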
2826
2827    call MPI_FINALIZE(mp_ierr)
2828    if (mp_ierr /= 0) then
2829      write(*,*) '#### mpif_finalize::MPI_FINALIZE> MPI ERROR ', mp_ierr, ' ####'
2830      stop
2831    end if
2832
2833
2834  end subroutine mpif_finalize
2835
2836
2837  subroutine get_lun(my_lun)
2838!***********************************************************************
2839! get_lun:
2840!   Starting from 100, get next free logical unit number
2841!***********************************************************************
2842
2843    implicit none
2844
2845    integer, intent(inout) :: my_lun
2846    integer, save :: free_lun=100
2847    logical :: exists, iopen
2848
2849!***********************************************************************
2850
2851    loop1: do
2852      inquire(UNIT=free_lun, EXIST=exists, OPENED=iopen)
2853      if (exists .and. .not.iopen) exit loop1
2854      free_lun = free_lun+1
2855    end do loop1
2856    my_lun = free_lun
2857
2858  end subroutine get_lun
2859
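! Usage sketch (illustrative): get_lun returns a unit number >= 100 that
! is not currently connected, which can then be passed to OPEN, as done
! in write_data_dbg below. On compilers supporting Fortran 2008, the
! NEWUNIT= specifier achieves the same without this helper (file name
! below is a placeholder):
!
!   call get_lun(dat_lun)
!   open(UNIT=dat_lun, FILE='debug.dat', FORM='UNFORMATTED', &
!        ACTION='WRITE', STATUS='REPLACE')
!
!   ! Fortran 2008 alternative:
!   ! open(NEWUNIT=dat_lun, FILE='debug.dat', FORM='UNFORMATTED', &
!   !      ACTION='WRITE', STATUS='REPLACE')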
2860
2861  subroutine write_data_dbg(array_in, array_name, tstep, ident)
2862!***********************************************************************
2863! Write one-dimensional arrays to file (for debugging purposes)
2864!***********************************************************************
2865    implicit none
2866
2867    real, intent(in), dimension(:) :: array_in
2868    integer, intent(in) :: tstep
2869    integer :: lios
2870    character(LEN=*), intent(in) :: ident, array_name
2871
2872    character(LEN=8) :: c_ts
2873    character(LEN=40) :: fn_1, fn_2
2874
2875!***********************************************************************
2876
2877    write(c_ts, FMT='(I8.8,BZ)') tstep
2878    fn_1='-'//trim(adjustl(c_ts))//'-'//trim(ident)
2879    write(c_ts, FMT='(I2.2,BZ)') mp_np
2880    fn_2= trim(adjustl(array_name))//trim(adjustl(fn_1))//'-np'//trim(adjustl(c_ts))//'.dat'
2881
2882    call get_lun(dat_lun)
2883    open(UNIT=dat_lun, FILE=fn_2, IOSTAT=lios, ACTION='WRITE', &
2884         FORM='UNFORMATTED', STATUS='REPLACE')
2885    write(UNIT=dat_lun, IOSTAT=lios) array_in
2886    close(UNIT=dat_lun)
2887
2888  end subroutine write_data_dbg
2889
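! Usage sketch (illustrative; the array and time step names below are
! placeholders, not actual com_mod variables): a call such as
!
!   call write_data_dbg(my_array, 'my_array', itime, 'dbg')
!
! writes my_array unformatted to a file named
! <array_name>-<tstep, 8 digits>-<ident>-np<mp_np>.dat,
! e.g. 'my_array-00003600-dbg-np04.dat' for itime=3600 on 4 processes.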
2890
2891  subroutine set_fields_synthetic()
2892!*******************************************************************************
2893! DESCRIPTION
2894!   Set all meteorological fields to synthetic (usually constant/homogeneous)
2895!   values.
2896!   Used for validation and error-checking
2897!
2898! NOTE
2899!   This version uses asynchronous communications.
2900!
2901! VARIABLES
2902!   li, ui   lower/upper index of the wind field time dimension (wfmem)
2903!            that is initialized
2904!
2905!*******************************************************************************
2906    use com_mod
2907
2908    implicit none
2909
2910    integer :: li=1, ui=2 ! wfmem indices (i.e., operate on entire field)
2911
2912    if (.not.lmp_sync) ui=3
2913
2914
2915! Variables transferred at initialization only
2916!*********************************************
2917!    readclouds=readclouds_
2918    oro(:,:)=0.0
2919    excessoro(:,:)=0.0
2920    lsm(:,:)=0.0
2921    xlanduse(:,:,:)=0.0
2922!    wftime
2923!    numbwf
2924!    nmixz
2925!    height
2926
2927! Time-varying fields:
2928    uu(:,:,:,li:ui)=10.0
2929    vv(:,:,:,li:ui)=0.0
2930    uupol(:,:,:,li:ui)=10.0
2931    vvpol(:,:,:,li:ui)=0.0
2932    ww(:,:,:,li:ui)=0.0
2933    tt(:,:,:,li:ui)=300.0
2934    rho(:,:,:,li:ui)=1.3
2935    drhodz(:,:,:,li:ui)=0.0
2936    tth(:,:,:,li:ui)=0.0
2937    qvh(:,:,:,li:ui)=1.0
2938    qv(:,:,:,li:ui)=1.0
2939
2940    pv(:,:,:,li:ui)=1.0
2941    clouds(:,:,:,li:ui)=0
2942
2943    clwc(:,:,:,li:ui)=0.0
2944    ciwc(:,:,:,li:ui)=0.0
2945
2946! 2D fields
2947
2948    cloudsh(:,:,li:ui)=0
2949    vdep(:,:,:,li:ui)=0.0
2950    ps(:,:,:,li:ui)=1.0e5
2951    sd(:,:,:,li:ui)=0.0
2952    tcc(:,:,:,li:ui)=0.0
2953    tt2(:,:,:,li:ui)=300.
2954    td2(:,:,:,li:ui)=250.
2955    lsprec(:,:,:,li:ui)=0.0
2956    convprec(:,:,:,li:ui)=0.0
2957    ustar(:,:,:,li:ui)=1.0
2958    wstar(:,:,:,li:ui)=1.0
2959    hmix(:,:,:,li:ui)=10000.
2960    tropopause(:,:,:,li:ui)=10000.
2961    oli(:,:,:,li:ui)=0.01
2962
2963  end subroutine set_fields_synthetic
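! Validation sketch (illustrative): with the synthetic fields above
! (uu=uupol=10 m/s, vv=vvpol=ww=0), particles should, apart from
! turbulent perturbations, be advected purely zonally at 10 m/s,
! i.e. about 10*3600 m = 36 km eastward per hour of model time, which
! provides a simple hand-checkable reference for the transport code.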
2964
2965end module mpi_mod