Changes in src/mpi_mod.f90 [0ecc1fe:0c8c7f2] in flexpart.git


File edited:
  • src/mpi_mod.f90

Legend: lines prefixed with '-' were removed, lines prefixed with '+' were added;
all other lines are unchanged context.

Comparing r0ecc1fe (old) with r0c8c7f2 (new):

@@ old 88-91 / new 88-92 @@
  ! Variables for MPI processes in the 'particle' group
    integer, allocatable, dimension(:) :: mp_partgroup_rank
+   integer, allocatable, dimension(:) :: npart_per_process
    integer :: mp_partgroup_comm, mp_partgroup_pid, mp_partgroup_np

     
@@ old 125-129 / new 126-130 @@
  ! mp_time_barrier   Measure MPI barrier time
  ! mp_exact_numpart  Use an extra MPI communication to give the exact number of particles
- !                   to standard output (this does *not* otherwise affect the simulation)
+ !                   to standard output (this does not otherwise affect the simulation)
    logical, parameter :: mp_dbg_mode = .false.
    logical, parameter :: mp_dev_mode = .false.
     
@@ old 190-195 / new 191-196 @@
  !   mp_np       number of running processes, decided at run-time
  !***********************************************************************
-     use par_mod, only: maxpart, numwfmem, dep_prec
-     use com_mod, only: mpi_mode, verbosity
+     use par_mod, only: maxpart, numwfmem, dep_prec, maxreceptor, maxspec
+     use com_mod, only: mpi_mode, verbosity, creceptor0

      implicit none
     
@@ old 337-341 / new 338-342 @@

  ! Set maxpart per process
- ! eso 08/2016: Increase maxpart per process, in case of unbalanced distribution
+ ! ESO 08/2016: Increase maxpart per process, in case of unbalanced distribution
      maxpart_mpi=int(mp_maxpart_factor*real(maxpart)/real(mp_partgroup_np))
      if (mp_np == 1) maxpart_mpi = maxpart
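As an illustration of the limit computed above (the numbers are made up, not FLEXPART defaults): with maxpart = 40 000 000, mp_partgroup_np = 8 and mp_maxpart_factor = 1.5, each particle process reserves maxpart_mpi = int(1.5 * 40000000 / 8) = 7 500 000 slots, i.e. 50% headroom for an unbalanced particle distribution; with a single process (mp_np == 1) the limit simply falls back to maxpart.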
     
@@ old 365-368 / new 366-379 @@
      end if

+ ! Allocate array for number of particles per process
+     allocate(npart_per_process(0:mp_partgroup_np-1))
+
+ ! Write whether MPI_IN_PLACE is used or not
+ #ifdef USE_MPIINPLACE
+     if (lroot) write(*,*) 'Using MPI_IN_PLACE operations'
+ #else
+     if (lroot) allocate(creceptor0(maxreceptor,maxspec))
+     if (lroot) write(*,*) 'Not using MPI_IN_PLACE operations'
+ #endif
      goto 101

     
@@ old 559-563 / new 570-574 @@
  ! invalid particles at the end of the arrays

- 601 do i=num_part, 1, -1
+ 601 do i=numpart, 1, -1
        if (itra1(i).eq.-999999999) then
          numpart=numpart-1
     
@@ old 598-602 / new 609-613 @@
      integer :: i,jj,nn, num_part=1,m,imin, num_trans
      logical :: first_iter
-     integer,allocatable,dimension(:) :: numparticles_mpi, idx_arr
+     integer,allocatable,dimension(:) :: idx_arr
      real,allocatable,dimension(:) :: sorted ! TODO: we don't really need this

     
@@ old 607-614 / new 618-624 @@
  ! All processes exchange information on number of particles
  !****************************************************************************
-     allocate(numparticles_mpi(0:mp_partgroup_np-1), &
-          &idx_arr(0:mp_partgroup_np-1), sorted(0:mp_partgroup_np-1))
-
-     call MPI_Allgather(numpart, 1, MPI_INTEGER, numparticles_mpi, &
+     allocate( idx_arr(0:mp_partgroup_np-1), sorted(0:mp_partgroup_np-1))
+
+     call MPI_Allgather(numpart, 1, MPI_INTEGER, npart_per_process, &
           & 1, MPI_INTEGER, mp_comm_used, mp_ierr)

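The hunk above drops the locally allocated numparticles_mpi array and instead gathers the per-rank particle counts into the module-level npart_per_process array. A minimal, self-contained sketch of that MPI_Allgather pattern (illustrative only, not FLEXPART code; the counts are made up and MPI_COMM_WORLD stands in for the particle-group communicator mp_comm_used):

    program gather_counts
      use mpi
      implicit none
      integer :: ierr, myrank, nprocs
      integer :: numpart                       ! local particle count (made-up value below)
      integer, allocatable, dimension(:) :: npart_per_process

      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
      call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)

      numpart = 1000 + 10*myrank               ! stand-in for the real count
      allocate(npart_per_process(0:nprocs-1))

    ! Each rank contributes one integer; afterwards npart_per_process(r) holds
    ! the count of rank r on every process
      call MPI_Allgather(numpart, 1, MPI_INTEGER, npart_per_process, &
           & 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)

      if (myrank == 0) write(*,*) 'particles per rank:', npart_per_process

      deallocate(npart_per_process)
      call MPI_Finalize(ierr)
    end program gather_counts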
     
@@ old 616-623 / new 626-636 @@
  ! Sort from lowest to highest
  ! Initial guess: correct order
-     sorted(:) = numparticles_mpi(:)
+     sorted(:) = npart_per_process(:)
      do i=0, mp_partgroup_np-1
        idx_arr(i) = i
      end do
+
+ ! Do not rebalance particles for ipout=3
+     if (ipout.eq.3) return

  ! For each successive element in index array, see if a lower value exists
     
@@ old 645-655 / new 658-668 @@
      m=mp_partgroup_np-1 ! index for last sorted process (most particles)
      do i=0,mp_partgroup_np/2-1
-       num_trans = numparticles_mpi(idx_arr(m)) - numparticles_mpi(idx_arr(i))
+       num_trans = npart_per_process(idx_arr(m)) - npart_per_process(idx_arr(i))
        if (mp_partid.eq.idx_arr(m).or.mp_partid.eq.idx_arr(i)) then
-         if ( numparticles_mpi(idx_arr(m)).gt.mp_min_redist.and.&
-              & real(num_trans)/real(numparticles_mpi(idx_arr(m))).gt.mp_redist_fract) then
+         if ( npart_per_process(idx_arr(m)).gt.mp_min_redist.and.&
+              & real(num_trans)/real(npart_per_process(idx_arr(m))).gt.mp_redist_fract) then
  ! DBG
-           ! write(*,*) 'mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, numparticles_mpi', &
-           !      &mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, numparticles_mpi
+           ! write(*,*) 'mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, npart_per_process', &
+           !      &mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, npart_per_process
  ! DBG
            call mpif_redist_part(itime, idx_arr(m), idx_arr(i), num_trans/2)
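The logic above pairs the least-loaded particle process with the most-loaded one (via the index array sorted by particle count) and transfers half of the difference, but only when the heavy process holds more than mp_min_redist particles and the relative imbalance exceeds mp_redist_fract. A standalone, serial sketch of that pairing decision (not FLEXPART code; counts and threshold values are invented, and the decrement of m is assumed to follow the same scheme as in the changeset):

    program redist_pairs
      implicit none
      integer, parameter :: np = 6                 ! number of particle processes
      integer, parameter :: mp_min_redist = 100000 ! illustrative threshold
      real,    parameter :: mp_redist_fract = 0.2  ! illustrative threshold
      integer :: npart_per_process(0:np-1), idx_arr(0:np-1)
      integer :: i, j, m, tmp, num_trans

      npart_per_process = (/ 120000, 500000, 80000, 300000, 90000, 410000 /)

    ! Sort rank indices by ascending particle count (simple insertion sort)
      do i = 0, np-1
        idx_arr(i) = i
      end do
      do i = 1, np-1
        do j = i, 1, -1
          if (npart_per_process(idx_arr(j-1)) > npart_per_process(idx_arr(j))) then
            tmp = idx_arr(j-1); idx_arr(j-1) = idx_arr(j); idx_arr(j) = tmp
          end if
        end do
      end do

    ! Pair the lightest rank with the heaviest and move half the difference
    ! when the imbalance is worth the communication cost
      m = np - 1
      do i = 0, np/2 - 1
        num_trans = npart_per_process(idx_arr(m)) - npart_per_process(idx_arr(i))
        if (npart_per_process(idx_arr(m)) > mp_min_redist .and. &
             & real(num_trans)/real(npart_per_process(idx_arr(m))) > mp_redist_fract) then
          write(*,*) 'move', num_trans/2, 'particles from rank', idx_arr(m), &
               & 'to rank', idx_arr(i)
        end if
        m = m - 1
      end do
    end program redist_pairs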
     
@@ old 659-663 / new 672-676 @@
      end do

-     deallocate(numparticles_mpi, idx_arr, sorted)
+     deallocate(idx_arr, sorted)

    end subroutine mpif_calculate_part_redist
     
@@ old 1961-1965 / new 1974-1978 @@
      if (readclouds) then
        j=j+1
-       call MPI_Irecv(ctwc(:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
+       call MPI_Irecv(ctwc(:,:,mind),d2s1*5,mp_sp,id_read,MPI_ANY_TAG,&
             &MPI_COMM_WORLD,reqs(j),mp_ierr)
        if (mp_ierr /= 0) goto 600
     
@@ old 2326-2330 / new 2339-2343 @@
        if (readclouds) then
          j=j+1
-         call MPI_Irecv(ctwcn(:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
+         call MPI_Irecv(ctwcn(:,:,mind,k),d2s1*5,mp_sp,id_read,MPI_ANY_TAG,&
               &MPI_COMM_WORLD,reqs(j),mp_ierr)
          if (mp_ierr /= 0) goto 600
     
@@ old 2462-2470 / new 2475-2499 @@
      end if

+ ! Receptor concentrations
+     if (lroot) then
+       call MPI_Reduce(MPI_IN_PLACE,creceptor,rcpt_size,mp_sp,MPI_SUM,id_root, &
+            & mp_comm_used,mp_ierr)
+       if (mp_ierr /= 0) goto 600
+     else
+       call MPI_Reduce(creceptor,0,rcpt_size,mp_sp,MPI_SUM,id_root, &
+            & mp_comm_used,mp_ierr)
+     end if
+
  #else

        call MPI_Reduce(gridunc, gridunc0, grid_size3d, mp_sp, MPI_SUM, id_root, &
             & mp_comm_used, mp_ierr)
+       if (mp_ierr /= 0) goto 600
        if (lroot) gridunc = gridunc0
+
+       call MPI_Reduce(creceptor, creceptor0,rcpt_size,mp_sp,MPI_SUM,id_root, &
+            & mp_comm_used,mp_ierr)
+       if (mp_ierr /= 0) goto 600
+       if (lroot) creceptor = creceptor0

  #endif
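With this hunk the receptor reduction is handled inside the USE_MPIINPLACE branches: when MPI_IN_PLACE is used the root sums directly into creceptor, otherwise the result passes through the newly allocated creceptor0 buffer and is copied back on the root. A minimal sketch of both variants (illustrative names and sizes, not FLEXPART code; needs an MPI compiler wrapper with preprocessing enabled, e.g. mpif90 -cpp, optionally with -DUSE_MPIINPLACE):

    program reduce_sketch
      use mpi
      implicit none
      integer, parameter :: n = 8
      integer :: ierr, myrank
      real :: buf(n)       ! stands in for creceptor
      real :: buf0(n)      ! stands in for creceptor0 (only used without MPI_IN_PLACE)
      real :: dummy(1)     ! receive buffer argument is ignored on non-root ranks

      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
      buf = real(myrank + 1)

    #ifdef USE_MPIINPLACE
    ! Root sums in place into its own buffer; the other ranks only send
      if (myrank == 0) then
        call MPI_Reduce(MPI_IN_PLACE, buf, n, MPI_REAL, MPI_SUM, 0, &
             & MPI_COMM_WORLD, ierr)
      else
        call MPI_Reduce(buf, dummy, n, MPI_REAL, MPI_SUM, 0, &
             & MPI_COMM_WORLD, ierr)
      end if
    #else
    ! Reduce into a separate buffer, then copy back on the root
      call MPI_Reduce(buf, buf0, n, MPI_REAL, MPI_SUM, 0, MPI_COMM_WORLD, ierr)
      if (myrank == 0) buf = buf0
    #endif

      if (myrank == 0) write(*,*) 'reduced value:', buf(1)
      call MPI_Finalize(ierr)
    end program reduce_sketch

The non-root call in the changeset passes a literal 0 as the receive buffer; this works because MPI only touches the receive buffer on the root rank, and the dummy array above plays the same role.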
     
@@ old 2482-2494 / new 2511-2514 @@
      end if

- ! Receptor concentrations
-     if (lroot) then
-       call MPI_Reduce(MPI_IN_PLACE,creceptor,rcpt_size,mp_sp,MPI_SUM,id_root, &
-            & mp_comm_used,mp_ierr)
-       if (mp_ierr /= 0) goto 600
-     else
-       call MPI_Reduce(creceptor,0,rcpt_size,mp_sp,MPI_SUM,id_root, &
-            & mp_comm_used,mp_ierr)
-     end if

      if (mp_measure_time) call mpif_mtime('commtime',1)
     
@@ old 2700-2716 / new 2720-2736 @@ (indentation change only)
        end if

-     case ('readwind')
-       if (imode.eq.0) then
-         call cpu_time(mp_readwind_time_beg)
-         mp_readwind_wtime_beg = mpi_wtime()
-       else
-         call cpu_time(mp_readwind_time_end)
-         mp_readwind_wtime_end = mpi_wtime()
-
-         mp_readwind_time_total = mp_readwind_time_total + &
-              &(mp_readwind_time_end - mp_readwind_time_beg)
-         mp_readwind_wtime_total = mp_readwind_wtime_total + &
-              &(mp_readwind_wtime_end - mp_readwind_wtime_beg)
-       end if
+    case ('readwind')
+      if (imode.eq.0) then
+        call cpu_time(mp_readwind_time_beg)
+        mp_readwind_wtime_beg = mpi_wtime()
+      else
+        call cpu_time(mp_readwind_time_end)
+        mp_readwind_wtime_end = mpi_wtime()
+
+        mp_readwind_time_total = mp_readwind_time_total + &
+             &(mp_readwind_time_end - mp_readwind_time_beg)
+        mp_readwind_wtime_total = mp_readwind_wtime_total + &
+             &(mp_readwind_wtime_end - mp_readwind_wtime_beg)
+      end if

    case ('commtime')
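The 'readwind' case above is only re-indented, but it shows the timing scheme used throughout mpif_mtime: cpu_time measures process CPU time, MPI_Wtime measures elapsed wall-clock time, and the difference between a start call (imode = 0) and a stop call is accumulated into running totals. A self-contained sketch of that pattern (not FLEXPART code; the timed loop is a placeholder):

    program timing_sketch
      use mpi
      implicit none
      integer :: ierr, i
      real :: cpu_beg, cpu_end, cpu_total
      double precision :: wall_beg, wall_end, wall_total
      real :: x

      call MPI_Init(ierr)
      cpu_total = 0.
      wall_total = 0.d0

    ! Start of the timed section (imode = 0 in the changeset)
      call cpu_time(cpu_beg)
      wall_beg = MPI_Wtime()

    ! Placeholder work
      x = 0.
      do i = 1, 10000000
        x = x + sin(real(i))
      end do

    ! End of the timed section: accumulate CPU and wall-clock time separately
      call cpu_time(cpu_end)
      wall_end = MPI_Wtime()
      cpu_total = cpu_total + (cpu_end - cpu_beg)
      wall_total = wall_total + (wall_end - wall_beg)

      write(*,*) 'cpu time:', cpu_total, ' wall time:', wall_total, ' (x =', x, ')'
      call MPI_Finalize(ierr)
    end program timing_sketch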
     
@@ old 2788-2795 / new 2808-2815 @@
            write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR GETFIELDS:',&
                 & mp_getfields_time_total
-           write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR READWIND:',&
-                & mp_readwind_wtime_total
-           write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR READWIND:',&
-                & mp_readwind_time_total
+ !          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR READWIND:',&
+ !               & mp_readwind_wtime_total
+ !          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR READWIND:',&
+ !               & mp_readwind_time_total
            write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR FILE IO:',&
                 & mp_io_wtime_total