Changes in src/mpi_mod.f90 [0ecc1fe:0c8c7f2] in flexpart.git


File edited:
  • src/mpi_mod.f90

Legend: lines prefixed with '-' were removed, lines prefixed with '+' were added;
all other lines are unchanged context.

Comparing r0ecc1fe (old) with r0c8c7f2 (new):

@@ old 88-91 / new 88-92 @@
  ! Variables for MPI processes in the 'particle' group
    integer, allocatable, dimension(:) :: mp_partgroup_rank
+   integer, allocatable, dimension(:) :: npart_per_process
    integer :: mp_partgroup_comm, mp_partgroup_pid, mp_partgroup_np

     
@@ old 125-129 / new 126-130 @@
  ! mp_time_barrier   Measure MPI barrier time
  ! mp_exact_numpart  Use an extra MPI communication to give the exact number of particles
- !                   to standard output (this does *not* otherwise affect the simulation)
+ !                   to standard output (this does not otherwise affect the simulation)
    logical, parameter :: mp_dbg_mode = .false.
    logical, parameter :: mp_dev_mode = .false.
     
@@ old 190-195 / new 191-196 @@
  !   mp_np       number of running processes, decided at run-time
  !***********************************************************************
-     use par_mod, only: maxpart, numwfmem, dep_prec
-     use com_mod, only: mpi_mode, verbosity
+     use par_mod, only: maxpart, numwfmem, dep_prec, maxreceptor, maxspec
+     use com_mod, only: mpi_mode, verbosity, creceptor0

      implicit none
     
@@ old 337-341 / new 338-342 @@

  ! Set maxpart per process
- ! eso 08/2016: Increase maxpart per process, in case of unbalanced distribution
+ ! ESO 08/2016: Increase maxpart per process, in case of unbalanced distribution
      maxpart_mpi=int(mp_maxpart_factor*real(maxpart)/real(mp_partgroup_np))
      if (mp_np == 1) maxpart_mpi = maxpart
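As an illustration of the limit computed above (the numbers are made up, not FLEXPART defaults): with maxpart = 40 000 000, mp_partgroup_np = 8 and mp_maxpart_factor = 1.5, each particle process reserves maxpart_mpi = int(1.5 * 40000000 / 8) = 7 500 000 slots, i.e. 50% headroom for an unbalanced particle distribution; with a single process (mp_np == 1) the limit simply falls back to maxpart.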
     
@@ old 365-368 / new 366-379 @@
      end if

+ ! Allocate array for number of particles per process
+     allocate(npart_per_process(0:mp_partgroup_np-1))
+
+ ! Write whether MPI_IN_PLACE is used or not
+ #ifdef USE_MPIINPLACE
+     if (lroot) write(*,*) 'Using MPI_IN_PLACE operations'
+ #else
+     if (lroot) allocate(creceptor0(maxreceptor,maxspec))
+     if (lroot) write(*,*) 'Not using MPI_IN_PLACE operations'
+ #endif
      goto 101

     
@@ old 559-563 / new 570-574 @@
  ! invalid particles at the end of the arrays

- 601 do i=num_part, 1, -1
+ 601 do i=numpart, 1, -1
        if (itra1(i).eq.-999999999) then
          numpart=numpart-1
     
@@ old 598-602 / new 609-613 @@
      integer :: i,jj,nn, num_part=1,m,imin, num_trans
      logical :: first_iter
-     integer,allocatable,dimension(:) :: numparticles_mpi, idx_arr
+     integer,allocatable,dimension(:) :: idx_arr
      real,allocatable,dimension(:) :: sorted ! TODO: we don't really need this

     
@@ old 607-614 / new 618-624 @@
  ! All processes exchange information on number of particles
  !****************************************************************************
-     allocate(numparticles_mpi(0:mp_partgroup_np-1), &
-          &idx_arr(0:mp_partgroup_np-1), sorted(0:mp_partgroup_np-1))
-
-     call MPI_Allgather(numpart, 1, MPI_INTEGER, numparticles_mpi, &
+     allocate( idx_arr(0:mp_partgroup_np-1), sorted(0:mp_partgroup_np-1))
+
+     call MPI_Allgather(numpart, 1, MPI_INTEGER, npart_per_process, &
           & 1, MPI_INTEGER, mp_comm_used, mp_ierr)

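The hunk above drops the locally allocated numparticles_mpi array and instead gathers the per-rank particle counts into the module-level npart_per_process array. A minimal, self-contained sketch of that MPI_Allgather pattern (illustrative only, not FLEXPART code; the counts are made up and MPI_COMM_WORLD stands in for the particle-group communicator mp_comm_used):

    program gather_counts
      use mpi
      implicit none
      integer :: ierr, myrank, nprocs
      integer :: numpart                       ! local particle count (made-up value below)
      integer, allocatable, dimension(:) :: npart_per_process

      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
      call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)

      numpart = 1000 + 10*myrank               ! stand-in for the real count
      allocate(npart_per_process(0:nprocs-1))

    ! Each rank contributes one integer; afterwards npart_per_process(r) holds
    ! the count of rank r on every process
      call MPI_Allgather(numpart, 1, MPI_INTEGER, npart_per_process, &
           & 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)

      if (myrank == 0) write(*,*) 'particles per rank:', npart_per_process

      deallocate(npart_per_process)
      call MPI_Finalize(ierr)
    end program gather_counts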
     
@@ old 616-623 / new 626-636 @@
  ! Sort from lowest to highest
  ! Initial guess: correct order
-     sorted(:) = numparticles_mpi(:)
+     sorted(:) = npart_per_process(:)
      do i=0, mp_partgroup_np-1
        idx_arr(i) = i
      end do
+
+ ! Do not rebalance particles for ipout=3
+     if (ipout.eq.3) return

  ! For each successive element in index array, see if a lower value exists
     
@@ old 645-655 / new 658-668 @@
      m=mp_partgroup_np-1 ! index for last sorted process (most particles)
      do i=0,mp_partgroup_np/2-1
-       num_trans = numparticles_mpi(idx_arr(m)) - numparticles_mpi(idx_arr(i))
+       num_trans = npart_per_process(idx_arr(m)) - npart_per_process(idx_arr(i))
        if (mp_partid.eq.idx_arr(m).or.mp_partid.eq.idx_arr(i)) then
-         if ( numparticles_mpi(idx_arr(m)).gt.mp_min_redist.and.&
-              & real(num_trans)/real(numparticles_mpi(idx_arr(m))).gt.mp_redist_fract) then
+         if ( npart_per_process(idx_arr(m)).gt.mp_min_redist.and.&
+              & real(num_trans)/real(npart_per_process(idx_arr(m))).gt.mp_redist_fract) then
  ! DBG
-           ! write(*,*) 'mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, numparticles_mpi', &
-           !      &mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, numparticles_mpi
+           ! write(*,*) 'mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, npart_per_process', &
+           !      &mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, npart_per_process
  ! DBG
            call mpif_redist_part(itime, idx_arr(m), idx_arr(i), num_trans/2)
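The logic above pairs the least-loaded particle process with the most-loaded one (via the index array sorted by particle count) and transfers half of the difference, but only when the heavy process holds more than mp_min_redist particles and the relative imbalance exceeds mp_redist_fract. A standalone, serial sketch of that pairing decision (not FLEXPART code; counts and threshold values are invented, and the decrement of m is assumed to follow the same scheme as in the changeset):

    program redist_pairs
      implicit none
      integer, parameter :: np = 6                 ! number of particle processes
      integer, parameter :: mp_min_redist = 100000 ! illustrative threshold
      real,    parameter :: mp_redist_fract = 0.2  ! illustrative threshold
      integer :: npart_per_process(0:np-1), idx_arr(0:np-1)
      integer :: i, j, m, tmp, num_trans

      npart_per_process = (/ 120000, 500000, 80000, 300000, 90000, 410000 /)

    ! Sort rank indices by ascending particle count (simple insertion sort)
      do i = 0, np-1
        idx_arr(i) = i
      end do
      do i = 1, np-1
        do j = i, 1, -1
          if (npart_per_process(idx_arr(j-1)) > npart_per_process(idx_arr(j))) then
            tmp = idx_arr(j-1); idx_arr(j-1) = idx_arr(j); idx_arr(j) = tmp
          end if
        end do
      end do

    ! Pair the lightest rank with the heaviest and move half the difference
    ! when the imbalance is worth the communication cost
      m = np - 1
      do i = 0, np/2 - 1
        num_trans = npart_per_process(idx_arr(m)) - npart_per_process(idx_arr(i))
        if (npart_per_process(idx_arr(m)) > mp_min_redist .and. &
             & real(num_trans)/real(npart_per_process(idx_arr(m))) > mp_redist_fract) then
          write(*,*) 'move', num_trans/2, 'particles from rank', idx_arr(m), &
               & 'to rank', idx_arr(i)
        end if
        m = m - 1
      end do
    end program redist_pairs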
     
@@ old 659-663 / new 672-676 @@
      end do

-     deallocate(numparticles_mpi, idx_arr, sorted)
+     deallocate(idx_arr, sorted)

    end subroutine mpif_calculate_part_redist
     
@@ old 1961-1965 / new 1974-1978 @@
      if (readclouds) then
        j=j+1
-       call MPI_Irecv(ctwc(:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
+       call MPI_Irecv(ctwc(:,:,mind),d2s1*5,mp_sp,id_read,MPI_ANY_TAG,&
             &MPI_COMM_WORLD,reqs(j),mp_ierr)
        if (mp_ierr /= 0) goto 600
     
@@ old 2326-2330 / new 2339-2343 @@
        if (readclouds) then
          j=j+1
-         call MPI_Irecv(ctwcn(:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
+         call MPI_Irecv(ctwcn(:,:,mind,k),d2s1*5,mp_sp,id_read,MPI_ANY_TAG,&
               &MPI_COMM_WORLD,reqs(j),mp_ierr)
          if (mp_ierr /= 0) goto 600
     
@@ old 2462-2470 / new 2475-2499 @@
      end if

+ ! Receptor concentrations
+     if (lroot) then
+       call MPI_Reduce(MPI_IN_PLACE,creceptor,rcpt_size,mp_sp,MPI_SUM,id_root, &
+            & mp_comm_used,mp_ierr)
+       if (mp_ierr /= 0) goto 600
+     else
+       call MPI_Reduce(creceptor,0,rcpt_size,mp_sp,MPI_SUM,id_root, &
+            & mp_comm_used,mp_ierr)
+     end if
+
  #else

        call MPI_Reduce(gridunc, gridunc0, grid_size3d, mp_sp, MPI_SUM, id_root, &
             & mp_comm_used, mp_ierr)
+       if (mp_ierr /= 0) goto 600
        if (lroot) gridunc = gridunc0
+
+       call MPI_Reduce(creceptor, creceptor0,rcpt_size,mp_sp,MPI_SUM,id_root, &
+            & mp_comm_used,mp_ierr)
+       if (mp_ierr /= 0) goto 600
+       if (lroot) creceptor = creceptor0

  #endif
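With this hunk the receptor reduction is handled inside the USE_MPIINPLACE branches: when MPI_IN_PLACE is used the root sums directly into creceptor, otherwise the result passes through the newly allocated creceptor0 buffer and is copied back on the root. A minimal sketch of both variants (illustrative names and sizes, not FLEXPART code; needs an MPI compiler wrapper with preprocessing enabled, e.g. mpif90 -cpp, optionally with -DUSE_MPIINPLACE):

    program reduce_sketch
      use mpi
      implicit none
      integer, parameter :: n = 8
      integer :: ierr, myrank
      real :: buf(n)       ! stands in for creceptor
      real :: buf0(n)      ! stands in for creceptor0 (only used without MPI_IN_PLACE)
      real :: dummy(1)     ! receive buffer argument is ignored on non-root ranks

      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
      buf = real(myrank + 1)

    #ifdef USE_MPIINPLACE
    ! Root sums in place into its own buffer; the other ranks only send
      if (myrank == 0) then
        call MPI_Reduce(MPI_IN_PLACE, buf, n, MPI_REAL, MPI_SUM, 0, &
             & MPI_COMM_WORLD, ierr)
      else
        call MPI_Reduce(buf, dummy, n, MPI_REAL, MPI_SUM, 0, &
             & MPI_COMM_WORLD, ierr)
      end if
    #else
    ! Reduce into a separate buffer, then copy back on the root
      call MPI_Reduce(buf, buf0, n, MPI_REAL, MPI_SUM, 0, MPI_COMM_WORLD, ierr)
      if (myrank == 0) buf = buf0
    #endif

      if (myrank == 0) write(*,*) 'reduced value:', buf(1)
      call MPI_Finalize(ierr)
    end program reduce_sketch

The non-root call in the changeset passes a literal 0 as the receive buffer; this works because MPI only touches the receive buffer on the root rank, and the dummy array above plays the same role.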
     
@@ old 2482-2494 / new 2511-2514 @@
      end if

- ! Receptor concentrations
-     if (lroot) then
-       call MPI_Reduce(MPI_IN_PLACE,creceptor,rcpt_size,mp_sp,MPI_SUM,id_root, &
-            & mp_comm_used,mp_ierr)
-       if (mp_ierr /= 0) goto 600
-     else
-       call MPI_Reduce(creceptor,0,rcpt_size,mp_sp,MPI_SUM,id_root, &
-            & mp_comm_used,mp_ierr)
-     end if

      if (mp_measure_time) call mpif_mtime('commtime',1)
     
@@ old 2700-2716 / new 2720-2736 @@ (indentation change only)
        end if

-     case ('readwind')
-       if (imode.eq.0) then
-         call cpu_time(mp_readwind_time_beg)
-         mp_readwind_wtime_beg = mpi_wtime()
-       else
-         call cpu_time(mp_readwind_time_end)
-         mp_readwind_wtime_end = mpi_wtime()
-
-         mp_readwind_time_total = mp_readwind_time_total + &
-              &(mp_readwind_time_end - mp_readwind_time_beg)
-         mp_readwind_wtime_total = mp_readwind_wtime_total + &
-              &(mp_readwind_wtime_end - mp_readwind_wtime_beg)
-       end if
+    case ('readwind')
+      if (imode.eq.0) then
+        call cpu_time(mp_readwind_time_beg)
+        mp_readwind_wtime_beg = mpi_wtime()
+      else
+        call cpu_time(mp_readwind_time_end)
+        mp_readwind_wtime_end = mpi_wtime()
+
+        mp_readwind_time_total = mp_readwind_time_total + &
+             &(mp_readwind_time_end - mp_readwind_time_beg)
+        mp_readwind_wtime_total = mp_readwind_wtime_total + &
+             &(mp_readwind_wtime_end - mp_readwind_wtime_beg)
+      end if

    case ('commtime')
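The 'readwind' case above is only re-indented, but it shows the timing scheme used throughout mpif_mtime: cpu_time measures process CPU time, MPI_Wtime measures elapsed wall-clock time, and the difference between a start call (imode = 0) and a stop call is accumulated into running totals. A self-contained sketch of that pattern (not FLEXPART code; the timed loop is a placeholder):

    program timing_sketch
      use mpi
      implicit none
      integer :: ierr, i
      real :: cpu_beg, cpu_end, cpu_total
      double precision :: wall_beg, wall_end, wall_total
      real :: x

      call MPI_Init(ierr)
      cpu_total = 0.
      wall_total = 0.d0

    ! Start of the timed section (imode = 0 in the changeset)
      call cpu_time(cpu_beg)
      wall_beg = MPI_Wtime()

    ! Placeholder work
      x = 0.
      do i = 1, 10000000
        x = x + sin(real(i))
      end do

    ! End of the timed section: accumulate CPU and wall-clock time separately
      call cpu_time(cpu_end)
      wall_end = MPI_Wtime()
      cpu_total = cpu_total + (cpu_end - cpu_beg)
      wall_total = wall_total + (wall_end - wall_beg)

      write(*,*) 'cpu time:', cpu_total, ' wall time:', wall_total, ' (x =', x, ')'
      call MPI_Finalize(ierr)
    end program timing_sketch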
     
@@ old 2788-2795 / new 2808-2815 @@
            write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR GETFIELDS:',&
                 & mp_getfields_time_total
-           write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR READWIND:',&
-                & mp_readwind_wtime_total
-           write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR READWIND:',&
-                & mp_readwind_time_total
+ !          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR READWIND:',&
+ !               & mp_readwind_wtime_total
+ !          write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR READWIND:',&
+ !               & mp_readwind_time_total
            write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR FILE IO:',&
                 & mp_io_wtime_total