Changes in src/mpi_mod.f90 [0c8c7f2:0ecc1fe] in flexpart.git
File: src/mpi_mod.f90 (1 edited)
Legend:
- Unmodified (leading space)
- Added (leading '+')
- Removed (leading '-')
src/mpi_mod.f90
--- src/mpi_mod.f90 (r0c8c7f2)
+++ src/mpi_mod.f90 (r0ecc1fe)

@@ -88,5 +88,4 @@
 ! Variables for MPI processes in the 'particle' group
   integer, allocatable, dimension(:) :: mp_partgroup_rank
-  integer, allocatable, dimension(:) :: npart_per_process
   integer :: mp_partgroup_comm, mp_partgroup_pid, mp_partgroup_np

@@ -126,5 +125,5 @@
 ! mp_time_barrier   Measure MPI barrier time
 ! mp_exact_numpart  Use an extra MPI communication to give the exact number of particles
-!                   to standard output (this does not otherwise affect the simulation)
+!                   to standard output (this does *not* otherwise affect the simulation)
   logical, parameter :: mp_dbg_mode = .false.
   logical, parameter :: mp_dev_mode = .false.

@@ -191,6 +190,6 @@
 ! mp_np       number of running processes, decided at run-time
 !***********************************************************************
-  use par_mod, only: maxpart, numwfmem, dep_prec, maxreceptor, maxspec
-  use com_mod, only: mpi_mode, verbosity, creceptor0
+  use par_mod, only: maxpart, numwfmem, dep_prec
+  use com_mod, only: mpi_mode, verbosity

   implicit none

@@ -338,5 +337,5 @@

 ! Set maxpart per process
-! ESO 08/2016: Increase maxpart per process, in case of unbalanced distribution
+! eso 08/2016: Increase maxpart per process, in case of unbalanced distribution
   maxpart_mpi=int(mp_maxpart_factor*real(maxpart)/real(mp_partgroup_np))
   if (mp_np == 1) maxpart_mpi = maxpart

@@ -366,14 +365,4 @@
   end if

-! Allocate array for number of particles per process
-  allocate(npart_per_process(0:mp_partgroup_np-1))
-
-! Write whether MPI_IN_PLACE is used or not
-#ifdef USE_MPIINPLACE
-  if (lroot) write(*,*) 'Using MPI_IN_PLACE operations'
-#else
-  if (lroot) allocate(creceptor0(maxreceptor,maxspec))
-  if (lroot) write(*,*) 'Not using MPI_IN_PLACE operations'
-#endif
   goto 101

@@ -570,5 +559,5 @@
 ! invalid particles at the end of the arrays

-601 do i=numpart, 1, -1
+601 do i=num_part, 1, -1
       if (itra1(i).eq.-999999999) then
         numpart=numpart-1

@@ -609,5 +598,5 @@
   integer :: i,jj,nn, num_part=1,m,imin, num_trans
   logical :: first_iter
-  integer,allocatable,dimension(:) :: idx_arr
+  integer,allocatable,dimension(:) :: numparticles_mpi, idx_arr
   real,allocatable,dimension(:) :: sorted  ! TODO: we don't really need this

@@ -618,7 +607,8 @@
 ! All processes exchange information on number of particles
 !****************************************************************************
-  allocate( idx_arr(0:mp_partgroup_np-1), sorted(0:mp_partgroup_np-1))
-
-  call MPI_Allgather(numpart, 1, MPI_INTEGER, npart_per_process, &
+  allocate(numparticles_mpi(0:mp_partgroup_np-1), &
+       &idx_arr(0:mp_partgroup_np-1), sorted(0:mp_partgroup_np-1))
+
+  call MPI_Allgather(numpart, 1, MPI_INTEGER, numparticles_mpi, &
        & 1, MPI_INTEGER, mp_comm_used, mp_ierr)

@@ -626,10 +616,7 @@
 ! Sort from lowest to highest
 ! Initial guess: correct order
-  sorted(:) = npart_per_process(:)
+  sorted(:) = numparticles_mpi(:)
   do i=0, mp_partgroup_np-1
     idx_arr(i) = i
   end do
-
-! Do not rebalance particles for ipout=3
-  if (ipout.eq.3) return

@@ -658,11 +645,11 @@
   m=mp_partgroup_np-1 ! index for last sorted process (most particles)
   do i=0,mp_partgroup_np/2-1
-    num_trans = npart_per_process(idx_arr(m)) - npart_per_process(idx_arr(i))
+    num_trans = numparticles_mpi(idx_arr(m)) - numparticles_mpi(idx_arr(i))
     if (mp_partid.eq.idx_arr(m).or.mp_partid.eq.idx_arr(i)) then
-      if ( npart_per_process(idx_arr(m)).gt.mp_min_redist.and.&
-           & real(num_trans)/real(npart_per_process(idx_arr(m))).gt.mp_redist_fract) then
+      if ( numparticles_mpi(idx_arr(m)).gt.mp_min_redist.and.&
+           & real(num_trans)/real(numparticles_mpi(idx_arr(m))).gt.mp_redist_fract) then
 ! DBG
-        ! write(*,*) 'mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, npart_per_process', &
-        !      &mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, npart_per_process
+        ! write(*,*) 'mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, numparticles_mpi', &
+        !      &mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, numparticles_mpi
 ! DBG
         call mpif_redist_part(itime, idx_arr(m), idx_arr(i), num_trans/2)

@@ -672,5 +659,5 @@
   end do

-  deallocate( idx_arr, sorted)
+  deallocate(numparticles_mpi, idx_arr, sorted)

 end subroutine mpif_calculate_part_redist

@@ -1974,5 +1961,5 @@
     if (readclouds) then
       j=j+1
-      call MPI_Irecv(ctwc(:,:,mind),d2s1*5,mp_sp,id_read,MPI_ANY_TAG,&
+      call MPI_Irecv(ctwc(:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
           &MPI_COMM_WORLD,reqs(j),mp_ierr)
       if (mp_ierr /= 0) goto 600

@@ -2339,5 +2326,5 @@
     if (readclouds) then
       j=j+1
-      call MPI_Irecv(ctwcn(:,:,mind,k),d2s1*5,mp_sp,id_read,MPI_ANY_TAG,&
+      call MPI_Irecv(ctwcn(:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
           &MPI_COMM_WORLD,reqs(j),mp_ierr)
       if (mp_ierr /= 0) goto 600

@@ -2475,5 +2462,25 @@
   end if

-! Receptor concentrations
+#else
+
+  call MPI_Reduce(gridunc, gridunc0, grid_size3d, mp_sp, MPI_SUM, id_root, &
+       & mp_comm_used, mp_ierr)
+  if (lroot) gridunc = gridunc0
+
+#endif
+
+  if ((WETDEP).and.(ldirect.gt.0)) then
+    call MPI_Reduce(wetgridunc, wetgridunc0, grid_size2d, mp_cp, MPI_SUM, id_root, &
+         & mp_comm_used, mp_ierr)
+    if (mp_ierr /= 0) goto 600
+  end if
+
+  if ((DRYDEP).and.(ldirect.gt.0)) then
+    call MPI_Reduce(drygridunc, drygridunc0, grid_size2d, mp_cp, MPI_SUM, id_root, &
+         & mp_comm_used, mp_ierr)
+    if (mp_ierr /= 0) goto 600
+  end if
+
+! Receptor concentrations
   if (lroot) then
     call MPI_Reduce(MPI_IN_PLACE,creceptor,rcpt_size,mp_sp,MPI_SUM,id_root, &

@@ -2484,31 +2491,4 @@
        & mp_comm_used,mp_ierr)
   end if
-
-#else
-
-  call MPI_Reduce(gridunc, gridunc0, grid_size3d, mp_sp, MPI_SUM, id_root, &
-       & mp_comm_used, mp_ierr)
-  if (mp_ierr /= 0) goto 600
-  if (lroot) gridunc = gridunc0
-
-  call MPI_Reduce(creceptor, creceptor0,rcpt_size,mp_sp,MPI_SUM,id_root, &
-       & mp_comm_used,mp_ierr)
-  if (mp_ierr /= 0) goto 600
-  if (lroot) creceptor = creceptor0
-
-#endif
-
-  if ((WETDEP).and.(ldirect.gt.0)) then
-    call MPI_Reduce(wetgridunc, wetgridunc0, grid_size2d, mp_cp, MPI_SUM, id_root, &
-         & mp_comm_used, mp_ierr)
-    if (mp_ierr /= 0) goto 600
-  end if
-
-  if ((DRYDEP).and.(ldirect.gt.0)) then
-    call MPI_Reduce(drygridunc, drygridunc0, grid_size2d, mp_cp, MPI_SUM, id_root, &
-         & mp_comm_used, mp_ierr)
-    if (mp_ierr /= 0) goto 600
-  end if
-

   if (mp_measure_time) call mpif_mtime('commtime',1)

@@ -2720,17 +2700,17 @@ (whitespace-only change in this block)
   end if

-  case ('readwind')
-    if (imode.eq.0) then
-      call cpu_time(mp_readwind_time_beg)
-      mp_readwind_wtime_beg = mpi_wtime()
-    else
-      call cpu_time(mp_readwind_time_end)
-      mp_readwind_wtime_end = mpi_wtime()
-
-      mp_readwind_time_total = mp_readwind_time_total + &
-           &(mp_readwind_time_end - mp_readwind_time_beg)
-      mp_readwind_wtime_total = mp_readwind_wtime_total + &
-           &(mp_readwind_wtime_end - mp_readwind_wtime_beg)
-    end if
+  case ('readwind')
+    if (imode.eq.0) then
+      call cpu_time(mp_readwind_time_beg)
+      mp_readwind_wtime_beg = mpi_wtime()
+    else
+      call cpu_time(mp_readwind_time_end)
+      mp_readwind_wtime_end = mpi_wtime()
+
+      mp_readwind_time_total = mp_readwind_time_total + &
+           &(mp_readwind_time_end - mp_readwind_time_beg)
+      mp_readwind_wtime_total = mp_readwind_wtime_total + &
+           &(mp_readwind_wtime_end - mp_readwind_wtime_beg)
+    end if

   case ('commtime')

@@ -2808,8 +2788,8 @@
     write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR GETFIELDS:',&
          & mp_getfields_time_total
-!    write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR READWIND:',&
-!         & mp_readwind_wtime_total
-!    write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR READWIND:',&
-!         & mp_readwind_time_total
+    write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR READWIND:',&
+         & mp_readwind_wtime_total
+    write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR READWIND:',&
+         & mp_readwind_time_total
     write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR FILE IO:',&
          & mp_io_wtime_total
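Several of the hunks above simply rename the per-rank particle-count array (npart_per_process on the r0c8c7f2 side, numparticles_mpi on the r0ecc1fe side) that mpif_calculate_part_redist fills with MPI_Allgather and then uses to decide whether particles should be moved from the most-loaded to the least-loaded rank. The standalone sketch below shows that decision logic in isolation; it is illustrative only: the program wrapper, the made-up particle counts, the insertion sort, the threshold values, and the use of MPI_COMM_WORLD instead of mp_comm_used are assumptions, not code taken from the module.

program redist_decision
  use mpi
  implicit none
  integer, parameter :: mp_min_redist = 100000     ! assumed threshold, for illustration
  real, parameter    :: mp_redist_fract = 0.2      ! assumed imbalance fraction
  integer :: ierr, np, rank, i, j, tmp, m, num_trans, numpart
  integer, allocatable :: numparticles_mpi(:), idx_arr(:)

  call MPI_Init(ierr)
  call MPI_Comm_size(MPI_COMM_WORLD, np, ierr)
  call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)

  numpart = 50000*(rank+1)                          ! stand-in for the local particle count
  allocate(numparticles_mpi(0:np-1), idx_arr(0:np-1))

  ! Every rank publishes its particle count to all others
  call MPI_Allgather(numpart, 1, MPI_INTEGER, numparticles_mpi, 1, MPI_INTEGER, &
       & MPI_COMM_WORLD, ierr)

  ! Index-sort the ranks by particle count, ascending (simple insertion sort)
  idx_arr = [(i, i=0, np-1)]
  do i = 1, np-1
    do j = i, 1, -1
      if (numparticles_mpi(idx_arr(j)) < numparticles_mpi(idx_arr(j-1))) then
        tmp = idx_arr(j); idx_arr(j) = idx_arr(j-1); idx_arr(j-1) = tmp
      end if
    end do
  end do

  ! Pair the most-loaded with the least-loaded rank and test the redistribution criteria
  m = np-1
  do i = 0, np/2-1
    num_trans = numparticles_mpi(idx_arr(m)) - numparticles_mpi(idx_arr(i))
    if (numparticles_mpi(idx_arr(m)) > mp_min_redist .and. &
        & real(num_trans)/real(numparticles_mpi(idx_arr(m))) > mp_redist_fract) then
      if (rank == 0) write(*,'(A,I3,A,I3,A,I9)') 'would move from rank ', idx_arr(m), &
           & ' to rank ', idx_arr(i), ' particles: ', num_trans/2
    end if
    m = m-1
  end do

  deallocate(numparticles_mpi, idx_arr)
  call MPI_Finalize(ierr)
end program redist_decision

A transfer of num_trans/2 particles is considered only when the most-loaded rank holds more than mp_min_redist particles and the imbalance exceeds the fraction mp_redist_fract, the same pair of conditions tested before calling mpif_redist_part in the diff above.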
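The largest hunk moves the wet/dry deposition and receptor reductions relative to the USE_MPIINPLACE preprocessor branch, which switches between a root-side MPI_IN_PLACE reduction and a two-buffer reduction into gridunc0/creceptor0. The minimal sketch below contrasts the two MPI_Reduce patterns involved; the program wrapper, the field/field0 names, and the array size are illustrative assumptions, not code from the module. It must be preprocessed (e.g. a .F90 suffix or the -cpp flag) so that one branch is selected.

program reduce_patterns
  use mpi
  implicit none
  integer, parameter :: n = 4
  integer :: ierr, rank
  real :: field(n), field0(n)

  call MPI_Init(ierr)
  call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
  field(:) = real(rank+1)     ! stand-in for gridunc-type data

#ifdef USE_MPIINPLACE
  ! Root accumulates the sum directly into its own send buffer;
  ! no separate receive array is required on the root.
  if (rank == 0) then
    call MPI_Reduce(MPI_IN_PLACE, field, n, MPI_REAL, MPI_SUM, 0, MPI_COMM_WORLD, ierr)
  else
    call MPI_Reduce(field, field0, n, MPI_REAL, MPI_SUM, 0, MPI_COMM_WORLD, ierr)
  end if
#else
  ! Two-buffer form: every rank sends 'field', the root receives the sum in
  ! 'field0' and copies it back, as the gridunc/gridunc0 pair does in the module.
  call MPI_Reduce(field, field0, n, MPI_REAL, MPI_SUM, 0, MPI_COMM_WORLD, ierr)
  if (rank == 0) field = field0
#endif

  if (rank == 0) write(*,*) 'reduced field:', field
  call MPI_Finalize(ierr)
end program reduce_patterns

The in-place variant lets the root rank accumulate into its own buffer and so avoids a second full-size output array; the #else path is why the r0c8c7f2 side of the diff allocates creceptor0 during initialization and copies the reduced fields back on the root.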