Changes in src/mpi_mod.f90 [0ecc1fe:0c8c7f2] in flexpart.git
File: src/mpi_mod.f90 (1 edited)

Legend (markers used in the diff below):
  ' '  Unmodified
  '+'  Added
  '-'  Removed
src/mpi_mod.f90
--- src/mpi_mod.f90 (r0ecc1fe)
+++ src/mpi_mod.f90 (r0c8c7f2)

@@ -88,4 +88,5 @@
 ! Variables for MPI processes in the 'particle' group
   integer, allocatable, dimension(:) :: mp_partgroup_rank
+  integer, allocatable, dimension(:) :: npart_per_process
   integer :: mp_partgroup_comm, mp_partgroup_pid, mp_partgroup_np

@@ -125,5 +126,5 @@
 ! mp_time_barrier   Measure MPI barrier time
 ! mp_exact_numpart  Use an extra MPI communication to give the exact number of particles
-!                   to standard output (this does *not* otherwise affect the simulation)
+!                   to standard output (this does not otherwise affect the simulation)
   logical, parameter :: mp_dbg_mode = .false.
   logical, parameter :: mp_dev_mode = .false.

@@ -190,6 +191,6 @@
 ! mp_np  number of running processes, decided at run-time
 !***********************************************************************
-  use par_mod, only: maxpart, numwfmem, dep_prec
-  use com_mod, only: mpi_mode, verbosity
+  use par_mod, only: maxpart, numwfmem, dep_prec, maxreceptor, maxspec
+  use com_mod, only: mpi_mode, verbosity, creceptor0

   implicit none

@@ -337,5 +338,5 @@

 ! Set maxpart per process
-! eso08/2016: Increase maxpart per process, in case of unbalanced distribution
+! ESO 08/2016: Increase maxpart per process, in case of unbalanced distribution
   maxpart_mpi=int(mp_maxpart_factor*real(maxpart)/real(mp_partgroup_np))
   if (mp_np == 1) maxpart_mpi = maxpart

@@ -365,4 +366,14 @@
   end if

+! Allocate array for number of particles per process
+  allocate(npart_per_process(0:mp_partgroup_np-1))
+
+! Write whether MPI_IN_PLACE is used or not
+#ifdef USE_MPIINPLACE
+  if (lroot) write(*,*) 'Using MPI_IN_PLACE operations'
+#else
+  if (lroot) allocate(creceptor0(maxreceptor,maxspec))
+  if (lroot) write(*,*) 'Not using MPI_IN_PLACE operations'
+#endif
   goto 101

@@ -559,5 +570,5 @@
 ! invalid particles at the end of the arrays

-  do i=num_part, 1, -1
+  do i=numpart, 1, -1
     if (itra1(i).eq.-999999999) then
       numpart=numpart-1

@@ -598,4 +609,4 @@
   integer :: i,jj,nn, num_part=1,m,imin, num_trans
   logical :: first_iter
-  integer,allocatable,dimension(:) :: numparticles_mpi,idx_arr
+  integer,allocatable,dimension(:) :: idx_arr
   real,allocatable,dimension(:) :: sorted ! TODO: we don't really need this

@@ -607,7 +618,6 @@
 ! All processes exchange information on number of particles
 !****************************************************************************
-  allocate(numparticles_mpi(0:mp_partgroup_np-1), &
-       &idx_arr(0:mp_partgroup_np-1), sorted(0:mp_partgroup_np-1))
+  allocate(idx_arr(0:mp_partgroup_np-1), sorted(0:mp_partgroup_np-1))

-  call MPI_Allgather(numpart, 1, MPI_INTEGER, numparticles_mpi, &
+  call MPI_Allgather(numpart, 1, MPI_INTEGER, npart_per_process, &
        & 1, MPI_INTEGER, mp_comm_used, mp_ierr)

@@ -616,8 +626,11 @@
 ! Sort from lowest to highest
 ! Initial guess: correct order
-  sorted(:) = numparticles_mpi(:)
+  sorted(:) = npart_per_process(:)
   do i=0, mp_partgroup_np-1
     idx_arr(i) = i
   end do
+
+! Do not rebalance particles for ipout=3
+  if (ipout.eq.3) return

 ! For each successive element in index array, see if a lower value exists

@@ -645,11 +658,11 @@
   m=mp_partgroup_np-1 ! index for last sorted process (most particles)
   do i=0,mp_partgroup_np/2-1
-    num_trans = numparticles_mpi(idx_arr(m)) - numparticles_mpi(idx_arr(i))
+    num_trans = npart_per_process(idx_arr(m)) - npart_per_process(idx_arr(i))
     if (mp_partid.eq.idx_arr(m).or.mp_partid.eq.idx_arr(i)) then
-      if ( numparticles_mpi(idx_arr(m)).gt.mp_min_redist.and.&
-           & real(num_trans)/real(numparticles_mpi(idx_arr(m))).gt.mp_redist_fract) then
+      if ( npart_per_process(idx_arr(m)).gt.mp_min_redist.and.&
+           & real(num_trans)/real(npart_per_process(idx_arr(m))).gt.mp_redist_fract) then
 ! DBG
-! write(*,*) 'mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, numparticles_mpi', &
-!      &mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, numparticles_mpi
+! write(*,*) 'mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, npart_per_process', &
+!      &mp_partid, idx_arr(m), idx_arr(i), mp_min_redist, num_trans, npart_per_process
 ! DBG
         call mpif_redist_part(itime, idx_arr(m), idx_arr(i), num_trans/2)

@@ -659,5 +672,5 @@
   end do

-  deallocate(numparticles_mpi,idx_arr, sorted)
+  deallocate(idx_arr, sorted)

 end subroutine mpif_calculate_part_redist
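Note: the hunks above rename the per-call array numparticles_mpi to the module-level npart_per_process and add an early return for ipout=3, but the load-balancing scheme itself is unchanged: every rank gathers all per-rank particle counts with MPI_Allgather, rank indices are sorted by count, and the busiest rank is paired with the idlest so that half the difference can migrate. The standalone sketch below is not FLEXPART code; the rank counts and the min_redist/redist_fract thresholds are invented stand-ins for numpart, mp_min_redist and mp_redist_fract.

program redist_sketch
  use mpi
  implicit none
  integer :: ierr, np, me, i, j, tmp, m, num_trans, npart_local
  integer, allocatable :: counts(:), idx(:)
  integer, parameter :: min_redist = 1000 ! stand-in for mp_min_redist
  real, parameter :: redist_fract = 0.2   ! stand-in for mp_redist_fract

  call MPI_Init(ierr)
  call MPI_Comm_size(MPI_COMM_WORLD, np, ierr)
  call MPI_Comm_rank(MPI_COMM_WORLD, me, ierr)

  npart_local = 1000*(me+1)  ! stand-in for this rank's numpart

! Every rank learns every other rank's particle count
  allocate(counts(0:np-1), idx(0:np-1))
  call MPI_Allgather(npart_local, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, &
       & MPI_COMM_WORLD, ierr)

! Sort rank indices by count, ascending (insertion sort; np is small)
  do i=0, np-1
    idx(i) = i
  end do
  do i=1, np-1
    j = i
    do while (j.ge.1)
      if (counts(idx(j)).lt.counts(idx(j-1))) then
        tmp = idx(j)
        idx(j) = idx(j-1)
        idx(j-1) = tmp
      end if
      j = j-1
    end do
  end do

! Pair busiest rank with idlest, 2nd busiest with 2nd idlest, etc.;
! half the count difference would migrate when the imbalance is large
  m = np-1
  do i=0, np/2-1
    num_trans = counts(idx(m)) - counts(idx(i))
    if (counts(idx(m)).gt.min_redist .and. &
         & real(num_trans)/real(counts(idx(m))).gt.redist_fract) then
      if (me.eq.idx(m) .or. me.eq.idx(i)) then
        write(*,*) 'rank', idx(m), 'would send', num_trans/2, &
             & 'particles to rank', idx(i)
      end if
    end if
    m = m-1
  end do

  deallocate(counts, idx)
  call MPI_Finalize(ierr)
end program redist_sketch

Compiled with an MPI wrapper (e.g. mpif90) and run under mpirun, each rank in a transfer pair reports how many particles would move.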
@@ -1961,5 +1974,5 @@
   if (readclouds) then
     j=j+1
-    call MPI_Irecv(ctwc(:,:,mind),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
+    call MPI_Irecv(ctwc(:,:,mind),d2s1*5,mp_sp,id_read,MPI_ANY_TAG,&
          &MPI_COMM_WORLD,reqs(j),mp_ierr)
     if (mp_ierr /= 0) goto 600

@@ -2326,5 +2339,5 @@
   if (readclouds) then
     j=j+1
-    call MPI_Irecv(ctwcn(:,:,mind,k),d2s1,mp_sp,id_read,MPI_ANY_TAG,&
+    call MPI_Irecv(ctwcn(:,:,mind,k),d2s1*5,mp_sp,id_read,MPI_ANY_TAG,&
          &MPI_COMM_WORLD,reqs(j),mp_ierr)
     if (mp_ierr /= 0) goto 600

@@ -2462,9 +2475,25 @@
   end if

+! Receptor concentrations
+  if (lroot) then
+    call MPI_Reduce(MPI_IN_PLACE,creceptor,rcpt_size,mp_sp,MPI_SUM,id_root, &
+         & mp_comm_used,mp_ierr)
+    if (mp_ierr /= 0) goto 600
+  else
+    call MPI_Reduce(creceptor,0,rcpt_size,mp_sp,MPI_SUM,id_root, &
+         & mp_comm_used,mp_ierr)
+  end if
+
 #else

   call MPI_Reduce(gridunc, gridunc0, grid_size3d, mp_sp, MPI_SUM, id_root, &
        & mp_comm_used, mp_ierr)
+  if (mp_ierr /= 0) goto 600
   if (lroot) gridunc = gridunc0
+
+  call MPI_Reduce(creceptor, creceptor0,rcpt_size,mp_sp,MPI_SUM,id_root, &
+       & mp_comm_used,mp_ierr)
+  if (mp_ierr /= 0) goto 600
+  if (lroot) creceptor = creceptor0

 #endif

@@ -2482,13 +2511,4 @@
   end if

-! Receptor concentrations
-  if (lroot) then
-    call MPI_Reduce(MPI_IN_PLACE,creceptor,rcpt_size,mp_sp,MPI_SUM,id_root, &
-         & mp_comm_used,mp_ierr)
-    if (mp_ierr /= 0) goto 600
-  else
-    call MPI_Reduce(creceptor,0,rcpt_size,mp_sp,MPI_SUM,id_root, &
-         & mp_comm_used,mp_ierr)
-  end if

   if (mp_measure_time) call mpif_mtime('commtime',1)
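Note: the receptor-concentration reduction moves inside the #ifdef USE_MPIINPLACE section because the two variants need different buffers: with MPI_IN_PLACE the root rank receives the sum directly into creceptor, while the fallback path reduces into the newly allocated creceptor0 and copies the result back on the root. A minimal self-contained sketch of both variants follows; the buffer names and sizes here are arbitrary, not the FLEXPART arrays.

program reduce_sketch
  use mpi
  implicit none
  integer, parameter :: n = 8
  integer :: ierr, me
  real :: buf(n), buf0(n)

  call MPI_Init(ierr)
  call MPI_Comm_rank(MPI_COMM_WORLD, me, ierr)
  buf = real(me+1)   ! each rank contributes its own values

#ifdef USE_MPIINPLACE
! Root passes MPI_IN_PLACE as sendbuf and receives the sum in buf; on
! the other ranks the receive buffer is ignored (the diff passes a
! literal 0 there; a dummy buffer is used here instead).
  if (me.eq.0) then
    call MPI_Reduce(MPI_IN_PLACE, buf, n, MPI_REAL, MPI_SUM, 0, &
         & MPI_COMM_WORLD, ierr)
  else
    call MPI_Reduce(buf, buf0, n, MPI_REAL, MPI_SUM, 0, &
         & MPI_COMM_WORLD, ierr)
  end if
#else
! Two-buffer variant: reduce into a second array, copy back on root
! (the role creceptor0 plays in the diff).
  call MPI_Reduce(buf, buf0, n, MPI_REAL, MPI_SUM, 0, &
       & MPI_COMM_WORLD, ierr)
  if (me.eq.0) buf = buf0
#endif

  if (me.eq.0) write(*,*) 'reduced first element:', buf(1)
  call MPI_Finalize(ierr)
end program reduce_sketch

Building with something like mpif90 -cpp -DUSE_MPIINPLACE selects the in-place branch; without the define, the two-buffer branch is used, mirroring the structure of the diff.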
@@ -2700,17 +2720,17 @@
   end if

-  (old lines 2702-2714: thirteen blank lines, removed)
+  case ('readwind')
+    if (imode.eq.0) then
+      call cpu_time(mp_readwind_time_beg)
+      mp_readwind_wtime_beg = mpi_wtime()
+    else
+      call cpu_time(mp_readwind_time_end)
+      mp_readwind_wtime_end = mpi_wtime()
+
+      mp_readwind_time_total = mp_readwind_time_total + &
+           &(mp_readwind_time_end - mp_readwind_time_beg)
+      mp_readwind_wtime_total = mp_readwind_wtime_total + &
+           &(mp_readwind_wtime_end - mp_readwind_wtime_beg)
+    end if

   case ('commtime')

@@ -2788,8 +2808,8 @@
     write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR GETFIELDS:',&
          & mp_getfields_time_total
-    write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR READWIND:',&
-         & mp_readwind_wtime_total
-    write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR READWIND:',&
-         & mp_readwind_time_total
+!    write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR READWIND:',&
+!         & mp_readwind_wtime_total
+!    write(*,FMT='(A60,TR1,F9.2)') 'TOTAL CPU TIME FOR READWIND:',&
+!         & mp_readwind_time_total
     write(*,FMT='(A60,TR1,F9.2)') 'TOTAL WALL TIME FOR FILE IO:',&
          & mp_io_wtime_total
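Note: the 'readwind' case added above follows the same begin/end pattern as the other cases in mpif_mtime: cpu_time accumulates per-process CPU seconds, mpi_wtime accumulates elapsed wall-clock seconds, and each imode=0/imode=1 pair adds one interval to the running totals (even though the corresponding summary output is commented out further down). A compilable sketch of that pattern, with a placeholder loop standing in for the timed work:

program mtime_sketch
  use mpi
  implicit none
  integer :: ierr, i
  real :: cpu_beg, cpu_end, cpu_total
  double precision :: wtime_beg, wtime_end, wtime_total
  real :: x

  call MPI_Init(ierr)
  cpu_total = 0.
  wtime_total = 0.d0

! Equivalent of mpif_mtime('readwind',0): record the start times
  call cpu_time(cpu_beg)
  wtime_beg = MPI_Wtime()

! Placeholder for the work being timed
  x = 0.
  do i=1, 10000000
    x = x + sin(real(i))
  end do

! Equivalent of mpif_mtime('readwind',1): record the end times and
! accumulate one interval into the running totals
  call cpu_time(cpu_end)
  wtime_end = MPI_Wtime()
  cpu_total = cpu_total + (cpu_end - cpu_beg)
  wtime_total = wtime_total + (wtime_end - wtime_beg)

  write(*,*) 'cpu s:', cpu_total, ' wall s:', wtime_total, ' checksum:', x
  call MPI_Finalize(ierr)
end program mtime_sketch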