Debian上Fortran并行计算实操指南
一、环境准备
二、方法一:OpenMP 共享内存并行
!> OpenMP example: computes 1**2 + 2**2 + ... + n**2 with a parallel
!> reduction and reports the number of threads in the team.
!> Must be built with OpenMP enabled (e.g. `gfortran -fopenmp`).
program main
  use omp_lib, only: omp_get_num_threads
  implicit none

  integer, parameter :: dp = kind(1.0d0)   ! double-precision kind
  integer, parameter :: n = 1000000        ! upper bound of the summation

  integer  :: i, nthreads
  real(dp) :: s

  s = 0.0_dp

  ! `s` is shared on the parallel region and reduced on the worksharing loop;
  ! a variable may not appear in both shared() and reduction() on one directive.
  !$omp parallel default(none) shared(s, nthreads)

  ! Exactly one thread records the team size, so the shared variable is
  ! written once instead of being raced on by every thread.
  !$omp single
  nthreads = omp_get_num_threads()
  !$omp end single nowait

  ! The loop index `i` is private by rule of the `do` construct.
  !$omp do reduction(+:s)
  do i = 1, n
    s = s + real(i, dp)**2
  end do
  !$omp end do

  !$omp end parallel

  ! Outside the parallel region only the initial thread runs, so no
  ! thread-id guard is needed around the output.
  print '("Threads = ", i0, " Sum = ", f0.0)', nthreads, s
end program main
三、方法二:MPI 分布式内存并行
!> MPI example: every rank sums the integers 1 .. (rank+1)*1000 locally,
!> then the partial sums are combined on rank 0 with MPI_Reduce.
!> Rank 0 prints its local sum, the global sum and the elapsed wall time.
program main
  use mpi_f08
  implicit none

  ! Kind of `double precision`, which is what MPI_DOUBLE_PRECISION describes.
  integer, parameter :: dp = kind(1.0d0)
  integer, parameter :: root = 0           ! rank that receives the result
  integer, parameter :: chunk = 1000       ! work-size multiplier per rank

  type(MPI_Comm) :: comm
  integer  :: rank, nprocs, ierr
  real(dp) :: t0, t1

  call MPI_Init(ierr)
  comm = MPI_COMM_WORLD
  call MPI_Comm_rank(comm, rank, ierr)
  call MPI_Comm_size(comm, nprocs, ierr)

  ! MPI_Wtime returns wall-clock time; cpu_time would report CPU time,
  ! which is not what an "elapsed time" figure should show.
  t0 = MPI_Wtime()

  ! Example: each process computes the local sum 1 .. (rank+1)*1000.
  block
    integer  :: i, local_n
    real(dp) :: local_sum, global_sum

    local_n = (rank + 1) * chunk
    local_sum = 0.0_dp
    global_sum = 0.0_dp   ! only meaningful on root after the reduction

    do i = 1, local_n
      local_sum = local_sum + real(i, dp)
    end do

    call MPI_Reduce(local_sum, global_sum, 1, MPI_DOUBLE_PRECISION, &
                    MPI_SUM, root, comm, ierr)

    t1 = MPI_Wtime()

    if (rank == root) then
      print '("Rank ", i0, "/", i0, " Local sum = ", f0.0, " Global sum = ", f0.0)', &
        rank, nprocs, local_sum, global_sum
      print '("Elapsed time = ", f0.3, " s")', t1 - t0
    end if
  end block

  call MPI_Finalize(ierr)
end program main
四、多节点与性能优化建议