p_z_matmul Subroutine

subroutine p_z_matmul(A, B, C, Nblock, alfa, beta)

Arguments

Type IntentOptional Attributes Name
complex(kind=8), intent(in), dimension(:,:) :: A
complex(kind=8), intent(in), dimension(:,:) :: B
complex(kind=8), intent(inout), dimension(size(A,1),size(B,2)) :: C
integer :: Nblock
real(kind=8), optional :: alfa
real(kind=8), optional :: beta

Calls

proc~~p_z_matmul~2~~CallsGraph proc~p_z_matmul~2 p_z_matmul blacs_gridinfo blacs_gridinfo proc~p_z_matmul~2->blacs_gridinfo descinit descinit proc~p_z_matmul~2->descinit distribute_blacs distribute_blacs proc~p_z_matmul~2->distribute_blacs free_unit free_unit proc~p_z_matmul~2->free_unit gather_blacs gather_blacs proc~p_z_matmul~2->gather_blacs numroc numroc proc~p_z_matmul~2->numroc pzgemm pzgemm proc~p_z_matmul~2->pzgemm

Source Code

subroutine p_z_matmul(A,B,C,Nblock,alfa,beta)
  complex(8),dimension(:,:),intent(in)                    :: A ![N,K]
  complex(8),dimension(:,:),intent(in)                    :: B ![K,M]
  complex(8),dimension(size(A,1),size(B,2)),intent(inout) :: C ![N,M]
  integer                                                 :: Nblock
  real(8),optional                                        :: alfa,beta
  real(8)                                                 :: alfa_,beta_
  integer                                                 :: Nb
  integer                                                 :: N,K,M
  integer                                                 :: Nrows,Kcols,Krows,Mcols
  !
  integer                                                 :: i,j,lda,info
  integer,external                                        :: numroc,indxG2L,indxG2P,indxL2G
  !
  integer,dimension(9)                                    :: descAloc,descBloc,descCloc,descC
  complex(8),dimension(:,:),allocatable                   :: Aloc,Bloc,Cloc
  integer                                                 :: rankX,rankY
  integer                                                 :: sendR,sendC,Nipiv
  integer                                                 :: Nrow,Ncol
  integer                                                 :: myi,myj,unit
  real(8)                                                 :: t_stop,t_start
  logical                                                 :: master
  !
  ! C = alfa*A*B + beta*C
  !
  alfa_     = dcmplx(1d0,0d0) ;if(present(alfa))alfa_=alfa
  beta_     = dcmplx(0d0,0d0) ;if(present(beta))beta_=beta
  !
  !
  N = size(A,1)
  K = size(A,2) !==size(B,1)
  M = size(B,2)
  if(any(shape(B)/=[K,M]))stop "p_d_matmul error: B has illegal shape"
  if(any(shape(C)/=[N,M]))stop "p_d_matmul error: C has illegal shape"
  !
  !INIT SCALAPACK TREATMENT:
  !
  !< Get coordinate of the processes
  call blacs_gridinfo( p_context, p_Nx, p_Ny, rankX, rankY)
  !
  master = (rankX==0).AND.(rankY==0)
  if(rankX<0.AND.rankY<0)goto 1010
  !
  Nb = Nblock
  !
  Nrows = numroc(N, Nb, rankX, 0, p_Nx)
  Kcols = numroc(K, Nb, rankY, 0, p_Ny)
  Krows = numroc(K, Nb, rankX, 0, p_Nx)
  Mcols = numroc(M, Nb, rankY, 0, p_Ny)
  !
  if(master)then
     unit = free_unit()
     open(unit,file="p_dgemm.info")
     write(unit,"(A20,I8,A5,I8)")"Grid=",p_Nx,"x",p_Ny
     write(unit,"(A20,I8,A5,I8)")"Nrows x Kcols=",Nrows,"x",Kcols
     write(unit,"(A20,I8,A5,I8)")"Krows x Mcols=",Krows,"x",Mcols
  endif
  !
  !< allocate local distributed A
  !
  call descinit( descAloc, N, K, Nb, Nb, 0, 0, p_context, Nrows, info )
  call descinit( descBloc, K, M, Nb, Nb, 0, 0, p_context, Krows, info )
  call descinit( descCloc, N, M, Nb, Nb, 0, 0, p_context, Nrows, info )
  !
  !< Distribute A,B; allocate C
  allocate(Aloc(Nrows,Kcols));Aloc=zero
  allocate(Bloc(Krows,Mcols));Bloc=zero
  allocate(Cloc(Nrows,Mcols));Cloc=zero
  call Distribute_BLACS(A,Aloc,descAloc,unit)
  call Distribute_BLACS(B,Bloc,descBloc,unit)
  !
  if(master)call cpu_time(t_stop)
  if(master)write(unit,"(A20,F21.12)")"Time Distribute A,B:",t_stop-t_start
  !
  if(master)call cpu_time(t_start)
  call PZGEMM( 'No transpose', 'No transpose', N, M, K, &
       alfa_, &
       Aloc, 1, 1, descAloc, &
       Bloc, 1, 1, descBloc, &
       beta_, &
       Cloc, 1, 1, descCloc )
  !
  if(master)call cpu_time(t_stop)
  if(master)write(unit,"(A20,F21.12)")"Time matmul C_loc:",t_stop-t_start
  !
  C=dcmplx(0d0,0d0)
  call descinit( descC, N, M, Nb, Nb, 0, 0, p_context, Nrows, info )
  call Gather_BLACS(Cloc,C,descC,unit)
  !
  if(master)close(unit)
1010 continue
  return
  !
end subroutine p_z_matmul