﻿SUBROUTINE  EWALDP2_NEW  (myrank, mpsize)
!
!     ===== This subroutine is for CPU's with myrank of greater than 0.
!     =====                          (SLAVE process)
!
  use param
  use atomsi
  use aboxof
  use values
  use paramt
  use tables
  use counts
  use vector
  use radial
  use forces
  use cartes
  use molecu
  use quanco
  use ewal
  use charge
  use datoms
  use pos
!
!  implicit none
!
!
  integer*4   iii(3), jjjj
  double precision   VAL0N(11), VAL0NC(7)
  double precision   rrr(7,2)
  integer*4  ierr,idiatom,iquantum,ibmhexp,iwatpol,i,myrank,mpsize
  integer*4  ispme         !SPME
!
  include  'mpif.h'
  integer*4  status(MPI_STATUS_SIZE)
  integer :: local_NVNz, local_NVNz_offset, tmp_size(2), array_size, j,k
  integer,allocatable :: ircnt(:), idisp(:), idatom_pack(:), idatom_indx(:)
  real(8),allocatable :: ddatom_pack(:)
  complex(8),allocatable :: recvbuf(:,:,:)
!
1111 CONTINUE
  call MPI_Bcast (irecrd,                  9,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (idiatom,                 1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (iquantum,                1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (ibmhexp,                 1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (iwatpol,                 1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)  !WATER-POL
  call MPI_Bcast (iatomo,                  1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)   !WATER-POL
  call MPI_Bcast (ispme,                   1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)   !SPME     
  call MPI_Bcast (NTION,                   1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (NION(1),              lemw,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (IION(1),              lemw,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (IONS(1,1),          lemw*2,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (NCOMPO,                  1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (NPAIR,                   1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (WIO(1),               lemw,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (ZIO(1),               lemw,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (NDMOLE,                  1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (ZIIP(1),      ntion+ndmole,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) !WATER-POL
  call MPI_Bcast (RSWTCH(1),             lef,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (CIJ(1),                lef,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (DIJ(1),                lef,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (D4IJ(1),               lef,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (D7IJ(1),               lef,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (N3BP,                    1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (I3BP(1,1),           L3P*3,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (ANG3BP(1),             L3P,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (FK3BP(1),              L3P,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (R3BGRD(1,1),         L3P*2,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (R3BLIM(1,1),         L3P*2,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (R3LIM(1,1),          L3P*2,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (E1(1,1),         LSR*NPAIR,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (F1(1,1),         LSR*NPAIR,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (Q1U1(1,1),       LSR*NPAIR,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (Q2U1(1,1),       LSR*NPAIR,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (ZMOLE(1),                2,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (DMOLE(1,1),        4*NTION,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (DINTRA,                  1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (IDMOLE(1,1),3*NTION+NDMOLE,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (IATOM2(1),               2,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (NVNx,                    1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr) !SPME
  call MPI_Bcast (NVNy,                    1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr) !SPME
  call MPI_Bcast (NVNz,                    1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr) !SPME
  if (ispme == 1) then
       allocate(PNVxx(NVNx,NVNy,NVNz),PNVyy(NVNx,NVNy,NVNz),PNVzz(NVNx,NVNy,NVNz))
       allocate(PNVxy(NVNx,NVNy,NVNz),PNVxz(NVNx,NVNy,NVNz),PNVyz(NVNx,NVNy,NVNz))
       allocate(THREC(NVNx,NVNy,NVNz))
       allocate(ARQ(NVNx,NVNy,NVNz))
       allocate(DARQ(NVNx,NVNy,NVNz))
  endif
!
2222 continue
!
  call MPI_Bcast (iii(1),                  3,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  nrcut(1) = iii(1)
  nrcut(2) = iii(2)
  NVN      = iii(3)
  if (NVN < -9000)  go to 9999
!
  if (ispme == 1) then !SPME
      if ( .not.allocated(idisp) ) allocate( idisp(0:mpsize-1) ); idisp=0
      if ( .not.allocated(ircnt) ) allocate( ircnt(0:mpsize-1) ); ircnt=0
      call parallel_data_utility( local_NVNz, local_NVNz_offset, "pop" )
      call MPI_Allgather( local_NVNz_offset, 1, MPI_INTEGER, idisp, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr )
      call MPI_Allgather( local_NVNz       , 1, MPI_INTEGER, ircnt, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr )
      ircnt=ircnt*NVNx*NVNy
      idisp=idisp*NVNx*NVNy
      if ( all(ircnt==0) ) then
         call MPI_Bcast (ARQ(1,1,1), NVNx*NVNy*NVNz,MPI_DOUBLE_COMPLEX,0,MPI_COMM_WORLD,ierr)
         call MPI_Bcast (DARQ(1,1,1), NVNx*NVNy*NVNz,MPI_DOUBLE_COMPLEX,0,MPI_COMM_WORLD,ierr)
         call MPI_Bcast (THREC(1,1,1), NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
         call MPI_Bcast (PNVxx(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
         call MPI_Bcast (PNVyy(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
         call MPI_Bcast (PNVzz(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
         call MPI_Bcast (PNVxy(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
         call MPI_Bcast (PNVxz(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
         call MPI_Bcast (PNVyz(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
      else
         if ( allocated(recvbuf) ) deallocate(recvbuf)
         allocate( recvbuf(NVNx,NVNy,local_NVNz) ); recvbuf=(0.0d0,0.0d0)
         call MPI_Scatterv( ARQ, ircnt, idisp, MPI_DOUBLE_COMPLEX, recvbuf &
                          , NVNx*NVNy*local_NVNz, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr )
         ARQ(:,:,local_NVNz_offset+1:local_NVNz_offset+local_NVNz) = recvbuf(:,:,:)
         call MPI_Scatterv( DARQ, ircnt, idisp, MPI_DOUBLE_COMPLEX, recvbuf &
                          , NVNx*NVNy*local_NVNz, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr )
         DARQ(:,:,local_NVNz_offset+1:local_NVNz_offset+local_NVNz) = recvbuf(:,:,:)
!         call MPI_Bcast (ARQ(1,1,1), NVNx*NVNy*NVNz,MPI_DOUBLE_COMPLEX,0,MPI_COMM_WORLD,ierr)
!         call MPI_Bcast (DARQ(1,1,1), NVNx*NVNy*NVNz,MPI_DOUBLE_COMPLEX,0,MPI_COMM_WORLD,ierr)
         call MPI_Bcast (THREC(1,1,1), NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
         call MPI_Bcast (PNVxx(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
         call MPI_Bcast (PNVyy(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
         call MPI_Bcast (PNVzz(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
         call MPI_Bcast (PNVxy(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
         call MPI_Bcast (PNVxz(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
         call MPI_Bcast (PNVyz(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) 
      end if
      call MPI_Bcast (QCOFF,                  1, MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  endif
  call MPI_Bcast (KRDF,                    1,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)   !WATER-POL
!
  if (KRDF == 2) goto 1112     !WATER-POL
!
  call MPI_Bcast (ZII(1),       ntion+ndmole,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr) !WATER-POL
  call MPI_Bcast (nrecrd,                  9,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
  if (ispme == 0) then
    call MPI_Bcast (NVEC(1,1),           3*NVN,         MPI_INTEGER4,0,MPI_COMM_WORLD,ierr)
    call MPI_Bcast (FNV(1),                NVN,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
    call MPI_Bcast (UNV(1),                NVN,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
    call MPI_Bcast (PNV(1,1),            6*NVN,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  endif
  call MPI_Bcast (rrr(1,1),               14,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  do i=1, 6
    box(i)  = rrr(i,1)
    rbox(i) = rrr(i,2)
  end do
  rcut(1) = rrr(7,1)
  rcut(2) = rrr(7,2)
  call MPI_Bcast (E0(1),                 LTB,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (F0(1),                 LTB,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
!
  call MPI_Bcast (PX(1),        ntion+ndmole,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (PY(1),        ntion+ndmole,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (PZ(1),        ntion+ndmole,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
  call MPI_Bcast (r3limax,                 1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
!
    VAL0N(:) = 0.0D0
!
    VAL0NC(:) = 0.0D0
!
  VIRLSR = 0.0D0
  TQCE   = 0.0D0
!
      NRDF(:,:) = 0
!
  do i = 1, 4
    av3bp(1,i) = 0.0D0
    av3bp(2,i) = 0.0D0
  enddo
!
!  PI2   = PI * 2.0D0
!
  do i = 1, ntion + ndmole   !water-pol
    UI(i) = 0.0D0
    FX(i) = 0.0D0
    FY(i) = 0.0D0
    FZ(i) = 0.0D0
    UIc(i) = 0.0D0  !water-pol
  enddo
!
!     ------------------------------------------ Coulomb reciprocal term
!     ------------------------------------ Coulmb direct and short range
!
  call  EWALDP3  (idiatom, iquantum, iwatpol, ispme, val0n, val0NC,myrank, mpsize)
!
  val0n(2) = virlsr
!
!  call MPI_Recv  (iii, 1, MPI_INTEGER, 0, myrank, MPI_COMM_WORLD,status,ierr)
!  call MPI_Recv  (jjjj,                    1,         MPI_INTEGER4,0,myrank,MPI_COMM_WORLD,status,ierr)
  call MPI_Ssend (val0N(1),               11,MPI_DOUBLE_PRECISION,0,myrank,MPI_COMM_WORLD,ierr)
  val0NC(7) = tqce
  call MPI_Ssend (val0NC(1),               7,MPI_DOUBLE_PRECISION,0,myrank,MPI_COMM_WORLD,ierr)

  !call MPI_Ssend (ui(1),        ntion+ndmole,MPI_DOUBLE_PRECISION,0,myrank,MPI_COMM_WORLD,ierr)
  !call MPI_Ssend (FX(1),        ntion+ndmole,MPI_DOUBLE_PRECISION,0,myrank,MPI_COMM_WORLD,ierr)
  !call MPI_Ssend (FY(1),        ntion+ndmole,MPI_DOUBLE_PRECISION,0,myrank,MPI_COMM_WORLD,ierr)
  !call MPI_Ssend (FZ(1),        ntion+ndmole,MPI_DOUBLE_PRECISION,0,myrank,MPI_COMM_WORLD,ierr)
  !call MPI_Ssend (uic(1),       ntion+ndmole,MPI_DOUBLE_PRECISION,0,myrank,MPI_COMM_WORLD,ierr)   !WATER-POL
  call MPI_Reduce( ui , ui, ntion+ndmole, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, ierr )
  call MPI_Reduce( fx , ui, ntion+ndmole, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, ierr )
  call MPI_Reduce( fy , ui, ntion+ndmole, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, ierr )
  call MPI_Reduce( fz , ui, ntion+ndmole, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, ierr )
  call MPI_Reduce( uic, ui, ntion+ndmole, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, ierr )

  call MPI_Ssend (NRDF(1,1),       ltb*NPAIR,        MPI_INTEGER4,0,myrank,MPI_COMM_WORLD,ierr)

  tmp_size(1) = count( idatom /= 0 )
  call MPI_Allreduce( tmp_size(1), array_size, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr )
  allocate( idatom_pack(array_size) ); idatom_pack=0
  allocate( idatom_indx(array_size) ); idatom_indx=0
  allocate( ddatom_pack(array_size) ); ddatom_pack=0.0d0
  k=0
  do j = 1, ntion+ndmole
     do i = 1, idatom101(j)
        if ( idatom(i,j) == 0 ) cycle
        k=k+1
        idatom_indx(k) = j
        idatom_pack(k) = idatom(i,j)
        ddatom_pack(k) = ddatom(i,j)
     end do ! i
  end do ! j
  call MPI_Ssend( idatom_indx, size(idatom_indx), MPI_INTEGER, 0, myrank, MPI_COMM_WORLD, ierr )
  call MPI_Ssend( idatom_pack, size(idatom_pack), MPI_INTEGER, 0, myrank, MPI_COMM_WORLD, ierr )
  call MPI_Ssend( ddatom_pack, size(ddatom_pack), MPI_REAL8  , 0, myrank, MPI_COMM_WORLD, ierr )
  deallocate( ddatom_pack )
  deallocate( idatom_indx )
  deallocate( idatom_pack )

!  call MPI_Ssend (idatom(1,1),101*(ntion+ndmole),     MPI_INTEGER4,0,myrank,MPI_COMM_WORLD,ierr)  !WATER-POL
!  call MPI_Ssend (ddatom(1,1),100*(ntion+ndmole),MPI_DOUBLE_PRECISION,0,myrank,MPI_COMM_WORLD,ierr)   !WATER-POL

  if (ispme == 1) then !SPME
    call parallel_data_utility( local_NVNz, local_NVNz_offset, "pop" )
    tmp_size(1:2) = (/ local_NVNz, local_NVNz_offset /)
    call MPI_Ssend (tmp_size,2,MPI_INTEGER,0,myrank,MPI_COMM_WORLD,ierr)
    call MPI_Ssend (DARQ(1,1,local_NVNz_offset+1),NVNx*NVNy*local_NVNz,MPI_DOUBLE_COMPLEX,0,myrank,MPI_COMM_WORLD,ierr)
!    call MPI_Ssend (DARQ(1,1,1),NVNx*NVNy*NVNz,MPI_DOUBLE_COMPLEX,0,myrank,MPI_COMM_WORLD,ierr)
  endif
!
!
1112 CONTINUE
!
!
  goto 2222
!
9999 call  MPI_Finalize  (ierr)
  STOP
END SUBROUTINE EWALDP2_NEW
