Commit 1b3d9536 authored by Yingzhou Li's avatar Yingzhou Li
Browse files

Fixed a bug in the Davidson method, current version converges

parent c44b144c
......@@ -34,19 +34,11 @@ module ELSI_RCI_DAVIDSON
! Matrix ID of size n by n
!&<
integer(i4), parameter :: MID_HR = 21
integer(i4), parameter :: MID_HR12 = 22
integer(i4), parameter :: MID_HR21 = 23
integer(i4), parameter :: MID_HR22 = 24
integer(i4), parameter :: MID_HR1 = 25
integer(i4), parameter :: MID_HR2 = 26
integer(i4), parameter :: MID_SR = 27
integer(i4), parameter :: MID_SR12 = 28
integer(i4), parameter :: MID_SR21 = 29
integer(i4), parameter :: MID_SR22 = 30
integer(i4), parameter :: MID_SR1 = 31
integer(i4), parameter :: MID_SR2 = 32
integer(i4), parameter :: MID_VR = 33
integer(i4), parameter :: MID_VRsub = 34
integer(i4), parameter :: MID_SR = 22
integer(i4), parameter :: MID_VR = 23
integer(i4), parameter :: MID_VRsub = 24
integer(i4), parameter :: MID_WORK1 = 25
integer(i4), parameter :: MID_WORK2 = 26
!&>
! Stage ID
......@@ -101,7 +93,7 @@ contains
! Allocate matrix of size n by n
if (ijob == SID_ALLOCATE + 2) then
iter = iter + 1
if (iter > 34) then
if (iter > 26) then
call rci_op_stop(task)
else
call rci_op_allocate(iS, task, n, n, iter)
......@@ -149,7 +141,7 @@ contains
! Deallocate matrix of size n by n
if (ijob == SID_DEALLOCATE + 2) then
iter = iter + 1
if (iter > 34) then
if (iter > 26) then
call rci_op_stop(task)
else
call rci_op_deallocate(iS, task, iter)
......@@ -178,19 +170,18 @@ contains
integer :: it, iti
integer, save :: m ! size of basis
integer, save :: n ! number of current states
integer, save :: niter ! CG step num.
integer, save :: n_state ! number of wanted states
integer, save :: next ! number of extended states
integer, save :: niter ! CG step num.
integer, save :: max_iter ! max number of iterations
integer, save :: max_n ! max number of basis in reduced space
real(r8), save :: tol_iter ! convergence tolerance
logical, save, allocatable :: Mat_Conv(:)
real(r8), save, allocatable :: Mat_EW(:)
real(r8), save, allocatable :: Mat_EWsub(:)
real(r8), save, allocatable :: Mat_EWold(:)
real(r8), allocatable :: tmp_vec(:)
real(r8), save, allocatable :: Vec_conv(:)
real(r8), save, allocatable :: Vec_ev(:)
real(r8), save, allocatable :: Vec_evsub(:)
real(r8), save, allocatable :: Vec_evold(:)
!**********************************************!
! -- Init: parameter setup
......@@ -199,19 +190,28 @@ contains
m = r_h%n_basis
n = r_h%n_state
n_state = r_h%n_state
max_iter = r_h%max_iter
tol_iter = r_h%tol_iter
max_n = r_h%davidson_max_n
niter = 0
next = 0
! n = nband
allocate (Vec_conv(max_n))
Vec_conv = 0.0_r8
allocate (Vec_ev(max_n))
Vec_ev = 0.0_r8
allocate (Vec_evsub(max_n))
Vec_evsub = 0.0_r8
allocate (Vec_evold(max_n))
Vec_evold = 0.0_r8
call rci_op_null(task)
return
end if
! -- HPsi = H*Psi
if (ijob <= SID_INIT + 1) then
if (ijob == SID_INIT + 1) then
call rci_op_h_multi(iS, task, 'N', m, n, MID_Psi, MID_HPsi)
ijob = ijob + 1
return
......@@ -243,20 +243,29 @@ contains
end if
! -- HR * VR = SR * VR * Diag(EW)
! - HR = VR
! - calculate reduced generalized eigenvalue problem
if (ijob == SID_INIT + 5) then
call rci_op_copy(iS, task, 'N', MID_HR, MID_VR)
ijob = ijob + 1
return
end if
if (ijob == SID_INIT + 6) then
call rci_op_copy(iS, task, 'N', MID_SR, MID_WORK1)
ijob = ijob + 1
return
end if
if (ijob == SID_INIT + 7) then
call rci_op_hegv(iS, task, 'V', 'L', n, &
MID_HR, max_n, MID_SR, max_n)
MID_VR, max_n, MID_WORK1, max_n)
ijob = ijob + 1
return
end if
! -store old ev for convergence checking
if (ijob == SID_INIT + 6) then
Mat_EW = resvec
if (ijob == SID_INIT + 8) then
Vec_ev = resvec
Vec_evold = resvec
next = n
Mat_EWold = Mat_EW
call rci_op_null(task)
ijob = SID_ITER
return
......@@ -266,26 +275,22 @@ contains
! This is the main loop of the Davidson algorithm.
! - Redistribution of the vectors
! VRsub = VR(:,Mat_Conv) = HR(:,Mat_Conv)
! VRsub = VR(:,not Vec_conv)
if (ijob == SID_ITER) then
!TODO
resvec = 1.0_r8 - Vec_conv
call rci_op_subcol(iS, task, m, n_state, MID_VR, MID_VRsub)
niter = niter + 1
call rci_op_subcol(iS, task, m, next, &
MID_HR, MID_VRsub)
ijob = ijob + 1
return
end if
! EWsub = EW(notcvg)
! EVsub = EV(not Vec_conv)
if (ijob == SID_ITER + 1) then
!TODO
deallocate (Mat_EWsub)
allocate (Mat_EWsub(count(Mat_Conv)))
iti = 0
do it = 1, size(Mat_Conv)
if (.not. Mat_Conv(it)) then
do it = 1, n_state
if (Vec_conv(it) < 0.5_r8) then
iti = iti + 1
Mat_EWsub(iti) = Mat_EW(it)
Vec_evsub(iti) = Vec_ev(it)
end if
end do
call rci_op_null(task)
......@@ -304,7 +309,7 @@ contains
! - PsiExt = PsiExt * Diag(EWsub)
if (ijob == SID_ITER + 3) then
!TODO
resvec = Vec_evsub
call rci_op_colscale(iS, task, m, next, MID_PsiExt, m)
ijob = ijob + 1
return
......@@ -322,7 +327,7 @@ contains
! - approx solve(PsiExt)
! This is a special function only exist for Davidson method
if (ijob == SID_ITER + 5) then
!TODO
resvec = Vec_evsub
call rci_op_approx_solve(iS, task, m, next, MID_PsiExt)
ijob = ijob + 1
return
......@@ -330,7 +335,6 @@ contains
! - norm(PsiExt)
if (ijob == SID_ITER + 6) then
!TODO
call rci_op_col_norm(iS, task, m, next, MID_PsiExt, m)
ijob = ijob + 1
return
......@@ -338,11 +342,13 @@ contains
! - normize(PsiExt)
if (ijob == SID_ITER + 7) then
tmp_vec = resvec
do it = 1, next
tmp_vec(it) = 1.0_r8/resvec(it)
if (resvec(it) > 0) then
resvec(it) = 1.0_r8/resvec(it)
else
resvec(it) = 0.0_r8
end if
end do
!TODO
call rci_op_colscale(iS, task, m, next, MID_PsiExt, m)
ijob = ijob + 1
return
......@@ -360,34 +366,56 @@ contains
if (ijob <= SID_ITER + 9) then
call rci_op_gemm(iS, task, 'C', 'N', n, next, m, 1.0_r8, &
MID_Psi, m, MID_HPsiExt, m, 0.0_r8, &
MID_HR12, max_n)
MID_WORK1, max_n)
ijob = ijob + 1
return
end if
! -- HR(1:n, n+(1:next)) = HR12
! TODO
if (ijob <= SID_ITER + 10) then
call rci_op_subcopy(iS, task, n, next, &
MID_WORK1, max_n, 0, 0, &
MID_HR, max_n, 0, n)
ijob = ijob + 1
return
end if
! -- HR21 = HR12'
! TODO
if (ijob <= SID_ITER + 11) then
call rci_op_copy(iS, task, 'C', MID_WORK1, MID_WORK2)
ijob = ijob + 1
return
end if
! -- HR(n+(1:next),1:n) = HR21
! TODO
if (ijob <= SID_ITER + 12) then
call rci_op_subcopy(iS, task, next, n, &
MID_WORK2, max_n, 0, 0, &
MID_HR, max_n, n, 0)
ijob = ijob + 1
return
end if
! -- HR22 = PsiExt' * HPsiExt
if (ijob <= SID_ITER + 10) then
if (ijob <= SID_ITER + 13) then
call rci_op_gemm(iS, task, 'C', 'N', next, next, m, 1.0_r8, &
MID_PsiExt, m, MID_HPsiExt, m, 0.0_r8, &
MID_HR22, max_n)
MID_WORK1, max_n)
ijob = ijob + 1
return
end if
! -- HR(n+(1:next), n+(1:next)) = HR22
! TODO
if (ijob <= SID_ITER + 14) then
call rci_op_subcopy(iS, task, next, next, &
MID_WORK1, max_n, 0, 0, &
MID_HR, max_n, n, n)
ijob = ijob + 1
return
end if
! -- SPsiExt = S*PsiExt
if (ijob == SID_ITER + 14) then
if (ijob == SID_ITER + 15) then
call rci_op_s_multi(iS, task, 'N', m, next, &
MID_PsiExt, MID_SPsiExt)
ijob = ijob + 1
......@@ -395,53 +423,119 @@ contains
end if
! -- SR12 = Psi' * SPsiExt
if (ijob <= SID_ITER + 15) then
if (ijob == SID_ITER + 16) then
call rci_op_gemm(iS, task, 'C', 'N', n, next, m, 1.0_r8, &
MID_Psi, m, MID_SPsiExt, m, 0.0_r8, &
MID_SR12, max_n)
MID_WORK1, max_n)
ijob = ijob + 1
return
end if
! -- SR(1:n, n+(1:next)) = SR12
! TODO
if (ijob == SID_ITER + 17) then
call rci_op_subcopy(iS, task, n, next, &
MID_WORK1, max_n, 0, 0, &
MID_SR, max_n, 0, n)
ijob = ijob + 1
return
end if
! -- SR21 = SR12'
! TODO
if (ijob == SID_ITER + 18) then
call rci_op_copy(iS, task, 'C', MID_WORK1, MID_WORK2)
ijob = ijob + 1
return
end if
! -- SR(n+(1:next),1:n) = SR21
! TODO
if (ijob == SID_ITER + 19) then
call rci_op_subcopy(iS, task, next, n, &
MID_WORK2, max_n, 0, 0, &
MID_SR, max_n, n, 0)
ijob = ijob + 1
return
end if
! -- SR22 = PsiExt' * SPsiExt
if (ijob <= SID_ITER + 16) then
if (ijob == SID_ITER + 20) then
call rci_op_gemm(iS, task, 'C', 'N', next, next, m, 1.0_r8, &
MID_PsiExt, m, MID_SPsiExt, m, 0.0_r8, &
MID_SR22, max_n)
MID_WORK1, max_n)
ijob = ijob + 1
return
end if
! -- SR(n+(1:next), n+(1:next)) = SR22
! TODO
if (ijob == SID_ITER + 21) then
call rci_op_subcopy(iS, task, next, next, &
MID_WORK1, max_n, 0, 0, &
MID_SR, max_n, n, n)
ijob = ijob + 1
return
end if
! -- HR * VR = SR * VR * Diag(EW)
! - calculate reduced generalized eigenvalue problem
if (ijob == SID_ITER + 21) then
if (ijob == SID_ITER + 22) then
call rci_op_copy(iS, task, 'N', MID_HR, MID_VR)
ijob = ijob + 1
return
end if
if (ijob == SID_ITER + 23) then
call rci_op_copy(iS, task, 'N', MID_SR, MID_WORK1)
ijob = ijob + 1
return
end if
if (ijob == SID_ITER + 24) then
call rci_op_hegv(iS, task, 'V', 'L', n + next, &
MID_HR, max_n, MID_SR, max_n)
MID_VR, max_n, MID_WORK1, max_n)
ijob = ijob + 1
return
end if
! -- Psi(1:m, n+(1:next)) = PsiExt
if (ijob <= SID_ITER + 25) then
call rci_op_subcopy(iS, task, m, next, &
MID_PsiExt, m, 0, 0, &
MID_Psi, m, 0, n)
ijob = ijob + 1
return
end if
! -- HPsi(1:m, n+(1:next)) = HPsiExt
if (ijob <= SID_ITER + 26) then
call rci_op_subcopy(iS, task, m, next, &
MID_HPsiExt, m, 0, 0, &
MID_HPsi, m, 0, n)
ijob = ijob + 1
return
end if
! -- SPsi(1:m, n+(1:next)) = SPsiExt
if (ijob == SID_ITER + 27) then
call rci_op_subcopy(iS, task, m, next, &
MID_SPsiExt, m, 0, 0, &
MID_SPsi, m, 0, n)
ijob = ijob + 1
return
end if
! -store old ev for convergence checking
if (ijob == SID_ITER + 22) then
Mat_EW = resvec
Mat_Conv = (abs(Mat_EW(1:n) - Mat_EWold(1:n)) < tol_iter)
next = count(.not. Mat_Conv)
if (ijob == SID_ITER + 28) then
Vec_ev = resvec
do it = 1, n_state
if (abs(Vec_ev(it) - Vec_evold(it)/abs(Vec_ev(it))) &
< tol_iter) then
Vec_conv(it) = 1.0_r8
else
Vec_conv(it) = 0.0_r8
end if
end do
n = n + next
Mat_EWold = Mat_EW
next = n_state - int(sum(Vec_conv(1:n)), i4)
Vec_evold = Vec_ev
call rci_op_null(task)
if (next == 0 .or. n > max_n .or. niter == max_iter) then
if (next == 0 .or. n + next > max_n .or. niter >= max_iter) then
ijob = SID_FINISH
return
else
......@@ -451,24 +545,46 @@ contains
end if
! - Finish the iteration
! - PsiExt = Psi * VR
if (ijob == SID_FINISH) then
call rci_op_gemm(iS, task, 'N', 'N', m, n, n, 1.0_r8, &
MID_Psi, m, MID_VR, max_n, 0.0_r8, &
resvec = Vec_conv
iti = next
do it = 1, n
if (Vec_conv(it) < 0.5_r8) then
resvec(it) = 1.0_r8
next = next - 1
end if
if (next == 0) then
exit
end if
end do
call rci_op_subcol(iS, task, m, n_state, MID_VR, MID_VRsub)
ijob = ijob + 1
return
end if
! - PsiExt = Psi * VR
if (ijob == SID_FINISH + 1) then
call rci_op_gemm(iS, task, 'N', 'N', m, n_state, n, 1.0_r8, &
MID_Psi, m, MID_VRsub, max_n, 0.0_r8, &
MID_PsiExt, m)
ijob = ijob + 1
return
end if
! - Psi = PsiExt
if (ijob == SID_FINISH + 1) then
if (ijob == SID_FINISH + 2) then
call rci_op_copy(iS, task, 'N', MID_PsiExt, MID_Psi)
ijob = ijob + 1
return
end if
! - Converge or Stop
if (ijob == SID_FINISH + 1) then
if (ijob == SID_FINISH + 3) then
resvec(1) = sum(Vec_ev(1:n_state))
deallocate (Vec_conv)
deallocate (Vec_ev)
deallocate (Vec_evsub)
deallocate (Vec_evold)
call rci_op_converge(task, next == 0)
return
end if
......
......@@ -56,8 +56,9 @@ module ELSI_RCI_OMM
integer(i4), parameter :: SID_UPDATE = 300
integer(i4), parameter :: SID_LS_FAIL = 400
integer(i4), parameter :: SID_LS_CONV = 500
integer(i4), parameter :: SID_ALLOCATE = 600
integer(i4), parameter :: SID_DEALLOCATE = 700
integer(i4), parameter :: SID_FINISH = 600
integer(i4), parameter :: SID_ALLOCATE = 700
integer(i4), parameter :: SID_DEALLOCATE = 800
!&>
contains
......@@ -167,7 +168,6 @@ contains
!**** INPUT ***********************************!
type(rci_handle), intent(in) :: r_h
real(r8), intent(in) :: resvec(:)
!**** OUTPUT **********************************!
integer, intent(out) :: task
......@@ -175,6 +175,7 @@ contains
!**** INOUT ***********************************!
integer, intent(inout) :: ijob ! job id
type(rci_instr), intent(inout) :: iS
real(r8), intent(inout) :: resvec(:)
!**** LOCAL ***********************************!
integer, save :: m ! size of basis
......@@ -563,6 +564,7 @@ contains
ls_conv = .true.
icg = icg + 1
e_diff = 2.0_r8*abs((e_min - e_min_old)/(e_min + e_min_old))
resvec(1) = e_min
!write (*, '("Iter: ", I4, "E_min: ",E15.3,"E_diff: ",E15.3)') &
! icg, e_min, e_diff
if (e_diff <= tol_iter) then
......
......@@ -16,8 +16,8 @@ module ELSI_RCI_CONSTANTS
!&<
! Constant of solver
integer(i4), parameter :: RCI_SOLVER = -1
integer(i4), parameter :: RCI_SOLVER_DAVIDSON = 0
integer(i4), parameter :: RCI_SOLVER_OMM = 1
integer(i4), parameter :: RCI_SOLVER_DAVIDSON = 1
integer(i4), parameter :: RCI_SOLVER_OMM = 2
! Constant of ijob
integer(i4), parameter :: RCI_INIT_IJOB = -1
......@@ -34,16 +34,15 @@ module ELSI_RCI_CONSTANTS
integer(i4), parameter :: RCI_GEMM = 8
integer(i4), parameter :: RCI_AXPY = 9
integer(i4), parameter :: RCI_COPY = 10
integer(i4), parameter :: RCI_TRACE = 11
integer(i4), parameter :: RCI_DOT = 12
integer(i4), parameter :: RCI_SCALE = 13
integer(i4), parameter :: RCI_COLSCALE = 14
integer(i4), parameter :: RCI_ROWSCALE = 15
integer(i4), parameter :: RCI_HEGV = 16
integer(i4), parameter :: RCI_CBIND = 17
integer(i4), parameter :: RCI_RBIND = 18
integer(i4), parameter :: RCI_COL_NORM = 19
integer(i4), parameter :: RCI_SUBCOL = 20
integer(i4), parameter :: RCI_SUBCOPY = 11
integer(i4), parameter :: RCI_TRACE = 12
integer(i4), parameter :: RCI_DOT = 13
integer(i4), parameter :: RCI_SCALE = 14
integer(i4), parameter :: RCI_COLSCALE = 15
integer(i4), parameter :: RCI_ROWSCALE = 16
integer(i4), parameter :: RCI_HEGV = 17
integer(i4), parameter :: RCI_COL_NORM = 18
integer(i4), parameter :: RCI_SUBCOL = 19
integer(i4), parameter :: RCI_APPROX_SOLVE = 51
!&>
......
......@@ -23,11 +23,12 @@ module ELSI_RCI_DATATYPE
character :: trH, trS, trP, trA, trB ! Operation for H, S, P, A, B
integer :: m, n ! size of the output matrix
integer :: m1, m2 ! size of matrices in binding
integer :: n1, n2 ! size of matrices in binding
integer :: k ! size of the intermedia multiplication
integer :: lda, ldb, ldc ! leading size for matrix A, B and C
integer :: rAoff,cAoff ! row and column offset of A
integer :: rBoff,cBoff ! row and column offset of B
integer :: Aidx, Bidx, Cidx ! indices for matrix A, B and C
real(r8) :: alpha, beta ! coefficients
......@@ -46,6 +47,7 @@ module ELSI_RCI_DATATYPE
! Iteration
integer(i4) :: max_iter
integer(i4) :: n_res ! size of resvec
real(r8) :: tol_iter
! Physics
......
......@@ -51,9 +51,8 @@ contains
! B = A^(trA)
! - rci_op_copy(iS,task,trA,Aidx,Bidx)
! B(roffB:rendB,coffB:cendB) = A(roffA:rendA,coffA:cendA)
! TODO
! - rci_op_subcopy(iS,task,trA,m,n,Aidx,Bidx)
! B(rBoff+(1:m),cBoff+(1:n)) = A(rAoff+(1:m),cAoff+(1:n))
! - rci_op_subcopy(iS,task,m,n,Aidx,rAoff,cAoff,Bidx,rBoff,cBoff)
! A = alpha * A
! - rci_op_scale(iS,task,m,n,alpha,Aidx,lda)
......@@ -346,6 +345,39 @@ contains
end subroutine rci_op_copy