pkgbuild/quantum-espresso-gpu/QE-5.0.2_GPU-r216.patch
2013-09-14 23:24:47 +04:00

266 lines
8.6 KiB
Diff

diff -uNr espresso-5.0.2/Modules/mp.f90 espresso-5.0.2-GPU/Modules/mp.f90
--- espresso-5.0.2/Modules/mp.f90 2013-05-31 14:19:32.000000000 +0100
+++ espresso-5.0.2-GPU/Modules/mp.f90 2013-05-31 14:24:52.054367809 +0100
@@ -159,6 +159,10 @@
IF (ierr/=0) CALL mp_stop( 8006 )
# endif
+#if defined(__CUDA) || defined(__PHIGEMM )
+ CALL InitCudaEnv()
+#endif
+
RETURN
END SUBROUTINE mp_start
!
@@ -172,6 +176,10 @@
ierr = 0
taskid = 0
+#if defined(__CUDA) || defined(__PHIGEMM )
+ CALL CloseCudaEnv()
+#endif
+
#if defined __HPM
! terminate the IBM Harware performance monitor
diff -uNr espresso-5.0.2/PW/src/addusdens.f90 espresso-5.0.2-GPU/PW/src/addusdens.f90
--- espresso-5.0.2/PW/src/addusdens.f90 2013-05-31 14:19:32.000000000 +0100
+++ espresso-5.0.2-GPU/PW/src/addusdens.f90 2013-05-31 14:30:03.720305082 +0100
@@ -1,5 +1,5 @@
!
-! Copyright (C) 2001-2006 Quantum ESPRESSO group
+! Copyright (C) 2001-2013 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
@@ -24,7 +24,11 @@
IF ( tqr ) THEN
CALL addusdens_r(rho,.true.)
ELSE
+#if defined(__CUDA) && !defined(__DISABLE_CUDA_ADDUSDENS)
+ CALL addusdens_g_gpu(rho)
+#else
CALL addusdens_g(rho)
+#endif
END IF
!
RETURN
diff -uNr espresso-5.0.2/PW/src/cdiaghg.f90 espresso-5.0.2-GPU/PW/src/cdiaghg.f90
--- espresso-5.0.2/PW/src/cdiaghg.f90 2013-05-31 14:19:32.000000000 +0100
+++ espresso-5.0.2-GPU/PW/src/cdiaghg.f90 2013-05-31 14:29:38.584367860 +0100
@@ -1,5 +1,5 @@
!
-! Copyright (C) 2001-2006 Quantum ESPRESSO group
+! Copyright (C) 2001-2013 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
@@ -17,6 +17,33 @@
! ... Hv=eSv, with H hermitean matrix, S overlap matrix.
! ... On output both matrix are unchanged
!
+ USE kinds, ONLY : DP
+ !
+ IMPLICIT NONE
+ !
+ INTEGER, INTENT(IN) :: n, m, ldh
+ COMPLEX(DP), INTENT(INOUT) :: h(ldh,n), s(ldh,n)
+ REAL(DP), INTENT(OUT) :: e(n)
+ COMPLEX(DP), INTENT(OUT) :: v(ldh,m)
+ !
+#if defined(__CUDA) && defined(__MAGMA)
+ CALL cdiaghg_gpu( n, m, h, s, ldh, e, v )
+#else
+ CALL cdiaghg_compute( n, m, h, s, ldh, e, v )
+#endif
+ !
+ RETURN
+ !
+END SUBROUTINE cdiaghg
+
+!----------------------------------------------------------------------------
+SUBROUTINE cdiaghg_compute( n, m, h, s, ldh, e, v )
+ !----------------------------------------------------------------------------
+ !
+ ! ... calculates eigenvalues and eigenvectors of the generalized problem
+ ! ... Hv=eSv, with H hermitean matrix, S overlap matrix.
+ ! ... On output both matrix are unchanged
+ !
! ... LAPACK version - uses both ZHEGV and ZHEGVX
!
USE kinds, ONLY : DP
@@ -187,7 +214,7 @@
!
RETURN
!
-END SUBROUTINE cdiaghg
+END SUBROUTINE cdiaghg_compute
!
!----------------------------------------------------------------------------
SUBROUTINE pcdiaghg( n, h, s, ldh, e, v, desc )
diff -uNr espresso-5.0.2/PW/src/newd.f90 espresso-5.0.2-GPU/PW/src/newd.f90
--- espresso-5.0.2/PW/src/newd.f90 2013-05-31 14:19:32.000000000 +0100
+++ espresso-5.0.2-GPU/PW/src/newd.f90 2013-05-31 14:28:51.751391724 +0100
@@ -1,5 +1,5 @@
!
-! Copyright (C) 2001-2010 Quantum ESPRESSO group
+! Copyright (C) 2001-2013 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
@@ -10,12 +10,41 @@
CONTAINS
!---------------------------------------
+
SUBROUTINE newq(vr,deeq,skip_vltot)
!
! This routine computes the integral of the perturbed potential with
! the Q function
!
USE kinds, ONLY : DP
+ USE fft_base, ONLY : dfftp
+ USE ions_base, ONLY : nat
+ USE lsda_mod, ONLY : nspin
+ USE uspp_param, ONLY : nhm
+ !
+ IMPLICIT NONE
+ !
+ ! Input: potential , output: contribution to integral
+ REAL(kind=dp), intent(in) :: vr(dfftp%nnr,nspin)
+ REAL(kind=dp), intent(inout) :: deeq( nhm, nhm, nat, nspin )
+ LOGICAL, intent(in) :: skip_vltot
+ !
+#if defined(__CUDA) && !defined(__DISABLE_CUDA_NEWD)
+ CALL newq_compute_gpu(vr,deeq,skip_vltot)
+#else
+ CALL newq_compute(vr,deeq,skip_vltot)
+#endif
+ !
+ RETURN
+
+END SUBROUTINE newq
+
+SUBROUTINE newq_compute(vr,deeq,skip_vltot)
+ !
+ ! This routine computes the integral of the perturbed potential with
+ ! the Q function
+ !
+ USE kinds, ONLY : DP
USE ions_base, ONLY : nat, ntyp => nsp, ityp
USE cell_base, ONLY : omega
USE fft_base, ONLY : dfftp
@@ -176,7 +205,7 @@
!
DEALLOCATE( aux, qgm, qmod, ylmk0 )
!
-END SUBROUTINE newq
+END SUBROUTINE newq_compute
!---------------------------------------
SUBROUTINE newd()
USE uspp, ONLY : deeq
diff -uNr espresso-5.0.2/PW/src/pwscf.f90 espresso-5.0.2-GPU/PW/src/pwscf.f90
--- espresso-5.0.2/PW/src/pwscf.f90 2013-05-31 14:19:32.000000000 +0100
+++ espresso-5.0.2-GPU/PW/src/pwscf.f90 2013-05-31 14:25:31.013375854 +0100
@@ -36,13 +36,13 @@
!
CHARACTER(len=256) :: dirname
!
-#ifdef __MPI
- !
+
CALL mp_startup ( )
! reset IO nodes
! (do this to make each "image head node" an ionode)
! Has to be used ONLY to run nimage copies of pwscf
!
+#ifdef __MPI
IF ( nimage > 1 ) CALL io_image_start( )
#endif
CALL environment_start ( 'PWSCF' )
diff -uNr espresso-5.0.2/PW/src/rdiaghg.f90 espresso-5.0.2-GPU/PW/src/rdiaghg.f90
--- espresso-5.0.2/PW/src/rdiaghg.f90 2013-05-31 14:19:32.000000000 +0100
+++ espresso-5.0.2-GPU/PW/src/rdiaghg.f90 2013-05-31 14:27:59.078331418 +0100
@@ -1,5 +1,5 @@
!
-! Copyright (C) 2003-2006 Quantum ESPRESSO group
+! Copyright (C) 2003-2013 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
@@ -14,6 +14,33 @@
! ... Hv=eSv, with H symmetric matrix, S overlap matrix.
! ... On output both matrix are unchanged
!
+ USE kinds, ONLY : DP
+ !
+ IMPLICIT NONE
+ !
+ INTEGER, INTENT(IN) :: n, m, ldh
+ REAL(DP), INTENT(INOUT) :: h(ldh,n), s(ldh,n)
+ REAL(DP), INTENT(OUT) :: e(n)
+ REAL(DP), INTENT(OUT) :: v(ldh,m)
+ !
+#if defined(__CUDA) && defined(__MAGMA)
+ CALL rdiaghg_gpu( n, m, h, s, ldh, e, v )
+#else
+ CALL rdiaghg_compute( n, m, h, s, ldh, e, v )
+#endif
+ !
+ RETURN
+ !
+END SUBROUTINE rdiaghg
+
+!----------------------------------------------------------------------------
+SUBROUTINE rdiaghg_compute( n, m, h, s, ldh, e, v )
+ !----------------------------------------------------------------------------
+ !
+ ! ... calculates eigenvalues and eigenvectors of the generalized problem
+ ! ... Hv=eSv, with H symmetric matrix, S overlap matrix.
+ ! ... On output both matrix are unchanged
+ !
! ... LAPACK version - uses both DSYGV and DSYGVX
!
USE kinds, ONLY : DP
@@ -169,7 +196,7 @@
!
RETURN
!
-END SUBROUTINE rdiaghg
+END SUBROUTINE rdiaghg_compute
!
!----------------------------------------------------------------------------
SUBROUTINE prdiaghg( n, h, s, ldh, e, v, desc )
diff -uNr espresso-5.0.2/PW/src/vloc_psi.f90 espresso-5.0.2-GPU/PW/src/vloc_psi.f90
--- espresso-5.0.2/PW/src/vloc_psi.f90 2013-05-31 14:19:32.000000000 +0100
+++ espresso-5.0.2-GPU/PW/src/vloc_psi.f90 2013-05-31 14:27:27.759367967 +0100
@@ -1,5 +1,5 @@
!
-! Copyright (C) 2003-2009 PWSCF group
+! Copyright (C) 2003-2013 PWSCF group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
@@ -37,6 +37,10 @@
COMPLEX(DP), ALLOCATABLE :: tg_psic(:)
INTEGER :: v_siz, idx, ioff
!
+#if (defined(__CUDA) && !defined(__DISABLE_CUDA_VLOCPSI) && !defined(__PARA)) || (defined(__CUDA) && !defined(__DISABLE_CUDA_VLOCPSI) && defined(__PARA) && defined(__USE_3D_FFT))
+ CALL vloc_psi_gamma_gpu ( lda, n, m, psi, v, hpsi )
+ RETURN
+#endif
!
incr = 2
!
@@ -222,6 +226,10 @@
COMPLEX(DP), ALLOCATABLE :: tg_psic(:)
INTEGER :: v_siz, idx, ioff
!
+#if (defined(__CUDA) && !defined(__DISABLE_CUDA_VLOCPSI) && !defined(__PARA)) || (defined(__CUDA) && !defined(__DISABLE_CUDA_VLOCPSI) && defined(__PARA) && defined(__USE_3D_FFT))
+ CALL vloc_psi_k_gpu ( lda, n, m, psi, v, hpsi )
+ RETURN
+#endif
!
! The following is dirty trick to prevent usage of task groups if
! the number of bands is smaller than the number of task groups