mirror of
https://github.com/arcan1s/pkgbuild.git
synced 2025-04-24 23:47:17 +00:00
266 lines
8.6 KiB
Diff
266 lines
8.6 KiB
Diff
diff -uNr espresso-5.0.2/Modules/mp.f90 espresso-5.0.2-GPU/Modules/mp.f90
|
|
--- espresso-5.0.2/Modules/mp.f90 2013-05-31 14:19:32.000000000 +0100
|
|
+++ espresso-5.0.2-GPU/Modules/mp.f90 2013-05-31 14:24:52.054367809 +0100
|
|
@@ -159,6 +159,10 @@
|
|
IF (ierr/=0) CALL mp_stop( 8006 )
|
|
# endif
|
|
|
|
+#if defined(__CUDA) || defined(__PHIGEMM )
|
|
+ CALL InitCudaEnv()
|
|
+#endif
|
|
+
|
|
RETURN
|
|
END SUBROUTINE mp_start
|
|
!
|
|
@@ -172,6 +176,10 @@
|
|
ierr = 0
|
|
taskid = 0
|
|
|
|
+#if defined(__CUDA) || defined(__PHIGEMM )
|
|
+ CALL CloseCudaEnv()
|
|
+#endif
|
|
+
|
|
#if defined __HPM
|
|
|
|
! terminate the IBM Harware performance monitor
|
|
diff -uNr espresso-5.0.2/PW/src/addusdens.f90 espresso-5.0.2-GPU/PW/src/addusdens.f90
|
|
--- espresso-5.0.2/PW/src/addusdens.f90 2013-05-31 14:19:32.000000000 +0100
|
|
+++ espresso-5.0.2-GPU/PW/src/addusdens.f90 2013-05-31 14:30:03.720305082 +0100
|
|
@@ -1,5 +1,5 @@
|
|
!
|
|
-! Copyright (C) 2001-2006 Quantum ESPRESSO group
|
|
+! Copyright (C) 2001-2013 Quantum ESPRESSO group
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
@@ -24,7 +24,11 @@
|
|
IF ( tqr ) THEN
|
|
CALL addusdens_r(rho,.true.)
|
|
ELSE
|
|
+#if defined(__CUDA) && !defined(__DISABLE_CUDA_ADDUSDENS)
|
|
+ CALL addusdens_g_gpu(rho)
|
|
+#else
|
|
CALL addusdens_g(rho)
|
|
+#endif
|
|
END IF
|
|
!
|
|
RETURN
|
|
diff -uNr espresso-5.0.2/PW/src/cdiaghg.f90 espresso-5.0.2-GPU/PW/src/cdiaghg.f90
|
|
--- espresso-5.0.2/PW/src/cdiaghg.f90 2013-05-31 14:19:32.000000000 +0100
|
|
+++ espresso-5.0.2-GPU/PW/src/cdiaghg.f90 2013-05-31 14:29:38.584367860 +0100
|
|
@@ -1,5 +1,5 @@
|
|
!
|
|
-! Copyright (C) 2001-2006 Quantum ESPRESSO group
|
|
+! Copyright (C) 2001-2013 Quantum ESPRESSO group
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
@@ -17,6 +17,33 @@
|
|
! ... Hv=eSv, with H hermitean matrix, S overlap matrix.
|
|
! ... On output both matrix are unchanged
|
|
!
|
|
+ USE kinds, ONLY : DP
|
|
+ !
|
|
+ IMPLICIT NONE
|
|
+ !
|
|
+ INTEGER, INTENT(IN) :: n, m, ldh
|
|
+ COMPLEX(DP), INTENT(INOUT) :: h(ldh,n), s(ldh,n)
|
|
+ REAL(DP), INTENT(OUT) :: e(n)
|
|
+ COMPLEX(DP), INTENT(OUT) :: v(ldh,m)
|
|
+ !
|
|
+#if defined(__CUDA) && defined(__MAGMA)
|
|
+ CALL cdiaghg_gpu( n, m, h, s, ldh, e, v )
|
|
+#else
|
|
+ CALL cdiaghg_compute( n, m, h, s, ldh, e, v )
|
|
+#endif
|
|
+ !
|
|
+ RETURN
|
|
+ !
|
|
+END SUBROUTINE cdiaghg
|
|
+
|
|
+!----------------------------------------------------------------------------
|
|
+SUBROUTINE cdiaghg_compute( n, m, h, s, ldh, e, v )
|
|
+ !----------------------------------------------------------------------------
|
|
+ !
|
|
+ ! ... calculates eigenvalues and eigenvectors of the generalized problem
|
|
+ ! ... Hv=eSv, with H hermitean matrix, S overlap matrix.
|
|
+ ! ... On output both matrix are unchanged
|
|
+ !
|
|
! ... LAPACK version - uses both ZHEGV and ZHEGVX
|
|
!
|
|
USE kinds, ONLY : DP
|
|
@@ -187,7 +214,7 @@
|
|
!
|
|
RETURN
|
|
!
|
|
-END SUBROUTINE cdiaghg
|
|
+END SUBROUTINE cdiaghg_compute
|
|
!
|
|
!----------------------------------------------------------------------------
|
|
SUBROUTINE pcdiaghg( n, h, s, ldh, e, v, desc )
|
|
diff -uNr espresso-5.0.2/PW/src/newd.f90 espresso-5.0.2-GPU/PW/src/newd.f90
|
|
--- espresso-5.0.2/PW/src/newd.f90 2013-05-31 14:19:32.000000000 +0100
|
|
+++ espresso-5.0.2-GPU/PW/src/newd.f90 2013-05-31 14:28:51.751391724 +0100
|
|
@@ -1,5 +1,5 @@
|
|
!
|
|
-! Copyright (C) 2001-2010 Quantum ESPRESSO group
|
|
+! Copyright (C) 2001-2013 Quantum ESPRESSO group
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
@@ -10,12 +10,41 @@
|
|
|
|
CONTAINS
|
|
!---------------------------------------
|
|
+
|
|
SUBROUTINE newq(vr,deeq,skip_vltot)
|
|
!
|
|
! This routine computes the integral of the perturbed potential with
|
|
! the Q function
|
|
!
|
|
USE kinds, ONLY : DP
|
|
+ USE fft_base, ONLY : dfftp
|
|
+ USE ions_base, ONLY : nat
|
|
+ USE lsda_mod, ONLY : nspin
|
|
+ USE uspp_param, ONLY : nhm
|
|
+ !
|
|
+ IMPLICIT NONE
|
|
+ !
|
|
+ ! Input: potential , output: contribution to integral
|
|
+ REAL(kind=dp), intent(in) :: vr(dfftp%nnr,nspin)
|
|
+ REAL(kind=dp), intent(inout) :: deeq( nhm, nhm, nat, nspin )
|
|
+ LOGICAL, intent(in) :: skip_vltot
|
|
+ !
|
|
+#if defined(__CUDA) && !defined(__DISABLE_CUDA_NEWD)
|
|
+ CALL newq_compute_gpu(vr,deeq,skip_vltot)
|
|
+#else
|
|
+ CALL newq_compute(vr,deeq,skip_vltot)
|
|
+#endif
|
|
+ !
|
|
+ RETURN
|
|
+
|
|
+END SUBROUTINE newq
|
|
+
|
|
+SUBROUTINE newq_compute(vr,deeq,skip_vltot)
|
|
+ !
|
|
+ ! This routine computes the integral of the perturbed potential with
|
|
+ ! the Q function
|
|
+ !
|
|
+ USE kinds, ONLY : DP
|
|
USE ions_base, ONLY : nat, ntyp => nsp, ityp
|
|
USE cell_base, ONLY : omega
|
|
USE fft_base, ONLY : dfftp
|
|
@@ -176,7 +205,7 @@
|
|
!
|
|
DEALLOCATE( aux, qgm, qmod, ylmk0 )
|
|
!
|
|
-END SUBROUTINE newq
|
|
+END SUBROUTINE newq_compute
|
|
!---------------------------------------
|
|
SUBROUTINE newd()
|
|
USE uspp, ONLY : deeq
|
|
diff -uNr espresso-5.0.2/PW/src/pwscf.f90 espresso-5.0.2-GPU/PW/src/pwscf.f90
|
|
--- espresso-5.0.2/PW/src/pwscf.f90 2013-05-31 14:19:32.000000000 +0100
|
|
+++ espresso-5.0.2-GPU/PW/src/pwscf.f90 2013-05-31 14:25:31.013375854 +0100
|
|
@@ -36,13 +36,13 @@
|
|
!
|
|
CHARACTER(len=256) :: dirname
|
|
!
|
|
-#ifdef __MPI
|
|
- !
|
|
+
|
|
CALL mp_startup ( )
|
|
! reset IO nodes
|
|
! (do this to make each "image head node" an ionode)
|
|
! Has to be used ONLY to run nimage copies of pwscf
|
|
!
|
|
+#ifdef __MPI
|
|
IF ( nimage > 1 ) CALL io_image_start( )
|
|
#endif
|
|
CALL environment_start ( 'PWSCF' )
|
|
diff -uNr espresso-5.0.2/PW/src/rdiaghg.f90 espresso-5.0.2-GPU/PW/src/rdiaghg.f90
|
|
--- espresso-5.0.2/PW/src/rdiaghg.f90 2013-05-31 14:19:32.000000000 +0100
|
|
+++ espresso-5.0.2-GPU/PW/src/rdiaghg.f90 2013-05-31 14:27:59.078331418 +0100
|
|
@@ -1,5 +1,5 @@
|
|
!
|
|
-! Copyright (C) 2003-2006 Quantum ESPRESSO group
|
|
+! Copyright (C) 2003-2013 Quantum ESPRESSO group
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
@@ -14,6 +14,33 @@
|
|
! ... Hv=eSv, with H symmetric matrix, S overlap matrix.
|
|
! ... On output both matrix are unchanged
|
|
!
|
|
+ USE kinds, ONLY : DP
|
|
+ !
|
|
+ IMPLICIT NONE
|
|
+ !
|
|
+ INTEGER, INTENT(IN) :: n, m, ldh
|
|
+ REAL(DP), INTENT(INOUT) :: h(ldh,n), s(ldh,n)
|
|
+ REAL(DP), INTENT(OUT) :: e(n)
|
|
+ REAL(DP), INTENT(OUT) :: v(ldh,m)
|
|
+ !
|
|
+#if defined(__CUDA) && defined(__MAGMA)
|
|
+ CALL rdiaghg_gpu( n, m, h, s, ldh, e, v )
|
|
+#else
|
|
+ CALL rdiaghg_compute( n, m, h, s, ldh, e, v )
|
|
+#endif
|
|
+ !
|
|
+ RETURN
|
|
+ !
|
|
+END SUBROUTINE rdiaghg
|
|
+
|
|
+!----------------------------------------------------------------------------
|
|
+SUBROUTINE rdiaghg_compute( n, m, h, s, ldh, e, v )
|
|
+ !----------------------------------------------------------------------------
|
|
+ !
|
|
+ ! ... calculates eigenvalues and eigenvectors of the generalized problem
|
|
+ ! ... Hv=eSv, with H symmetric matrix, S overlap matrix.
|
|
+ ! ... On output both matrix are unchanged
|
|
+ !
|
|
! ... LAPACK version - uses both DSYGV and DSYGVX
|
|
!
|
|
USE kinds, ONLY : DP
|
|
@@ -169,7 +196,7 @@
|
|
!
|
|
RETURN
|
|
!
|
|
-END SUBROUTINE rdiaghg
|
|
+END SUBROUTINE rdiaghg_compute
|
|
!
|
|
!----------------------------------------------------------------------------
|
|
SUBROUTINE prdiaghg( n, h, s, ldh, e, v, desc )
|
|
diff -uNr espresso-5.0.2/PW/src/vloc_psi.f90 espresso-5.0.2-GPU/PW/src/vloc_psi.f90
|
|
--- espresso-5.0.2/PW/src/vloc_psi.f90 2013-05-31 14:19:32.000000000 +0100
|
|
+++ espresso-5.0.2-GPU/PW/src/vloc_psi.f90 2013-05-31 14:27:27.759367967 +0100
|
|
@@ -1,5 +1,5 @@
|
|
!
|
|
-! Copyright (C) 2003-2009 PWSCF group
|
|
+! Copyright (C) 2003-2013 PWSCF group
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License. See the file `License'
|
|
! in the root directory of the present distribution,
|
|
@@ -37,6 +37,10 @@
|
|
COMPLEX(DP), ALLOCATABLE :: tg_psic(:)
|
|
INTEGER :: v_siz, idx, ioff
|
|
!
|
|
+#if (defined(__CUDA) && !defined(__DISABLE_CUDA_VLOCPSI) && !defined(__PARA)) || (defined(__CUDA) && !defined(__DISABLE_CUDA_VLOCPSI) && defined(__PARA) && defined(__USE_3D_FFT))
|
|
+ CALL vloc_psi_gamma_gpu ( lda, n, m, psi, v, hpsi )
|
|
+ RETURN
|
|
+#endif
|
|
!
|
|
incr = 2
|
|
!
|
|
@@ -222,6 +226,10 @@
|
|
COMPLEX(DP), ALLOCATABLE :: tg_psic(:)
|
|
INTEGER :: v_siz, idx, ioff
|
|
!
|
|
+#if (defined(__CUDA) && !defined(__DISABLE_CUDA_VLOCPSI) && !defined(__PARA)) || (defined(__CUDA) && !defined(__DISABLE_CUDA_VLOCPSI) && defined(__PARA) && defined(__USE_3D_FFT))
|
|
+ CALL vloc_psi_k_gpu ( lda, n, m, psi, v, hpsi )
|
|
+ RETURN
|
|
+#endif
|
|
!
|
|
! The following is dirty trick to prevent usage of task groups if
|
|
! the number of bands is smaller than the number of task groups
|