diff --git a/configure b/configure
index cac64da731..3cabc9155e 100755
--- a/configure
+++ b/configure
@@ -2823,10 +2823,11 @@ static char *f (char * (*g) (char **, int), char **p, ...)
    that is true only with -std.  */
 int osf4_cc_array ['\''\x00'\'' == 0 ? 1 : -1];
 
-/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
-   inside strings and character constants.  */
-#define FOO(x) '\''x'\''
-int xlc6_cc_array[FOO(a) == '\''x'\'' ? 1 : -1];
+SVERSION="5"
+SSUBVERSION="1"
+SPATCHLEVEL="0"
+SREVISION="21574"
+SHASH="08ac288ff"
 
 int test (int i, double x);
 struct s1 {int (*f) (int a);};
diff --git a/driver/yambo.F b/driver/yambo.F
index 35dcab83ca..42190a6c7f 100644
--- a/driver/yambo.F
+++ b/driver/yambo.F
@@ -187,7 +187,7 @@ integer function yambo(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_d
  !
  driver_now=l_HF_and_locXC.and..not.any((/l_sc_run,l_eval_collisions,l_real_time/))
  !
- if (driver_now) call XCo_driver(en,k,Xk,q)
+ if (driver_now) call XCo_driver(en,k,Xk,q,Dip)
  if (driver_now) call mem_manager_report
  !
  ! EXTENDED COLLISIONS 
diff --git a/include/driver/version.h b/include/driver/version.h
new file mode 100644
index 0000000000..d0dc6efe44
--- /dev/null
+++ b/include/driver/version.h
@@ -0,0 +1,30 @@
+/*
+         Copyright (C) 2000-2022 the YAMBO team
+               http://www.yambo-code.org
+ 
+  Authors (see AUTHORS file for details): AM
+  
+  This file is distributed under the terms of the GNU 
+  General Public License. You can redistribute it and/or 
+  modify it under the terms of the GNU General Public 
+  License as published by the Free Software Foundation; 
+  either version 2, or (at your option) any later version.
+ 
+  This program is distributed in the hope that it will 
+  be useful, but WITHOUT ANY WARRANTY; without even the 
+  implied warranty of MERCHANTABILITY or FITNESS FOR A 
+  PARTICULAR PURPOSE.  See the GNU General Public License 
+  for more details.
+ 
+  You should have received a copy of the GNU General Public 
+  License along with this program; if not, write to the Free 
+  Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+  MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+*/
+
+#define YAMBO_VERSION 5
+#define YAMBO_SUBVERSION 1
+#define YAMBO_PATCHLEVEL 0
+#define YAMBO_REVISION 21786
+#define YAMBO_HASH "995236b2a" 
+
diff --git a/interfaces/int_modules/mod_com2y.F b/interfaces/int_modules/mod_com2y.F
index e42298891c..3ca89e8a8c 100644
--- a/interfaces/int_modules/mod_com2y.F
+++ b/interfaces/int_modules/mod_com2y.F
@@ -16,6 +16,7 @@ module mod_com2y
  logical :: force_noSYMM
  logical :: artificial_spin_pol
  logical :: verboseIO
+ logical :: write_Vloc_Vnl
  !
  integer :: ng_vec_abinit, wf_nb_io_user
  !
@@ -47,6 +48,7 @@ subroutine interface_presets(in_string)
      force_noWFs          =index(in_string,'nowf')>0
      artificial_spin_pol  =index(in_string,'dupl')>0
      verboseIO            =index(in_string,'verb')>0
+     write_Vloc_Vnl       =index(in_string,'pseu')>0
      !
      alat_mult_factor=1.
      wf_nb_io_user=0
diff --git a/interfaces/p2y/mod_p2y.F b/interfaces/p2y/mod_p2y.F
index 5f811e96cd..147180c336 100644
--- a/interfaces/p2y/mod_p2y.F
+++ b/interfaces/p2y/mod_p2y.F
@@ -14,7 +14,7 @@ module P2Ym
  use pars,                  ONLY : lchlen,SP,DP,rZERO
  use electrons,             ONLY : levels
  use R_lattice,             ONLY : bz_samp
- use mod_com2y,             ONLY : verboseIO
+ use mod_com2y,             ONLY : verboseIO,write_Vloc_Vnl
  use parallel_m,            ONLY : myid
  use parallel_int,          ONLY : PP_bcast   
  use units,                 ONLY : Da2AU
@@ -851,7 +851,9 @@ subroutine get_xc
       if (ierr/=0) call errore('qexml_read_xc','IOTK error',abs(ierr))
       !
       if (trim(pw_dft)=="B3LYP") call warning('For a full compatible B3LYP calculation consider to use input_dft=B3LYP-V1R')
-      if(pw_lda_plus_u) call error(' LDA+U. Hubbard correction is not considered in yambo.')
+      if (pw_lda_plus_u.and..not.write_Vloc_Vnl) then
+        call error(' DFT+U. Rerun p2y with "-p" flag and use the BareHfromScratch flag for GW runs.')
+      endif
       GS_xc_FUNCTIONAL = XC_yamboID('pwscf_',pw_func=pw_dft)
       if (GS_xc_FUNCTIONAL.eq.XC_HYB_GGA_XC_GAUPBE*XC_FACTOR) then
         GS_xc_KIND = 3
@@ -890,7 +892,9 @@ subroutine get_xc
       if (ierr/=0) call errore('qexsd_read_xc','fmt error',abs(ierr))
       !
       if (trim(pw_dft)=="B3LYP") call warning('For a full compatible B3LYP calculation consider to use input_dft=B3LYP-V1R')
-      if(pw_lda_plus_u) call error(' LDA+U. Hubbard correction is not considered in yambo.')
+      if (pw_lda_plus_u.and..not.write_Vloc_Vnl) then
+        call error(' DFT+U. Rerun p2y with "-p" flag and use the BareHfromScratch flag for GW runs.')
+      endif
       GS_xc_FUNCTIONAL = XC_yamboID('pwscf_',pw_func=pw_dft)
       if (GS_xc_FUNCTIONAL.eq.XC_HYB_GGA_XC_GAUPBE*XC_FACTOR) then
         GS_xc_KIND = 3
diff --git a/interfaces/p2y/p2y_pseudo.F b/interfaces/p2y/p2y_pseudo.F
index 7a34679f85..badd5a315a 100644
--- a/interfaces/p2y/p2y_pseudo.F
+++ b/interfaces/p2y/p2y_pseudo.F
@@ -22,6 +22,9 @@ subroutine p2y_pseudo(k)
  use IO_int,                ONLY:io_control
  use IO_m,                  ONLY:OP_WR_CL,OP_APP_CL,REP
  use read_pseudo_mod,       ONLY:readpp
+ use mod_com2y,             ONLY:write_Vloc_Vnl
+ use pw_data,               ONLY:ngm_,nsp_
+ use vlocal,                ONLY:vloc_yambo
  !
  implicit none
  !
@@ -33,12 +36,14 @@ subroutine p2y_pseudo(k)
  !
  ! Work space
  !
- integer        :: ngmax,ID,ibeta,ik,io_err,itype
+ integer        :: ngmax,ID,ibeta,ik,io_err,itype,ID_vloc
  character(64)  :: input_dft="none"
+ logical        :: l_Vnl
  !
  integer, external :: io_KB_pwscf
  integer, external :: io_NLCC_pwscf
  integer, external :: io_USPP_pwscf
+ integer, external :: io_VLOC_pwscf
  !
  interface
     subroutine fill_basis(basis,struct,ik,ngmax)
@@ -71,57 +76,67 @@ end subroutine fill_basis_rho
  !
  call pw_basis_init(basis_con,struct)
  !
- if( all(atoms%pseudo(:)%nbeta==0) ) return
- pp_n_l_times_proj_max=maxval((/atoms%pseudo(:)%nbeta/))
- !
- allocate(pp_table(3,n_atomic_species,pp_n_l_times_proj_max))
- allocate(pp_n_l_comp(n_atomic_species))
- call PP_alloc_PWscf()
- pp_table=0
- !
- pp_n_l_max=0
- do itype=1,n_atomic_species
-   pp_n_l_comp(itype)=maxval((/atoms%pseudo(itype)%lbeta(:)/))
-   if(atoms%pseudo(itype)%nbeta==0) pp_n_l_comp(itype)=0   
-   do ibeta=1,atoms%pseudo(itype)%nbeta
-     pp_n_l_max=(max(pp_n_l_max,atoms%pseudo(itype)%lbeta(ibeta)+1))
-     pp_table(1,itype,ibeta) = atoms%pseudo(itype)%lbeta(ibeta)+1               ! l+1
-     pp_table(2,itype,ibeta) = 0
-     if(atoms%pseudo(itype)%has_so) &
-&      pp_table(2,itype,ibeta) = nint(2._SP*atoms%pseudo(itype)%jbeta(ibeta))   ! 2j
-   enddo
- enddo
- pp_table(3,:,:)=1                                                              ! pp_spin
+ l_Vnl=.true.
+ if( all(atoms%pseudo(:)%nbeta==0) ) l_Vnl=.false. 
  !
- call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),ID=ID)
- io_err=io_KB_pwscf(ID)
+ if(.not.l_Vnl.and..not.write_Vloc_Vnl) return
  !
- do ik=1,k%nibz
+ if(l_Vnl) then
    !
-   call fill_basis(basis_con,struct,ik,ngmax)
-   call PP_PWscf_comp(basis_con,atoms)
+   pp_n_l_times_proj_max=maxval((/atoms%pseudo(:)%nbeta/))
    !
-   ! Write pseudovelocity to disk
-   !
-   call io_control(ACTION=OP_APP_CL,COM=REP,SEC=(/ik+1/),ID=ID)
-   io_err=io_KB_pwscf(ID)
+   allocate(pp_table(3,n_atomic_species,pp_n_l_times_proj_max))
+   allocate(pp_n_l_comp(n_atomic_species))
+   call PP_alloc_PWscf()
+   pp_table=0
    !
- end do
- !
- ! write NLCC info if needed
- !
- if( any(atoms%pseudo(:)%has_nlcc) ) then
+   pp_n_l_max=0
+   do itype=1,n_atomic_species
+     pp_n_l_comp(itype)=maxval((/atoms%pseudo(itype)%lbeta(:)/))
+     if(atoms%pseudo(itype)%nbeta==0) pp_n_l_comp(itype)=0   
+     do ibeta=1,atoms%pseudo(itype)%nbeta
+       pp_n_l_max=(max(pp_n_l_max,atoms%pseudo(itype)%lbeta(ibeta)+1))
+       pp_table(1,itype,ibeta) = atoms%pseudo(itype)%lbeta(ibeta)+1               ! l+1
+       pp_table(2,itype,ibeta) = 0
+       if(atoms%pseudo(itype)%has_so) &
+&        pp_table(2,itype,ibeta) = nint(2._SP*atoms%pseudo(itype)%jbeta(ibeta))   ! 2j
+     enddo
+   enddo
+   pp_table(3,:,:)=1                                                              ! pp_spin
    !
-   call PP_nlcc_alloc()
+   call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),ID=ID)
+   io_err=io_KB_pwscf(ID)
    !
-   call fill_basis_rho(basis_con,struct,ng_vec)
-   call PP_PWscf_comp_nlcc(basis_con,atoms)
+   do ik=1,k%nibz
+     !
+     call fill_basis(basis_con,struct,ik,ngmax)
+     call PP_PWscf_comp(basis_con,atoms)
+     !
+     ! Write pseudovelocity to disk
+     !
+     call io_control(ACTION=OP_APP_CL,COM=REP,SEC=(/ik+1/),ID=ID)
+     io_err=io_KB_pwscf(ID)
+     !
+   end do
    !
-   call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),ID=ID)
-   io_err=io_NLCC_pwscf(ID)
+   ! write NLCC info if needed
    !
-   call PP_nlcc_free()
+   if( any(atoms%pseudo(:)%has_nlcc) ) then
+     !
+     call PP_nlcc_alloc()
+     !
+     call fill_basis_rho(basis_con,struct,ng_vec)
+     call PP_PWscf_comp_nlcc(basis_con,atoms)
+     !
+     call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),ID=ID)
+     io_err=io_NLCC_pwscf(ID)
+     !
+     call PP_nlcc_free()
+     !
+   endif
    !
+ else
+   call warning(' Pseudo-potentials contain only local part!! ')
  endif
  !
  ! write USPP data
@@ -131,14 +146,25 @@ end subroutine fill_basis_rho
  !
  call pre_init()
  call allocate_nlpot()
- !
- if ( any(atoms%pseudo(:)%is_uspp) ) then
+ ! [FP]
+ if ( any(atoms%pseudo(:)%is_uspp) .or. write_Vloc_Vnl ) then
    !
 #ifndef _USPP
-   call error("[PPs] Ultrasoft PP not supported")
+   if (.not. write_Vloc_Vnl) call error("[PPs] Ultrasoft PP not supported")
 #endif
+   ! [FP]
    call init_us_1()
    !
+   if (write_Vloc_Vnl) then
+     !
+     allocate(vloc_yambo(ngm_,nsp_))
+     call set_vloc_yambo()
+     !
+     call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),ID=ID_vloc)
+     io_err = io_VLOC_pwscf(ID_vloc)
+     !
+   endif
+   !
    call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),ID=ID)
    io_err=io_USPP_pwscf(ID)
    !
@@ -149,7 +175,8 @@ end subroutine fill_basis_rho
  ! cleanup
  !
  call PP_free()
- deallocate(pp_table,pp_n_l_comp)
+ if (allocated(pp_table))   deallocate(pp_table,pp_n_l_comp)
+ if (allocated(vloc_yambo)) deallocate(vloc_yambo) 
  !
  return
  !
@@ -309,3 +336,32 @@ subroutine fill_basis_rho(basis,struct,ngmax)
  ! 
  return
 end subroutine fill_basis_rho
+!
+subroutine set_vloc_yambo()
+ use gvect,     ONLY: ngl,igtongl,gshells
+ use pw_data,   ONLY: ngm_,nsp_
+ use vlocal,    ONLY: vloc,vloc_yambo
+ implicit none
+ integer :: ig
+ !
+ ! Allocate shells: gl, ngl, igtongl
+ !
+ call gshells()
+ !
+ ! QE vloc without strf is dimensioned over |G|^2 shells (ngl)
+ !
+ allocate(vloc(ngl,nsp_))
+ !
+ ! Calculate QE vloc
+ !
+ call init_vloc()
+ !
+ ! Set up vloc as a function of Gvects and not Gshells
+ !
+ do ig=1,ngm_
+   vloc_yambo(ig,:) = vloc(igtongl(ig),:)
+ enddo
+ !
+ deallocate(vloc)
+ !
+end subroutine set_vloc_yambo 
diff --git a/lib/qe_pseudo/.objects b/lib/qe_pseudo/.objects
index 435e791d89..f9efab499d 100644
--- a/lib/qe_pseudo/.objects
+++ b/lib/qe_pseudo/.objects
@@ -5,4 +5,5 @@ PP_objects = atom.o becmod.o constants.o kind.o parameters.o pseudo_types.o s_ps
              sph_ind.o spinor.o sph_bes.o qvan2.o setqf.o matches.o erf.o allocate_nlpot.o \
              init_run.o qe_pseudo_module.o qe_errore.o addusdens.o sum_bec.o \
              d_matrix.o
-objs = $(PP_objects)
+VLOC_objects = vlocal.o init_vloc.o vloc_of_g.o
+objs = $(PP_objects) $(VLOC_objects)
diff --git a/lib/qe_pseudo/DOUBLE_project.dep b/lib/qe_pseudo/DOUBLE_project.dep
index eb9021637a..b393d30a64 100644
--- a/lib/qe_pseudo/DOUBLE_project.dep
+++ b/lib/qe_pseudo/DOUBLE_project.dep
@@ -9,6 +9,7 @@
  init_us_0.o
  init_us_1.o
  init_us_2.o
+ init_vloc.o
  invmat.o
  kind.o
  matches.o
@@ -38,5 +39,7 @@
  upf_to_internal.o
  us_module.o
  uspp.o
+ vloc_of_g.o
+ vlocal.o
  ylmr2.o
 
diff --git a/lib/qe_pseudo/init_vloc.F b/lib/qe_pseudo/init_vloc.F
new file mode 100644
index 0000000000..252bc184f7
--- /dev/null
+++ b/lib/qe_pseudo/init_vloc.F
@@ -0,0 +1,49 @@
+!
+! Copyright (C) 2001-2007 Quantum ESPRESSO group
+! This file is distributed under the terms of the
+! GNU General Public License. See the file `License'
+! in the root directory of the present distribution,
+! or http://www.gnu.org/copyleft/gpl.txt .
+!
+!
+!----------------------------------------------------------------------
+SUBROUTINE init_vloc()
+  !----------------------------------------------------------------------
+  !! This routine computes the fourier coefficient of the local
+  !! potential vloc(ig,it) for each type of atom.
+  !
+  USE atom,           ONLY : msh, rgrid
+  !USE m_gth,          ONLY : vloc_gth
+  USE kinds,          ONLY : DP
+  USE uspp_param,     ONLY : upf
+  USE ions_base,      ONLY : ntyp => nsp
+  USE cell_base,      ONLY : omega, tpiba ! [FP] tpiba2 not in module
+  USE vlocal,         ONLY : vloc
+  USE gvect,          ONLY : ngl, gl, gshells
+  !USE Coul_cut_2D,    ONLY : do_cutoff_2D, cutoff_lr_Vloc
+  !
+  IMPLICIT NONE
+  !
+  INTEGER :: nt
+  ! counter on atomic types
+  !
+  vloc(:,:) = 0._DP
+  !
+  do nt = 1, ntyp
+     !
+     ! compute V_loc(G) for a given type of atom
+     !
+     ! [FP] We only consider the normal case. Check the original
+     !      QE subroutine of the same name for more options
+     !
+     ! normal case
+     !
+     call vloc_of_g( rgrid(nt)%mesh, msh(nt), rgrid(nt)%rab, rgrid(nt)%r, &
+                     upf(nt)%vloc(1), upf(nt)%zp, tpiba**2._DP, ngl, gl, omega, vloc(1,nt) )
+     !
+  enddo
+  !
+  ! [FP] QE cutoff_2D goes here. Not supported so far
+  !
+END SUBROUTINE init_vloc
+
diff --git a/lib/qe_pseudo/qe_pseudo_module.F b/lib/qe_pseudo/qe_pseudo_module.F
index 4f4668cf45..6e1ddec6e1 100644
--- a/lib/qe_pseudo/qe_pseudo_module.F
+++ b/lib/qe_pseudo/qe_pseudo_module.F
@@ -44,7 +44,7 @@ subroutine qe_pseudo_allocate()
     allocate(tau(3,nat))
     !
     call allocate_nlpot()
-    ! 
+    !
   end subroutine
 
   subroutine qe_pseudo_deallocate()
diff --git a/lib/qe_pseudo/recvec.F b/lib/qe_pseudo/recvec.F
index d59c0ea4b7..46d07e5084 100644
--- a/lib/qe_pseudo/recvec.F
+++ b/lib/qe_pseudo/recvec.F
@@ -122,6 +122,62 @@ SUBROUTINE deallocate_gvect_exx()
        IF( ALLOCATED( igtongl ) ) DEALLOCATE( igtongl )
        IF( ALLOCATED( ig_l2g ) ) DEALLOCATE( ig_l2g )
      END SUBROUTINE deallocate_gvect_exx
+
+     !-----------------------------------------------------------------------
+     SUBROUTINE gshells ( )!vc )
+        !----------------------------------------------------------------------
+        !
+        ! calculate number of G shells: ngl, and the index ng = igtongl(ig)
+        ! that gives the shell index ng for (local) G-vector of index ig
+        !
+        USE kinds,              ONLY : DP
+        USE constants,          ONLY : eps8
+        !
+        IMPLICIT NONE
+        !
+        !LOGICAL, INTENT(IN) :: vc
+        !
+        INTEGER :: ng, igl
+        !
+        ! [FP]
+        ALLOCATE( igtongl(ngm) )
+        ! [FP]
+        !IF ( vc ) THEN
+           !
+           ! in case of a variable cell run each G vector has its shell
+           !
+           !ngl = ngm
+           !gl => gg
+           !DO ng = 1, ngm
+           !   igtongl (ng) = ng
+           !ENDDO
+        !ELSE
+           !
+           ! G vectors are grouped in shells with the same norm
+           !
+        ngl = 1
+        igtongl (1) = 1
+        DO ng = 2, ngm
+           IF (gg (ng) > gg (ng - 1) + eps8) THEN
+              ngl = ngl + 1
+           ENDIF
+           igtongl (ng) = ngl
+        ENDDO
+
+        ALLOCATE (gl( ngl))
+        gl (1) = gg (1)
+        igl = 1
+        DO ng = 2, ngm
+           IF (gg (ng) > gg (ng - 1) + eps8) THEN
+              igl = igl + 1
+              gl (igl) = gg (ng)
+           ENDIF
+        ENDDO
+
+        IF (igl /= ngl) CALL errore ('gshells', 'igl <> ngl', ngl)
+
+        !ENDIF
+     END SUBROUTINE gshells
 !=----------------------------------------------------------------------------=!
    END MODULE gvect
 !=----------------------------------------------------------------------------=!
diff --git a/lib/qe_pseudo/vloc_of_g.F b/lib/qe_pseudo/vloc_of_g.F
new file mode 100644
index 0000000000..faa9c3641d
--- /dev/null
+++ b/lib/qe_pseudo/vloc_of_g.F
@@ -0,0 +1,108 @@
+!
+! Copyright (C) 2001-2007 Quantum ESPRESSO group
+! This file is distributed under the terms of the
+! GNU General Public License. See the file `License'
+! in the root directory of the present distribution,
+! or http://www.gnu.org/copyleft/gpl.txt .
+!
+!
+!----------------------------------------------------------------------
+subroutine vloc_of_g (mesh, msh, rab, r, vloc_at, zp, tpiba2, ngl, &
+     gl, omega, vloc)
+  !----------------------------------------------------------------------
+  !
+  !    This routine computes the Fourier transform of the local
+  !    part of an atomic pseudopotential, given in numerical form.
+  !    A term erf(r)/r is subtracted in real space (thus making the
+  !    function short-ramged) and added again in G space (for G<>0)
+  !    The G=0 term contains \int (V_loc(r)+ Ze^2/r) 4pi r^2 dr.
+  !    This is the "alpha" in the so-called "alpha Z" term of the energy.
+  !    Atomic Ry units everywhere.
+  !
+  !   [FP] Unit check (Ry or Ha for yambo)
+  !
+  USE kinds
+  USE constants, ONLY : pi, fpi, e2, eps8
+  !USE esm, ONLY : do_comp_esm, esm_bc
+  !USE Coul_cut_2D, ONLY: do_cutoff_2D, lz 
+  implicit none
+  !
+  !    first the dummy variables
+  !
+  integer, intent(in) :: ngl, mesh, msh
+  ! ngl : the number of shells of G vectors
+  ! mesh: number of grid points in the radial grid
+  ! msh : as above, used for radial integration
+  !
+  real(DP), intent(in) :: zp, rab (mesh), r (mesh), vloc_at (mesh), tpiba2, &
+                          omega, gl (ngl)
+  ! zp : valence pseudocharge
+  ! rab: the derivative of mesh points
+  ! r  : the mesh points
+  ! vloc_at: local part of the atomic pseudopotential on the radial mesh
+  ! tpiba2 : 2 pi / alat
+  ! omega  : the volume of the unit cell
+  ! gl     : the moduli of g vectors for each shell
+  !
+  real(DP), intent(out):: vloc (ngl)
+  !
+  ! vloc: the fourier transform of the potential
+  !
+  !    local variables
+  !
+  real(DP) :: vlcp, fac, gx
+  real(DP), allocatable :: aux (:), aux1 (:)
+  integer :: igl, igl0, ir
+  ! igl :counter on g shells vectors
+  ! igl0:first shell with g != 0
+  ! ir  :counter on mesh points
+  !
+  real(DP), external :: qe_erf
+  !
+  allocate ( aux(msh), aux1(msh) )
+  if (gl (1) < eps8) then
+     !
+     ! first the G=0 term
+     !
+     ! [FP] QE esm and 2D_cutoff for G=0 go here. So far not supported.
+     !      Check the original subroutine of the same name in QE.
+     !
+     do ir = 1, msh
+        aux (ir) = r (ir) * (r (ir) * vloc_at (ir) + zp * e2)
+     enddo
+     !
+     call simpson (msh, aux, rab, vlcp)
+     vloc (1) = vlcp        
+     igl0 = 2
+  else
+     igl0 = 1
+  endif
+  !
+  !   here the G<>0 terms, we first compute the part of the integrand 
+  !   function independent of |G| in real space
+  !
+  do ir = 1, msh
+     aux1 (ir) = r (ir) * vloc_at (ir) + zp * e2 * qe_erf (r (ir) )
+  enddo
+  fac = zp * e2 / tpiba2
+  !
+  !    and here we perform the integral, after multiplying for the |G|
+  !    dependent part
+  !
+  do igl = igl0, ngl
+     gx = sqrt (gl (igl) * tpiba2)
+     do ir = 1, msh
+        aux (ir) = aux1 (ir) * sin (gx * r (ir) ) / gx
+     enddo
+     call simpson (msh, aux, rab, vlcp)
+     !
+     ! [FP] QE esm and cutoff_2D for G<>0 go here.
+     !
+     vlcp = vlcp - fac * exp ( - gl (igl) * tpiba2 * 0.25d0) / gl (igl)
+     vloc (igl) = vlcp
+  enddo
+  vloc (:) = vloc(:) * fpi / omega
+  deallocate (aux, aux1)
+
+!return
+end subroutine vloc_of_g
diff --git a/lib/qe_pseudo/vlocal.F b/lib/qe_pseudo/vlocal.F
new file mode 100644
index 0000000000..45ca9d49f3
--- /dev/null
+++ b/lib/qe_pseudo/vlocal.F
@@ -0,0 +1,74 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): FP
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+module vlocal
+   use kinds, only : DP
+   real(DP), allocatable    :: vloc(:,:)
+   ! vloc(ngl,ntyp) from QE [only needed by p2y]
+   real(DP), allocatable    :: vloc_yambo(:,:)
+   ! vloc_yambo(ngm,ntyp) stored in ns.vloc_pwscf by p2y
+   complex(DP), allocatable :: vloc_full(:)
+   ! vloc_full(ngm) including structure factors
+   complex(DP), allocatable :: strf(:,:)
+   ! srtf(ngm,ntyp) structure factors
+
+   contains
+
+     subroutine vloc_alloc(do_vloc)
+       !
+       ! Allocate Vlocal-related quantities for yambo
+       !
+       ! NB: p2y allocates vloc_yambo and strf independently
+       !
+       use gvect,      ONLY: ngm
+       use ions_base,  ONLY: nsp
+       !
+       logical, intent(in), optional  :: do_vloc
+       !
+       ! Allocate vloc_yambo if reconstructing bare Hamiltonian...
+       !
+       if (present(do_vloc)) then
+         !
+         if (.not.allocated(vloc_yambo)) allocate(vloc_yambo(ngm,nsp))
+         !
+         if (.not.allocated(vloc_full)) allocate(vloc_full(ngm)) 
+         !
+       endif
+       !
+       ! ... otherwise just allocate structure factors
+       !
+       if (.not.allocated(strf)) allocate(strf(ngm,nsp))
+       !
+     end subroutine vloc_alloc
+     !
+     subroutine vloc_dealloc()
+       !
+       ! Dellocate Vlocal-related quantities
+       !
+       if (allocated(vloc_yambo)) deallocate(vloc_yambo) 
+       if (allocated(vloc_full))  deallocate(vloc_full)
+       if (allocated(strf))       deallocate(strf)
+       !
+     end subroutine vloc_dealloc
+     !
+end module
diff --git a/src/dipoles/DIPOLE_driver.F b/src/dipoles/DIPOLE_driver.F
index 5e8991566a..bba20ea49b 100644
--- a/src/dipoles/DIPOLE_driver.F
+++ b/src/dipoles/DIPOLE_driver.F
@@ -49,6 +49,7 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
 #if defined _SLEPC && !defined _NL
  use BS_solvers,     ONLY:BSS_mode
 #endif
+ use com,            ONLY:exp_user
  !
  use timing_m,       ONLY:timing
  !
@@ -62,7 +63,7 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
  !
  integer           :: ik,io_err
  character(schlen) :: msg
- logical           :: l_warning,use_dipole_transverse,idir_not_done(3)
+ logical           :: l_warning,use_dipole_transverse,idir_not_done(3),l_Hbare
  !
  call timing('Dipoles',OPR='start')
  !
@@ -91,6 +92,8 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
    endif
  endif
 #endif
+ ! Needed for kinetic part of bare Hamiltonian
+ call parser('BareHfromScratch', l_Hbare)
  !
  ! Setup logicals 
  !================
@@ -102,6 +105,8 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
  compute_P2_dipoles          = index(Dip%computed,"P2")  /=0
  compute_Spin_dipoles        = index(Dip%computed,"Spin")/=0
  compute_Orb_dipoles         = index(Dip%computed,"Orb") /=0
+
+ compute_P2_dipoles          = compute_P2_dipoles .or. l_Hbare
 #if defined _SC
  compute_P2_dipoles          = compute_P2_dipoles .or. l_sc_run
 #endif
@@ -118,6 +123,13 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
    Dip%approach='G-space v'
  endif
  !
+ ! If we have additional nonlocal contributions such as Hubbard U,
+ ! we need to switch to the covariant approach to capture them
+ if (l_Hbare.and..not.use_covariant_approach) then
+  !if(.not.exp_user) call error(' Dipoles approach should be set to Covariant for BareH calculations')
+   if(     exp_user) call warning(' Dipoles approach should be set to Covariant for BareH calculations')
+ endif
+ !
  call parser('PDirect'  ,Dip%force_v_g_space)
 #if defined _SC || defined _RT || defined _NL
  Dip%force_v_g_space=Dip%force_v_g_space.or.l_sc_run.or.l_real_time.or.l_nl_optics
diff --git a/src/driver/options_interfaces.c b/src/driver/options_interfaces.c
index 2782a0c517..4a59f390f6 100644
--- a/src/driver/options_interfaces.c
+++ b/src/driver/options_interfaces.c
@@ -70,4 +70,11 @@ void options_interfaces(struct options_struct options[],int *i_opt)
  options[*i_opt].bin="a2y c2y";
  options[*i_opt].yambo_string="dupl";
  options[*i_opt].section="Interface";
+ *i_opt=*i_opt+1;
+ options[*i_opt].short_desc="Write V_loc and V_nl";
+ options[*i_opt].long_opt="pseudofull";
+ options[*i_opt].short_opt='p';
+ options[*i_opt].bin="p2y";
+ options[*i_opt].yambo_string="pseu";
+ options[*i_opt].section="Interface";
 };
diff --git a/src/hamiltonian/.objects b/src/hamiltonian/.objects
index 7ca35091a6..824c5b6fd9 100644
--- a/src/hamiltonian/.objects
+++ b/src/hamiltonian/.objects
@@ -11,7 +11,7 @@ NL_objects = Build_Overlaps_det_NEQ.o Build_tilde_vbands.o Build_W_operator.o Be
 ELECTRIC_objects = Build_Overlaps_det_NEQ.o Build_tilde_vbands.o Build_W_operator.o Berry_polarization_NEQ.o 
 #endif
 #if defined _SC || defined _RT || defined _NL
-objs = Bare_Hamiltonian.o V_Hartree.o XC_additional_SC_potentials.o XC_potentials.o V_qp_basis_to_H.o V_real_space_to_H.o Vgrad_real_space_to_H.o \
+objs = Bare_Hamiltonian_as_E_ks_minus_Vxc.o Bare_Hamiltonian_from_Scratch.o V_Hartree.o XC_additional_SC_potentials.o XC_potentials.o V_qp_basis_to_H.o V_real_space_to_H.o Vgrad_real_space_to_H.o FFT_vloc_G_to_R.o \
        Check_symmetries.o WF_and_dipole_dimensions.o \
        ${MAGNETIC_objects} ${PSEUDO_objects} ${NL_objects} ${ELECTRIC_objects}
 #endif
diff --git a/src/hamiltonian/Bare_Hamiltonian.F b/src/hamiltonian/Bare_Hamiltonian_as_E_ks_minus_Vxc.F
similarity index 80%
rename from src/hamiltonian/Bare_Hamiltonian.F
rename to src/hamiltonian/Bare_Hamiltonian_as_E_ks_minus_Vxc.F
index aba0aae833..c0589acd9c 100644
--- a/src/hamiltonian/Bare_Hamiltonian.F
+++ b/src/hamiltonian/Bare_Hamiltonian_as_E_ks_minus_Vxc.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): DV DS
 !
-subroutine Bare_Hamiltonian(E,Xk,k)
+subroutine Bare_Hamiltonian_as_E_ks_minus_Vxc(E,Xk,k)
  !
  use pars,           ONLY:cZERO
  use electrons,      ONLY:levels,n_sp_pol,spin,n_spin
@@ -20,16 +20,15 @@ subroutine Bare_Hamiltonian(E,Xk,k)
  use interfaces,     ONLY:el_density_and_current,el_magnetization,WF_load
  use H_interfaces,   ONLY:V_real_space_to_H
  use timing_m,       ONLY:timing
+ use parser_m,       ONLY:parser
 #if defined _SC
- use drivers,        ONLY:l_sc_run
+ use drivers,        ONLY:l_sc_run,l_sc_magnetic
 #endif
 #if defined _RT
  use drivers,        ONLY:l_real_time
  use real_time,      ONLY:REF_V_xc_sc,REF_V_hartree_sc,rho_reference,magn_reference
 #endif
-#if defined _SC
- use drivers,        ONLY:l_sc_magnetic
-#endif
+ !
 #include<memory.h>
  !
  type(levels)             :: E
@@ -41,8 +40,12 @@ subroutine Bare_Hamiltonian(E,Xk,k)
  !
  Hzero=cZERO
  !
- call timing('Bare_Hamiltonian',OPR='start')
+ call timing('Bare_Hamiltonian_as_E_ks_minus_Vxc',OPR='start')
  !
+ ! [DEBUG>]
+ open(1, file='REF_Hbare_real.dat')
+ open(2, file='REF_Hbare_imag.dat')
+ ! [<DEBUG]
 #if defined _SC
  !
  if (l_sc_magnetic) then
@@ -60,7 +63,7 @@ subroutine Bare_Hamiltonian(E,Xk,k)
    !
    call Pseudo_KB_G_to_R_space(k,E)
    call Pseudo_Hamiltonian(Xk,E,.true.)
-   call Pseudo_Hamiltonian(Xk,E,.false.)
+   !call Pseudo_Hamiltonian(Xk,E,.false.)
    !
  endif
  !
@@ -136,6 +139,16 @@ subroutine Bare_Hamiltonian(E,Xk,k)
      !
      call V_qp_basis_to_H(ik,i_sp_pol,-QP_Vxc,Hzero(:,:,ik,i_sp_pol))
      !
+     ! [DEBUG>]
+     write (*,*) "H_bare (ik,i_sp_pol) ",ik,i_sp_pol
+     write(1,*) "#Re H_bare (ik,i_sp_pol) ",ik,i_sp_pol
+     write(2,*) "#Im H_bare (ik,i_sp_pol) ",ik,i_sp_pol
+     do ib=H_ref_bands(1),H_ref_bands(2)
+       write(*,*) Hzero(ib,:,ik,i_sp_pol)
+       write(1,*) real (Hzero(ib,:,ik,i_sp_pol))
+       write(2,*) aimag(Hzero(ib,:,ik,i_sp_pol))
+     enddo
+     ! [<DEBUG]
    enddo
    !
  enddo
@@ -156,6 +169,10 @@ subroutine Bare_Hamiltonian(E,Xk,k)
  !
  call PP_redux_wait(Hzero)
  !
- call timing('Bare_Hamiltonian',OPR='stop')
+ close(2)
+ close(1)
+ write (*,*) "End of Bare Hamiltonian as E_ks-V_xc "
+ !
+ call timing('Bare_Hamiltonian_as_E_ks_minus_Vxc',OPR='stop')
  !
-end subroutine Bare_Hamiltonian
+end subroutine Bare_Hamiltonian_as_E_ks_minus_Vxc
diff --git a/src/hamiltonian/Bare_Hamiltonian_from_Scratch.F b/src/hamiltonian/Bare_Hamiltonian_from_Scratch.F
new file mode 100644
index 0000000000..e9f67177fb
--- /dev/null
+++ b/src/hamiltonian/Bare_Hamiltonian_from_Scratch.F
@@ -0,0 +1,338 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): FP, DS
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine Bare_Hamiltonian_from_Scratch(E,Xk,k,q,Dip)
+ !
+ use pars,           ONLY:DP,SP,cZERO,schlen,lchlen
+ use electrons,      ONLY:levels,n_sp_pol,n_spin,l_spin_orbit,spin_occ,n_spinor
+ use dipoles,        ONLY:DIPOLE_t,compute_P2_dipoles,P_square
+ use QP_m,           ONLY:QP_nk,QP_ng_Vxc
+ use interfaces,     ONLY:el_density_and_current,el_magnetization,WF_load
+ use hamiltonian,    ONLY:Hzero,H_ref_bands,WF_G_max,WF_Go_indx,Hext_nl,Hext_loc,&
+&                         V_hartree_sc,rho_n,magn_n,H_alloc_real_space
+ use parallel_int,  ONLY:PP_redux_wait,PARALLEL_global_indexes,PARALLEL_WF_distribute,PARALLEL_WF_index
+ use parallel_m,    ONLY:PAR_IND_Xk_ibz,PAR_IND_QP,&
+&                        PAR_IND_QP_ID,PAR_IND_Q_ibz,PAR_IND_G_b,&
+&                        PAR_IND_Q_ibz_ID,n_WF_bands_to_load
+ use wave_func,      ONLY:WF
+ use R_lattice,      ONLY:bz_samp,nkibz,qindx_S
+ use D_lattice,      ONLY:DL_vol
+ use timing_m,       ONLY:timing
+ use pseudo,         ONLY:becp,pp_is_uspp,qe_atoms_map
+ use uspp,           ONLY:nkb,ikb_i=>indv_ijkb0,deeq,deeq_nc
+ use uspp_param,     ONLY:nh
+ use com,            ONLY:msg,of_open_close
+ use ions_base,      ONLY:nat,ityp,ntyp=>nsp
+ use vlocal,         ONLY:vloc_dealloc
+ use wrapper,        ONLY:Vstar_dot_VV
+ use fft_m,          ONLY:fft_size,fft_g_table
+ use cuda_m,         ONLY:have_cuda
+ use IO_int,         ONLY:io_control
+ use IO_m,           ONLY:NONE,OP_RD_CL,REP,DUMP
+ use gvect,          ONLY:ngm
+ use stderr,         ONLY:intc
+ use parser_m,       ONLY:parser
+#if defined _SC
+ use drivers,        ONLY:l_sc_run
+#endif
+#if defined _RT
+ use drivers,        ONLY:l_real_time
+#endif
+#include<dev_defs.h>
+#include<memory.h>
+ !
+ type(levels)             :: E ! This is so far not needed by the subroutine 
+ type(bz_samp)            :: Xk,k,q
+ type(DIPOLE_t)           :: Dip
+ !
+ ! Work space
+ !
+ integer     :: ik,ikb,ib1,ib2,i_sp_pol,WFbands(2),ia,it,i1,i2,ia_qe,ia_per_type
+ integer     :: i_wf1,i_wf2,io_DIP_err,ID_DIP,i_qp
+ complex(SP), allocatable :: vloc_full_r(:)
+ character(schlen)  :: DIP_par_scheme
+ character(schlen) :: Hsc_file,headings(6),msg_format
+ character(lchlen) :: msg_to_dump
+ !
+ integer, external :: io_DIPOLES
+ !
+ ! Debug
+ !
+ logical :: l_scale_fermi,l_write_Hsc
+ !
+ !open(1, file='TEST_Hbare_real.dat')
+ !open(2, file='TEST_Hbare_imag.dat')
+ !
+ Hzero   =cZERO
+ Hext_nl =cZERO
+ Hext_loc=cZERO
+ !
+ l_scale_fermi=.false.
+ call parser('BareHScaleFermi',l_scale_fermi)
+ !
+ l_write_Hsc=.false.  
+ call parser('WriteHsc',l_write_Hsc) ! write Hamiltonian on file only for testing purpose 
+ !
+ if(l_write_Hsc) then
+   Hsc_file='H_sc'
+   call of_open_close(Hsc_file,'ot')
+   call msg('o '//Hsc_file,'#')
+   call msg('o '//Hsc_file,'# H-Bare Hamiltonian from scratch ') 
+   call msg('o '//Hsc_file,'#')
+   headings=(/'is_pol','kpt   ','bnd1  ','bnd2  ','Re[H] ','Img[H]'/)
+   call msg('o '//Hsc_file,'#',headings,INDENT=0,USE_TABS=.TRUE.)
+   call msg('o '//Hsc_file,'#')
+   msg_format='(4I7,2F16.10)'
+ endif
+ !
+ call timing('Bare_Hamiltonian_from_Scratch',OPR='start')
+ !
+ if (pp_is_uspp) call error("[PPs] USPP not implemented for bare Hamiltonian reconstruction")
+ !
+ ! Read dipoles
+ !
+ ! (NB : In the final version this may be done in a different subroutine)
+ ! (NB2: Incompatibility of DIP_par_scheme and DIPOLE_IO call in the present version
+ !       is the reason why SE parall. over q is turned off in XCo_driver)
+ !=====================================================================
+ if (.not.compute_P2_dipoles) call error("[DIP] P_square not activated")
+ call DIPOLE_dimensions(E,Dip,H_ref_bands,(/0._SP,0._SP,0._SP/))
+ !
+ DIP_par_scheme="K"
+ if(l_sc_run) DIP_par_scheme="SC"
+
+ call DIPOLE_IO(k,E,Dip,'read',io_DIP_err,DIP_par_scheme)
+ if (.not.allocated(P_square)) call error("[DIP] P_square not found")
+ !
+ ! Load WF, compute Vloc and becp and setup FFT
+ !
+ ! (NB : this turns pp_is_uspp to .true., later on it is restored to .false.)
+ ! (NB2: this calls vloc_alloc(), it is deallocated below) 
+ ! (NB3: quantities in real space [vlocal and VHartree] require loading all occupied states)
+ !=====================================================================
+ WFbands=(/1,max(H_ref_bands(2),maxval(E%nbm))/)
+ ! 
+ ! WF distributed & load
+ !=======================
+ !
+ if (.not.l_sc_run) then
+  call PARALLEL_global_indexes(E,k,q,"Self_Energy")
+  ! 
+  ! Final states (k-q,np)...
+  call PARALLEL_WF_distribute(K_index=PAR_IND_Xk_ibz,B_index=PAR_IND_G_b,CLEAN_UP=.TRUE.)
+  ! Initial states (k,n \in QP)...
+  call PARALLEL_WF_distribute(QP_index=PAR_IND_QP)
+  call PARALLEL_WF_index( )
+ endif
+ !
+ call WF_load(WF,QP_ng_Vxc,maxval(qindx_S(:,:,2)),WFbands,(/1,k%nibz/),title='-vloc')
+ !
+ !****************************************************
+ !
+ ! FFT of vloc into real space
+ !
+ ! (NB: grid dimensions are determined by the FFT_setup previously run by WF_load)
+ !=====================================================================
+ YAMBO_ALLOC(vloc_full_r,(fft_size))
+ vloc_full_r=cZERO
+ !
+ call FFT_vloc_G_to_R(vloc_full_r)
+ !
+ ! Calculation of Hzero
+ !
+ ! Hzero = T + Hext = P**2/2. + V_ext^loc + V_ext^nl
+ !
+ ! Hzero_n,m = P_square(n,m)/2. + int_r [WF%c(r,n)]* Vloc(r) * WF%c(r,m)/2.  + sum_ij [becp%k(i,n)]* D_ij * becp%k(j,m)/2.
+ !
+ !=======================================================================================================
+ !
+ do i_sp_pol=1,n_sp_pol
+   !
+   do ik=1,QP_nk
+     !
+     !write (*,*) "H_pseudo (ik,i_sp_pol) ",ik,i_sp_pol
+     !write (*,*) "H_zero (ik,i_sp_pol) ",ik,i_sp_pol
+     !
+     Hext_loc=cZERO
+     Hext_nl=cZERO
+     !
+     do ib1=H_ref_bands(1),H_ref_bands(2)
+       do ib2=H_ref_bands(1),H_ref_bands(2)
+         !
+         !
+         ! Calculate the local part of Hext ...
+         !
+         ! This is a sandwich in R-space of vloc(r) with the wave functions
+         ! ==================================================================
+         i_wf1 = WF%index(ib1,ik,i_sp_pol)
+         i_wf2 = WF%index(ib2,ik,i_sp_pol)
+         !
+         !
+         Hext_loc(ib1,ib2) = Hext_loc(ib1,ib2) + &
+                             Vstar_dot_VV(fft_size,WF%c(:,1,i_wf1),vloc_full_r,WF%c(:,1,i_wf2))
+         !
+         ! Calculate the nonlocal part of Hext ...
+         !
+         ! NB: Unlike for (i) the dipoles or (ii) the Pseudo_Hamiltonian cases,
+         !     here we have to *explicitly* use the nondiagonal deeq matrix to
+         !     compute the pseudo Hamiltonian matrix elements since deeq is *not*
+         !     already diagonalised and incorporated into the KB projectors.
+         !
+         ! The external loops over ntyp, nat are adapted from add_vuspsi_k() 
+         ! in PW/src/add_vuspsi.f90
+         ! =================================================================
+         ikb = 0
+         do it=1,ntyp 
+           !
+           if ( nh(it) == 0 ) cycle
+           !
+           ia_per_type=0
+           do ia=1,nat
+             !
+             if ( ityp(ia) == it ) then
+               ia_per_type=ia_per_type+1
+               !
+               ia_qe=qe_atoms_map(ia_per_type,it)
+               !
+               !write(*,*) it,na,ia_qe,ikb_i(ia)
+               !
+               do i1=1,nh(it) ! number of projectors for atom ia which is of species it
+                 do i2=1,nh(it) ! deeq is nondiagonal if more than one projector per atom
+                   
+                   !write(*,*) i1,i2,deeq(i1,i2,ia_qe,i_sp_pol)
+                   !
+                   if (l_spin_orbit) then
+                     !
+                     Hext_nl(ib1,ib2) = Hext_nl(ib1,ib2) + &
+                                       conjg(becp(ik,i_sp_pol)%k(ikb_i(ia)+i1,ib1)) * &
+                                                      deeq_nc(i1,i2,ia_qe,i_sp_pol) * &
+                                              becp(ik,i_sp_pol)%k(ikb_i(ia)+i2,ib2)
+                     !
+                   else
+                     !
+                     Hext_nl(ib1,ib2) = Hext_nl(ib1,ib2) + &
+                                        conjg(becp(ik,i_sp_pol)%k(ikb_i(ia)+i1,ib1))* &
+                                                         deeq(i1,i2,ia_qe,i_sp_pol) * &
+                                              becp(ik,i_sp_pol)%k(ikb_i(ia)+i2,ib2)
+                     !
+                   endif
+                   !  
+                 enddo
+                 ikb = ikb +1  
+               enddo
+               !
+             endif
+             !
+           enddo
+           !
+         enddo
+         !
+         ! ikb and nkb are here just for debugging, may be removed
+         if (ikb/=nkb) call error("[Pseudo Ham.] Something wrong in counting KB projectors and deeq")
+       enddo
+     enddo
+     !
+     if (l_scale_fermi) then
+       !
+       do ib1=H_ref_bands(1),H_ref_bands(2)
+         Hzero(ib1,ib1,ik,i_sp_pol)=-E%E_fermi
+       enddo
+       !
+     endif
+     !
+     ![DEBUG>]
+     !do ib1=H_ref_bands(1),H_ref_bands(2)
+      ! write(*,*) "P2= ", P_square(ib1,:,ik,i_sp_pol)/2._SP
+      ! write(*,*) "Hl= ", Hext_loc(ib1,:)/2._SP
+      ! write(*,*) "Hn= ", Hext_nl(ib1,:)/2._SP
+      ! if (l_scale_fermi) write(*,*) "Ef= ", Hzero(ib1,:,ik,i_sp_pol)
+      ! write(*,*) ""
+     !enddo
+     ![DEBUG<]
+     !
+     ! Fill Hzero
+     !
+     ! Note: 0.5 factor in Hloc and Hnl is conversion from Ry to Ha
+     ! Note: conjg(P2) is taken because P2 has swapped indices as per
+     !       comment in DIPOLE_p_matrix_elements
+     ! ==============
+     !
+     Hzero(:,:,ik,i_sp_pol) = Hzero(:,:,ik,i_sp_pol) & 
+                              +( conjg(P_square(:,:,ik,i_sp_pol)) + Hext_loc(:,:) + Hext_nl(:,:) )/2._SP
+     !
+     !
+     !write(1,*) "#Re H_bare (ik,i_sp_pol) ",ik,i_sp_pol
+     !write(2,*) "#Im H_bare (ik,i_sp_pol) ",ik,i_sp_pol
+     if(l_write_Hsc) then
+       do ib1=H_ref_bands(1),H_ref_bands(2)
+         do ib2=H_ref_bands(1),H_ref_bands(2)
+           write(msg_to_dump,msg_format) i_sp_pol,ik,ib1,ib2,&
+&             real(Hzero(ib1,ib2,ik,i_sp_pol)),aimag(Hzero(ib1,ib2,ik,i_sp_pol))
+           call msg('o '//Hsc_file,msg_to_dump)
+         enddo
+       enddo
+     endif
+     !do ib1=H_ref_bands(1),H_ref_bands(2)
+     !  write(1,*) real(Hzero(ib1,:,ik,i_sp_pol))
+     !  write(2,*) aimag(Hzero(ib1,:,ik,i_sp_pol))
+     !enddo
+     !
+#if defined _RT
+     if(l_real_time) then
+       if (.not.PAR_IND_WF_k%element_1D(ik)) cycle
+     endif
+#endif
+     !
+   enddo
+   !
+ enddo
+ !
+ pp_is_uspp=.false. 
+ call vloc_dealloc() ! Dealloc here since we couldn't do it in PP_uspp_init 
+ YAMBO_FREE(vloc_full_r)
+ !
+ if(l_write_Hsc) call of_open_close(Hsc_file)
+ !close(1)
+ !close(2)
+ !write(*,*) "End of Bare hamiltonian from scratch "
+ !
+ !
+ ! Now calculating V_Hartree in real space. First density [including all valence states]...
+ !
+ if ((.not.allocated(rho_n)).or.(size(rho_n)/=fft_size)) call H_alloc_real_space()
+ !
+ call el_density_and_current(E,Xk,rho=rho_n,bands=(/1,H_ref_bands(2)/))
+ !
+#if defined _SC
+ if(l_sc_run) then
+   if(n_spin>1) call el_magnetization(E,Xk,magn_n,bands=(/1,H_ref_bands(2)/))
+ endif
+#endif
+ !
+ ! ... then potential
+ !
+ call V_Hartree(rho_n,V_hartree_sc)
+ !
+ call timing('Bare_Hamiltonian_from_Scratch',OPR='stop')
+ !
+end subroutine Bare_Hamiltonian_from_Scratch
diff --git a/src/hamiltonian/DOUBLE_project.dep b/src/hamiltonian/DOUBLE_project.dep
index 15d7a67c65..e8c6cd09fc 100644
--- a/src/hamiltonian/DOUBLE_project.dep
+++ b/src/hamiltonian/DOUBLE_project.dep
@@ -1,9 +1,11 @@
- Bare_Hamiltonian.o
+ Bare_Hamiltonian_as_E_ks_minus_Vxc.o
+ Bare_Hamiltonian_from_Scratch.o
  Berry_polarization_NEQ.o
  Build_Overlaps_det_NEQ.o
  Build_W_operator.o
  Build_tilde_vbands.o
  Check_symmetries.o
+ FFT_vloc_G_to_R.o
  MAG_A_phase.o
  MAG_Hamiltonian.o
  MAG_common_build_A.o
diff --git a/src/hamiltonian/FFT_vloc_G_to_R.F b/src/hamiltonian/FFT_vloc_G_to_R.F
new file mode 100644
index 0000000000..7bf3964a8b
--- /dev/null
+++ b/src/hamiltonian/FFT_vloc_G_to_R.F
@@ -0,0 +1,82 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): FP, DS, CA
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine FFT_vloc_G_to_R(vloc_full_r)
+  !
+  use pars,       ONLY:DP,SP,cZERO
+  use vlocal,     ONLY:vloc_full
+  use fft_m,      ONLY:fft_dim,fftw_plan,fft_g_table,fft_size
+  use gvect,      ONLY:ngm
+  !
+#include<memory.h>
+  !  
+  complex(SP), intent(out) :: vloc_full_r(fft_size)
+  !
+  ! Work space
+  !
+  integer                   :: ig,ir
+  integer                   :: shape_fft_g_table(2)
+  complex(DP), allocatable  :: vloc_aux(:)
+  !
+  vloc_full_r=cZERO
+  !
+  YAMBO_ALLOC(vloc_aux,(fft_size))
+  !
+  shape_fft_g_table=shape(fft_g_table)
+  if (ngm/=shape_fft_g_table(1)) then
+   !
+   call errore('[FFT-vloc] Mismatch in Gvecs number between QE and Yambo.&
+& Check Gthresh warning during setup') 
+   !
+  endif
+  !
+  vloc_aux=cZERO
+  ! 
+  !$omp parallel do default(shared), private(ig)
+  do ig=1,ngm
+    vloc_aux(fft_g_table(ig,1))=vloc_full(ig)
+    !write(201,*) ig,real(vloc_full(ig)),aimag(vloc_full(ig))
+  enddo
+  !$omp end parallel do
+  !
+#if defined _FFTW
+  call dfftw_destroy_plan(fftw_plan)
+  fftw_plan = 0
+  call fft_3d(vloc_aux,fft_dim,1,fftw_plan)
+#else
+  call fft_3d(vloc_aux,fft_dim,1)
+#endif
+  !
+  ! DEBUG <
+  !do ir=1,fft_size
+  !  write(101,*) ir,real(vloc_aux(ir)),aimag(vloc_aux(ir))
+  !enddo
+  ! DEBUG >
+  !
+  ! Convert to single precision and take real part to remove noise 
+  ! but keep complex type even though vloc(r) is real, as required by Vstar_dot_VV
+  vloc_full_r = real(cmplx(vloc_aux,kind=SP))
+  !
+  YAMBO_FREE(vloc_aux)
+  !
+end subroutine FFT_vloc_G_to_R
diff --git a/src/hamiltonian/MAG_Hamiltonian.F b/src/hamiltonian/MAG_Hamiltonian.F
index f50aeb3d56..d369c76c47 100644
--- a/src/hamiltonian/MAG_Hamiltonian.F
+++ b/src/hamiltonian/MAG_Hamiltonian.F
@@ -77,7 +77,7 @@ subroutine MAG_Hamiltonian()
  H_magn_z_on=allocated(A_magn_z)
  !
  ! Symmetries check
- call Check_symmetries((/Bx,By,Bz/),"magnetic")
+ !call Check_symmetries((/Bx,By,Bz/),"magnetic")
  !
  call local_alloc(0) 
  !
diff --git a/src/hamiltonian/NL_project.dep b/src/hamiltonian/NL_project.dep
index 51ce2beb59..2394507c19 100644
--- a/src/hamiltonian/NL_project.dep
+++ b/src/hamiltonian/NL_project.dep
@@ -1,9 +1,11 @@
- Bare_Hamiltonian.o
+ Bare_Hamiltonian_as_E_ks_minus_Vxc.o
+ Bare_Hamiltonian_from_Scratch.o
  Berry_polarization_NEQ.o
  Build_Overlaps_det_NEQ.o
  Build_W_operator.o
  Build_tilde_vbands.o
  Check_symmetries.o
+ FFT_vloc_G_to_R.o
  V_Hartree.o
  V_qp_basis_to_H.o
  V_real_space_to_H.o
diff --git a/src/hamiltonian/Pseudo_Hamiltonian.F b/src/hamiltonian/Pseudo_Hamiltonian.F
index da42aec6e2..b5a931d209 100644
--- a/src/hamiltonian/Pseudo_Hamiltonian.F
+++ b/src/hamiltonian/Pseudo_Hamiltonian.F
@@ -53,6 +53,7 @@ subroutine Pseudo_Hamiltonian(Xk,Xen,l_equilibrium)
  integer                 :: ID,io_KB_real_space_err,ACT
  integer, external       :: io_KB_real_space
  !
+ !
 #if defined _SC
  if(l_sc_magnetic) call fft_setup(0,1,.true.)  ! fft_size
 #endif
@@ -106,7 +107,7 @@ subroutine Pseudo_Hamiltonian(Xk,Xen,l_equilibrium)
      if(l_real_time) call timing('Pseudo kbv I/O',OPR='start')
 #endif
      !
-     ACT=manage_action(OP_IF_START_RD_CL_IF_END,ifrag,1,Xk%nibz*n_sp_pol)
+     ACT=manage_action(OP_RD_CL,ifrag,1,Xk%nibz*n_sp_pol)
      call io_control(ACTION=ACT,SEC=(/ifrag+1/),ID=ID)
      io_KB_real_space_err=io_KB_real_space(ID,kbv_real_space,kbv_real_space_table)
      !
@@ -175,6 +176,14 @@ subroutine Pseudo_Hamiltonian(Xk,Xen,l_equilibrium)
        !
      enddo   ! i1
      !
+     ! [DEBUG>]
+     !write (*,*) "H_pseudo (ik,i_sp_pol) ",ik,i_sp_pol
+     !
+     !do ib=H_ref_bands(1),H_ref_bands(2)
+     !  write(*,*) H_pseudo(ib,:,ik,i_sp_pol)
+     !enddo
+     ! [<DEBUG]
+     !
 #if defined _RT
      if(l_real_time) call timing('Pseudo correction',OPR='stop')
 #endif
diff --git a/src/hamiltonian/Pseudo_KB_G_to_R_space.F b/src/hamiltonian/Pseudo_KB_G_to_R_space.F
index 6b277456be..d7c7e1596f 100644
--- a/src/hamiltonian/Pseudo_KB_G_to_R_space.F
+++ b/src/hamiltonian/Pseudo_KB_G_to_R_space.F
@@ -18,7 +18,7 @@ subroutine Pseudo_KB_G_to_R_space(Xk,Xen)
  use D_lattice,     ONLY:n_atomic_species,n_atoms_species
  use LIVE_t,        ONLY:live_timing
  use IO_int,        ONLY:io_control
- use IO_m,          ONLY:manage_action,OP_WR_CL,OP_RD,OP_IF_START_APP_CL_IF_END,RD_CL_IF_END
+ use IO_m,          ONLY:manage_action,OP_WR_CL,OP_RD,OP_IF_START_APP_CL_IF_END,RD_CL_IF_END,OP_APP_CL
  use timing_m,      ONLY:timing
  use parallel_int,  ONLY:PP_wait
  use parallel_m,    ONLY:master_cpu,PAR_IND_Xk_ibz_ID,PAR_IND_Xk_ibz
diff --git a/src/hamiltonian/RT_project.dep b/src/hamiltonian/RT_project.dep
index fa80c4982e..9096c53676 100644
--- a/src/hamiltonian/RT_project.dep
+++ b/src/hamiltonian/RT_project.dep
@@ -1,5 +1,7 @@
- Bare_Hamiltonian.o
+ Bare_Hamiltonian_as_E_ks_minus_Vxc.o
+ Bare_Hamiltonian_from_Scratch.o
  Check_symmetries.o
+ FFT_vloc_G_to_R.o
  Pseudo_Hamiltonian.o
  Pseudo_KB_FFT.o
  Pseudo_KB_G_to_R_space.o
diff --git a/src/hamiltonian/SC_project.dep b/src/hamiltonian/SC_project.dep
index 15d7a67c65..e8c6cd09fc 100644
--- a/src/hamiltonian/SC_project.dep
+++ b/src/hamiltonian/SC_project.dep
@@ -1,9 +1,11 @@
- Bare_Hamiltonian.o
+ Bare_Hamiltonian_as_E_ks_minus_Vxc.o
+ Bare_Hamiltonian_from_Scratch.o
  Berry_polarization_NEQ.o
  Build_Overlaps_det_NEQ.o
  Build_W_operator.o
  Build_tilde_vbands.o
  Check_symmetries.o
+ FFT_vloc_G_to_R.o
  MAG_A_phase.o
  MAG_Hamiltonian.o
  MAG_common_build_A.o
diff --git a/src/interface/INIT_load.F b/src/interface/INIT_load.F
index 780e0c8b0e..a5c0e5a8d8 100644
--- a/src/interface/INIT_load.F
+++ b/src/interface/INIT_load.F
@@ -666,6 +666,12 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it('f',defs,'SCnlMix','[SC] Use SC non-local mixing',verb_level=V_sc)
  call it('f',defs,'FrozeDensity','[NL] Do not update density (for testing purposes)',verb_level=V_sc)
  !
+ ! Bare H reconstruction
+ !
+ call it('f',defs,'BareHfromScratch','Compute bare H_0 from scratch with p**2, Vloc and becp%k')
+ call it('f',defs,'BareHScaleFermi','Shift BareH matrix elements by Fermi energy')
+ call it('f',defs,'WriteHsc','Print Hamiltonian matrix elements (for testing purposes, serial run)')
+ !
  ! SC [NEQ]
  !
  call it(defs,'SCneqKIND',  '[SC] Options are [contrained-occ/constrained-mu/matsubara]',SC_neq_kind,verb_level=V_sc)
diff --git a/src/io/.objects b/src/io/.objects
index 6962cdbb0c..4c2da66de8 100644
--- a/src/io/.objects
+++ b/src/io/.objects
@@ -22,7 +22,7 @@ objs = ver_is_gt_or_eq.o \
        io_DB1.o       io_WF.o       io_KB_abinit.o   io_KB_pwscf.o   io_HF_and_locXC.o \
        io_BS_Fxc.o  io_NLCC_pwscf.o \
        io_ATMPROJ_pwscf.o  io_descriptors.o \
-       io_BSS_diago.o  io_BSS_Haydock.o io_BSS_invert.o io_USPP_pwscf.o \
+       io_BSS_diago.o  io_BSS_Haydock.o io_BSS_invert.o io_USPP_pwscf.o io_VLOC_pwscf.o \
        io_header.o io_Double_Grid.o \
        io_COL_CUT.o    io_DB1_selective_scan.o variables_X.o variables_DIPOLES.o \
        io_out_of_date.o \
diff --git a/src/io/DOUBLE_project.dep b/src/io/DOUBLE_project.dep
index 50ca185728..4b55f525d1 100644
--- a/src/io/DOUBLE_project.dep
+++ b/src/io/DOUBLE_project.dep
@@ -30,6 +30,7 @@
  io_RT_components_header.o
  io_SC_components.o
  io_USPP_pwscf.o
+ io_VLOC_pwscf.o
  io_Vnl.o
  io_WF.o
  io_X.o
diff --git a/src/io/io_USPP_pwscf.F b/src/io/io_USPP_pwscf.F
index 3646c153cf..cdd45a7f92 100644
--- a/src/io/io_USPP_pwscf.F
+++ b/src/io/io_USPP_pwscf.F
@@ -161,6 +161,12 @@ integer function io_USPP_pwscf(ID)
    !
  enddo specie_loop
  !
+ ! [FP]
+#ifndef _USPP
+ nhtoj=0.d0 ! This is set to zero since not initialised in non-USPP case
+#endif
+ ! [FP]
+ !
  ! uspp
  call io_bulk(ID,VAR="indv",VAR_SZ=shape(indv))
  call io_bulk(ID,I2=indv)
diff --git a/src/io/io_VLOC_pwscf.F b/src/io/io_VLOC_pwscf.F
new file mode 100644
index 0000000000..2b18d80e3e
--- /dev/null
+++ b/src/io/io_VLOC_pwscf.F
@@ -0,0 +1,71 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): FP
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+integer function io_VLOC_pwscf(ID)
+ !
+ use pars,             ONLY: DP,IP
+ use stderr,           ONLY: intc
+ use IO_m,             ONLY: io_sec,read_is_on,write_is_on
+ use IO_int,           ONLY: io_connect,io_disconnect, &
+&                            io_variable_elemental,io_variable_bulk,io_header, &
+&                            def_variable_elemental,def_variable_bulk
+ use qe_pseudo_m,      ONLY: nsp
+ use gvect,            ONLY: ngm,ngl
+ use vlocal,           ONLY: vloc_yambo
+ !
+ implicit none
+ !
+ integer,intent(in) :: ID
+ !
+ ! Work Space
+ !
+ io_VLOC_pwscf=io_connect(desc="vloc_pp_pwscf",type=0,ID=ID)
+ if (io_VLOC_pwscf/=0) goto 1
+ !
+ ! Dimensions
+ !
+ if (any((/io_sec(ID,:)==1/))) then
+   !
+   ! g-vector variables
+   !
+   call def_variable_elemental(ID,VAR="ngm",VAR_SZ=1,VAR_KIND=IP+100,MENU=0)
+   call  io_variable_elemental(ID,I0=ngm,VAR="Number of G-vectors",CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(ID,VAR="ngl",VAR_SZ=1,VAR_KIND=IP+100,MENU=0)
+   call  io_variable_elemental(ID,I0=ngl,VAR="Number of G-shells",CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(ID,VAR="nsp",VAR_SZ=1,VAR_KIND=IP+100,MENU=0)
+   call  io_variable_elemental(ID,I0=nsp,VAR="Number of at. species",CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(ID,VAR="",VAR_SZ=0,VAR_KIND=0,MENU=1)
+   !
+ endif
+ !
+ ! vloc
+ !
+ call def_variable_bulk(ID,"vloc",1,shape(vloc_yambo),DP)
+ call  io_variable_bulk(ID,1,D2=vloc_yambo)
+ !
+1 call io_disconnect(ID=ID)
+ !
+end function io_VLOC_pwscf
diff --git a/src/modules/SET_defaults.F b/src/modules/SET_defaults.F
index c9f11f8cea..e371116d2d 100644
--- a/src/modules/SET_defaults.F
+++ b/src/modules/SET_defaults.F
@@ -33,7 +33,7 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  use D_lattice,  ONLY:i_space_inv,inv_index,n_atoms_species_max,n_atomic_species,mag_syms,&
 &                     input_Tel_is_negative,non_periodic_directions,lattice,Bose_Temp,    &
 &                     molecule_position
- use pseudo,     ONLY:pp_n_l_times_proj_max,pp_kbv_dim
+ use pseudo,     ONLY:pp_n_l_times_proj_max,pp_kbv_dim,l_use_p2_Vloc_becp
  use R_lattice,  ONLY:n_g_shells,ng_closed,bse_scattering,coll_scattering,&
 &                     Xk_grid_is_uniform,RIM_id_epsm1_reference,RIM_epsm1,&
 &                     RIM_anisotropy,RIM_ng,RIM_W_ng,rimw_type,RIM_n_rand_pts,nqibz,q0_def_norm,&
@@ -567,7 +567,9 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  l_force_SndOrd =.FALSE.
  !
 #endif
- !
+ ! [FP]
+ l_use_p2_Vloc_becp=.false.
+ ! [FP]
 #if defined _RT
  !
  ! Real Time
diff --git a/src/modules/mod_D_lattice.F b/src/modules/mod_D_lattice.F
index f7d346e419..cdcc51944f 100644
--- a/src/modules/mod_D_lattice.F
+++ b/src/modules/mod_D_lattice.F
@@ -281,11 +281,15 @@ subroutine atom_mapper(nat,tau,tau_units,map)
           !
         enddo search_loop
         !
-        if (any(map(:,:)==0)) call error(' [Atoms] incomplete mapping found')
-        !
      enddo
      enddo
      !
+     ![FP] old check was wrong if n_atoms_species has different values
+     !if (any(map(:,:)==0)) call error(' [Atoms] incomplete mapping found')
+     do is=1,n_atomic_species
+       if (any(map(1:n_atoms_species(is),is)==0)) call error(' [Atoms] incomplete mapping found')
+     end do
+     !
      YAMBO_FREE(tau_)
      !
    end subroutine atom_mapper
diff --git a/src/modules/mod_hamiltonian.F b/src/modules/mod_hamiltonian.F
index 67b7f088d9..cbd67839df 100644
--- a/src/modules/mod_hamiltonian.F
+++ b/src/modules/mod_hamiltonian.F
@@ -31,6 +31,11 @@ module hamiltonian
  !
  real(SP),    allocatable :: V_xc_sc(:,:)
  real(SP),    allocatable :: V_hartree_sc(:,:)
+ ! [FP] Non-local part of pseudopotential
+ complex(SP),    allocatable :: Hext_nl(:,:)
+ !      Local part of pseudopotential
+ complex(SP),    allocatable :: Hext_loc(:,:)
+ ! [FP]
  !
  logical           :: l_sc_V_is_local=.TRUE.
  logical           :: l_sc_XC_is_local=.TRUE.
@@ -91,9 +96,8 @@ subroutine H_QP_table_setup(E)
    subroutine H_alloc(E,l_real_space,l_rotation_matrix)
      use pars,           ONLY:SP
      use drivers,        ONLY:l_nl_optics,l_sc_run
-     use electrons,      ONLY:n_spin,n_sp_pol,levels,E_reset,E_duplicate
+     use electrons,      ONLY:n_sp_pol,levels,E_reset,E_duplicate
      use FFT_m,          ONLY:fft_size
-     use xc_functionals, ONLY:magn,V_xc
      !
      implicit none
      !
@@ -113,11 +117,27 @@ subroutine H_alloc(E,l_real_space,l_rotation_matrix)
      !
      YAMBO_ALLOC(H_nl_sc,(H_ref_bands(1):H_ref_bands(2),H_ref_bands(1):H_ref_bands(2)))
      !
+     ! [FP]
+     YAMBO_ALLOC(Hext_nl,(H_ref_bands(1):H_ref_bands(2),H_ref_bands(1):H_ref_bands(2)))
+     YAMBO_ALLOC(Hext_loc,(H_ref_bands(1):H_ref_bands(2),H_ref_bands(1):H_ref_bands(2)))
+     ! [FP]
+     !
      if(l_rotation_matrix) then
        YAMBO_ALLOC(H_rotation,(H_ref_bands(1):H_ref_bands(2),H_ref_bands(1):H_ref_bands(2),E%nk,n_sp_pol))
      endif
      !
-     if (.not.l_real_space.or.fft_size==0) return
+     if ((.not.l_real_space).or.fft_size==0) return
+     !
+     call H_alloc_real_space()
+     !
+   end subroutine
+   !
+   subroutine H_alloc_real_space()
+     use FFT_m,          ONLY:fft_size
+     use electrons,      ONLY:n_spin
+     use xc_functionals, ONLY:magn,V_xc
+     !
+     implicit none
      !
      YAMBO_ALLOC(rho_n,(fft_size))
      !
@@ -152,6 +172,7 @@ subroutine H_free()
      YAMBO_FREE(V_xc_sc)
      YAMBO_FREE(V_hartree_sc)
      !
+     !stop 'check'
    end subroutine
    !
 end module
diff --git a/src/modules/mod_pseudo.F b/src/modules/mod_pseudo.F
index 7d8e912c03..c7a179e343 100644
--- a/src/modules/mod_pseudo.F
+++ b/src/modules/mod_pseudo.F
@@ -63,6 +63,9 @@ module pseudo
  logical                 :: qe_pseudo_alloc=.FALSE. 
  integer,     allocatable:: qe_atoms_map(:,:)
  type(bec_type), allocatable:: becp(:,:)
+ ! [FP] Direct H bare calculation (testing)
+ logical              :: l_use_p2_Vloc_becp
+ ! [FP]
  !
  ! PW (OLD)
  !=========
diff --git a/src/parallel/PARALLEL_SETUP_K_scheme.F b/src/parallel/PARALLEL_SETUP_K_scheme.F
index 6a5086c823..5f277ed5d4 100644
--- a/src/parallel/PARALLEL_SETUP_K_scheme.F
+++ b/src/parallel/PARALLEL_SETUP_K_scheme.F
@@ -50,7 +50,7 @@ subroutine PARALLEL_SETUP_K_scheme(PAR_K_scheme_kind)
      PAR_K_scheme%bz_index=PAR_BZINDXk_bz_index
    endif
    PAR_K_scheme%comm_world=mpi_comm_world
- case ("DIP","RT","SC","QED","YPP")
+ case ("DIP","RT","QED","YPP")
    call COMM_copy(PAR_COM_DIPk_ibz_INDEX,PAR_K_scheme%COM_ibz_INDEX)
    call COMM_copy(PAR_COM_DIPk_ibz_A2A,  PAR_K_scheme%COM_ibz_A2A)
    call PAR_INDEX_copy(PAR_IND_DIPk_ibz,PAR_K_scheme%IND_ibz)
@@ -66,7 +66,7 @@ subroutine PARALLEL_SETUP_K_scheme(PAR_K_scheme_kind)
      PAR_K_scheme%bz_index=PAR_DIPk_bz_index
    endif
    PAR_K_scheme%comm_world=mpi_comm_world
- case ("X","NL")
+ case ("X","NL","SC")
    call COMM_copy(PAR_COM_Xk_ibz_INDEX,PAR_K_scheme%COM_ibz_INDEX)
    call COMM_copy(PAR_COM_Xk_ibz_A2A,  PAR_K_scheme%COM_ibz_A2A)
    call PAR_INDEX_copy(PAR_IND_Xk_ibz,PAR_K_scheme%IND_ibz)
diff --git a/src/parallel/PARALLEL_global_Self_Energy.F b/src/parallel/PARALLEL_global_Self_Energy.F
index ec0342c628..680a8693bc 100644
--- a/src/parallel/PARALLEL_global_Self_Energy.F
+++ b/src/parallel/PARALLEL_global_Self_Energy.F
@@ -23,15 +23,17 @@ subroutine PARALLEL_global_Self_Energy(E,Xk,q)
 &                        PAR_COM_Q_A2A
  ! IND
  use parallel_m,    ONLY:PAR_IND_QP,PAR_IND_Q_ibz,PAR_IND_G_b,PAR_IND_Q_bz,PAR_IND_Xk_ibz,PAR_IND_G_k,&
-&                        PAR_IND_DIPk_bz,PAR_IND_DIPk_ibz,PAR_IND_OVLPk_ibz
+&                        PAR_IND_DIPk_bz,PAR_IND_DIPk_ibz,PAR_IND_OVLPk_ibz,PAR_IND_Xk_bz
  ! INDEX
- use parallel_m,    ONLY:PAR_QP_index,PAR_Q_ibz_index,PAR_Q_bz_index,PAR_G_bands_index,PAR_IND_Bp_mat
+ use parallel_m,    ONLY:PAR_QP_index,PAR_Q_ibz_index,PAR_Q_bz_index,PAR_G_bands_index,PAR_IND_Bp_mat,&
+&                        PAR_Xk_ibz_index,PAR_Xk_bz_index
  ! DIMENSIONS
- use parallel_m,    ONLY:PAR_nG_bands,PAR_nQ_ibz,PAR_nQ_bz,PAR_nQP,PAR_n_Bp_mat_elements
+ use parallel_m,    ONLY:PAR_nG_bands,PAR_nQ_ibz,PAR_nQ_bz,PAR_nQP,PAR_n_Bp_mat_elements,&
+&                        PAR_Xk_nbz,PAR_Xk_nibz
  ! ID's
- use parallel_m,    ONLY:PAR_IND_Xk_ibz_ID,PAR_IND_G_b_ID,PAR_IND_QP_ID,PAR_IND_Q_ibz_ID,PAR_IND_Q_bz_ID
+ use parallel_m,    ONLY:PAR_IND_Xk_ibz_ID,PAR_IND_Xk_bz_ID,PAR_IND_G_b_ID,PAR_IND_QP_ID,PAR_IND_Q_ibz_ID,PAR_IND_Q_bz_ID
 #if defined _SC
- use parallel_m,    ONLY:PAR_COM_Xk_ibz_INDEX,PAR_IND_Xk_bz
+ use parallel_m,    ONLY:PAR_COM_Xk_ibz_INDEX
 #endif
 #if defined _SC || defined _RT || defined _QED
  use drivers,       ONLY:l_elphoton_corr,l_life,l_real_time,l_sc_run
@@ -170,9 +172,21 @@ subroutine PARALLEL_global_Self_Energy(E,Xk,q)
  if (trim(WHAT)=="ibz".or.trim(WHAT)=="RIM") then
    call PARALLEL_add_Q_to_K_list('k_qp_q_'//trim(WHAT),PAR_IND_QP,PAR_IND_QP_ID,PAR_IND_Xk_ibz,PAR_IND_Xk_ibz_ID,&
 &                                PAR_IND_Q_ibz,PAR_COM_QP_INDEX,(/0,0/),Xk,q)
+   PAR_Xk_nibz=PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1)
+   !
+   ! ... indexes
+   !
+   YAMBO_ALLOC(PAR_Xk_ibz_index,(nXkibz))
+   call PAR_build_index(PAR_IND_Xk_ibz,nXkibz,PAR_Xk_ibz_index,PAR_Xk_nibz)
  else if (trim(WHAT)=="bz") then
    call PARALLEL_add_Q_to_K_list('k_qp_q_'//trim(WHAT),PAR_IND_QP,PAR_IND_QP_ID,PAR_IND_Xk_ibz,PAR_IND_Xk_ibz_ID,&
 &                                PAR_IND_Q_bz,PAR_COM_QP_INDEX,(/0,0/),Xk,q)
+   PAR_Xk_nbz=PAR_IND_Xk_bz%n_of_elements(PAR_IND_Xk_bz_ID+1)
+   !
+   ! ... indexes
+   !
+   YAMBO_ALLOC(PAR_Xk_bz_index,(nXkbz))
+   call PAR_build_index(PAR_IND_Xk_bz,nXkbz,PAR_Xk_bz_index,PAR_Xk_nbz)
  endif
  !
  ! G bands
diff --git a/src/parallel/PHEL_project.dep b/src/parallel/PHEL_project.dep
index b51e13f8a5..5a22ceb339 100644
--- a/src/parallel/PHEL_project.dep
+++ b/src/parallel/PHEL_project.dep
@@ -1,3 +1,2 @@
  PARALLEL_global_dimensions.o
- PARALLEL_global_indexes.o
 
diff --git a/src/qp/.objects b/src/qp/.objects
index 5c82a88aa6..d925739ec4 100644
--- a/src/qp/.objects
+++ b/src/qp/.objects
@@ -1,6 +1,6 @@
 SC_objects =
 #if defined _SC
-SC_objects = QP_load_SC_xc.o
+SC_objects = QP_load_SC_xc.o XCo_from_H_minus_Hbare.o
 #endif
 QED_objects =
 #if defined _QED
@@ -11,4 +11,5 @@ objs = QP_driver.o QP_newton.o QP_mpa.o QP_interpolate_W.o QP_ppa_cohsex.o \
        QP_secant_driver.o QP_real_axis.o QP_W2Sc.o QP_life_transitions.o QP_secant.o QP_bracket.o \
        QP_expand.o QP_descriptions.o QP_report_and_write.o  \
        QP_Green_Function.o QP_of.o QP_states_simmetrize.o \
-       XCo_driver.o XCo_Hartree_Fock.o XCo_local.o XCo_report.o $(SC_objects) $(QED_objects)
+       XCo_driver.o XCo_Hartree_Fock.o XCo_local.o XCo_report.o \
+       $(SC_objects) $(QED_objects)
diff --git a/src/qp/DOUBLE_project.dep b/src/qp/DOUBLE_project.dep
index 0502ba06ee..673c04da46 100644
--- a/src/qp/DOUBLE_project.dep
+++ b/src/qp/DOUBLE_project.dep
@@ -22,6 +22,7 @@
  QP_states_simmetrize.o
  XCo_Hartree_Fock.o
  XCo_driver.o
+ XCo_from_H_minus_Hbare.o
  XCo_local.o
  XCo_report.o
 
diff --git a/src/qp/SC_project.dep b/src/qp/SC_project.dep
index 21daf8ff57..24dcc9526d 100644
--- a/src/qp/SC_project.dep
+++ b/src/qp/SC_project.dep
@@ -3,5 +3,6 @@
  QP_ppa_cohsex.o
  XCo_Hartree_Fock.o
  XCo_driver.o
+ XCo_from_H_minus_Hbare.o
  XCo_local.o
 
diff --git a/src/qp/XCo_driver.F b/src/qp/XCo_driver.F
index 618a76b332..3cd4ccb1d6 100644
--- a/src/qp/XCo_driver.F
+++ b/src/qp/XCo_driver.F
@@ -5,13 +5,14 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
-subroutine XCo_driver(E,k,Xk,q)
+subroutine XCo_driver(E,k,Xk,q,Dip)
  !
  use zeros,         ONLY:zero_dfl
  use drivers,       ONLY:l_acfdt,l_sc_run,l_real_time
  use pars,          ONLY:schlen,cZERO
  use electrons,     ONLY:levels
  use R_lattice,     ONLY:bz_samp
+ use DIPOLES,       ONLY:DIPOLE_t
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:OP_RD_CL,OP_WR_CL,VERIFY,REP
  use QP_m,          ONLY:XCo_Hartree_Fock,QP_t,Vxc_kind,Vnlxc_kind,&
@@ -20,7 +21,10 @@ subroutine XCo_driver(E,k,Xk,q)
  use wave_func,     ONLY:WF
  use global_XC,     ONLY:WF_exx_fraction,WF_exx_screening,EXT_NONE,WF_kind,&
 &                        global_XC_string,WF_perturbation,WF_xc_functional
+ use parser_m,      ONLY:parser 
+ use parallel_m,    ONLY:ncpu,PARALLEL_CPU_role,PARALLEL_CPU_used
 #if defined _SC
+ use pseudo,        ONLY:l_use_p2_Vloc_becp
  use hamiltonian,   ONLY:l_sc_V_is_local
  use global_XC,     ONLY:H_SE_FOCK,H_SE_HARTREE,H_SE_COH,H_SE_SEX
 #endif
@@ -29,6 +33,7 @@ subroutine XCo_driver(E,k,Xk,q)
  !
  type(levels) ::E       
  type(bz_samp)::k,Xk,q
+ type(DIPOLE_t),optional ::Dip
  !
  ! Reporting
  !
@@ -36,6 +41,9 @@ subroutine XCo_driver(E,k,Xk,q)
  integer           ::WF_kind_no_H
  logical           ::l_section,l_load_SC_V
  logical           ::l_Vxc,l_Vnl_xc,l_screen_hyb
+ ! Vxc from libxc or from H-Hbare
+ logical           ::l_use_libxc,l_use_qe_pseudo
+ integer           ::i_role
  type(QP_t)        ::qp
  !
  ! IO
@@ -88,7 +96,7 @@ subroutine XCo_driver(E,k,Xk,q)
  if (.not.l_Vxc.and..not.l_Vnl_xc) then
   if (l_sc_run) call QP_XCo_alloc(l_Vxc,l_Vnl_xc,l_screen_hyb)
   return
- endif
+ endif 
  !
  ! Section
  !
@@ -141,7 +149,7 @@ subroutine XCo_driver(E,k,Xk,q)
      call XCo_Hartree_Fock(E,k,xk,q,mode='hyb')
      QP_Vnl_sex=QP_Vnl_xc                        
      QP_Vnl_xc=cZERO                                
-   end if
+   endif
    !
    ! ... pure HF
    !
@@ -154,15 +162,50 @@ subroutine XCo_driver(E,k,Xk,q)
  !
  if (l_Vxc) then
    !
-   call XCo_local(E,Xk)
+   l_use_libxc=.true.
+#if defined _SC
+   ! l_use_p2_Vloc_becp changes the behaviour of WF_load causing
+   ! it to enter uspp sections in order to get Hbare parameters
+   call parser('BareHfromScratch', l_use_p2_Vloc_becp)
+   l_use_libxc=.not.l_use_p2_Vloc_becp
+   l_use_qe_pseudo=l_use_p2_Vloc_becp
+   ! FP 15/11/23: MPI q-tasks lead to problems with DIPOLE_IO of P_square in Bare_Hamiltonian_from_Scracth (to be fixed)
+#if defined _MPI 
+   if (l_use_p2_Vloc_becp) then
+     if(ncpu/=1) then
+       do i_role=1,size(PARALLEL_CPU_role)
+         if (trim(PARALLEL_CPU_role(i_role))=='q') then
+           if (PARALLEL_CPU_used(i_role)/=1) then 
+             call error("[PARALLEL] Turn off MPI tasks over "//trim(PARALLEL_CPU_role(i_role))//" for Hbare reconstruction")
+           endif
+           exit
+         endif
+       enddo
+     endif
+   endif
+#endif
+#endif
    !
-   if (.not.l_screen_hyb) then
-     QP_Vxc = QP_Vxc + WF_exx_fraction*QP_Vnl_xc !Hybridize...
-   else
-     QP_Vxc = QP_Vxc + WF_exx_fraction*QP_Vnl_sex 
-     call QP_XCo_free(l_screen_hyb=.true.)
+   if (l_use_libxc) then 
+     !
+     call XCo_local(E,Xk)
+     !
+     if (.not.l_screen_hyb) then
+       QP_Vxc = QP_Vxc + WF_exx_fraction*QP_Vnl_xc !Hybridize...
+     else
+       QP_Vxc = QP_Vxc + WF_exx_fraction*QP_Vnl_sex 
+       call QP_XCo_free(l_screen_hyb=.true.)
+     endif
+     !
    endif
    !
+#if defined _SC
+   if (l_use_qe_pseudo) call XCo_from_H_minus_Hbare(E,Xk,k,q,Dip)
+   !
+   ! Now WF_load can be safely called again
+   l_use_p2_Vloc_becp=.false.
+#endif
+   !
  endif
  !
  ! Kind transfer
@@ -198,6 +241,7 @@ subroutine XCo_driver(E,k,Xk,q)
    io_err=io_HF_and_locXC(ID)
    !
    call WF_free(WF)
+   !
    call XCo_report(qp,E,k)
    !
  endif
diff --git a/src/qp/XCo_from_H_minus_Hbare.F b/src/qp/XCo_from_H_minus_Hbare.F
new file mode 100644
index 0000000000..e21f269520
--- /dev/null
+++ b/src/qp/XCo_from_H_minus_Hbare.F
@@ -0,0 +1,204 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM, DS
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+!> @brief Calculate QP_Vxc = E_bare - h0 - VHartree (i.e., for GW+U calculations)
+!
+! @param[in]       E               energy levels
+! @param[in]       Xk              k-grid X
+! @param[in]       k               k-grid
+! @param[in]       Dip             Dipoles
+!
+! @param[out]      QP_Vxc          DFT xc potential +U term
+!
+subroutine XCo_from_H_minus_Hbare(E,Xk,k,q,Dip)
+ !
+ ! Local V_xc (+ Hubbard term if present): 
+ ! 
+ ! <nk| V_xc +U |nk> = E^KS - <nk| h^0 +V^H |nk>
+ !
+ ! We need the KS eigenvalues, the noninteracting Hamiltonian and the Hartree 
+ ! potential written in the QP basis -> we get QP_Vxc to be subtracted in QP equation
+ !
+ use com,           ONLY:msg 
+ use pars,          ONLY:SP,cZERO
+ use electrons,     ONLY:levels,spin,n_sp_pol
+ use QP_m,          ONLY:QP_Vxc,QP_n_states,QP_table,QP_nk,QP_ng_SH,QP_ng_Vxc
+ use R_lattice,     ONLY:bz_samp,ng_vec,qindx_S
+ use DIPOLES,       ONLY:DIPOLE_t
+ use wave_func,     ONLY:WF
+ use hamiltonian,   ONLY:Hzero,V_hartree_sc,H_alloc,H_free,E_reference,H_ref_bands,&
+&                        WF_G_max!,WF_Go_indx
+ use H_interfaces,  ONLY:V_real_space_to_H
+ use parallel_int,  ONLY:PP_redux_wait
+ use parallel_m,    ONLY:PAR_IND_QP
+ use IO_int,        ONLY:IO_and_Messaging_switch
+ use timing_m,      ONLY:timing
+ use FFT_m,         ONLY:fft_size
+ use parser_m,      ONLY:parser
+ !
+#include<memory.h>
+ !
+ type(levels) ::E
+ type(bz_samp)::Xk,k,q
+ type(DIPOLE_t)::Dip
+ !
+ ! Work Space
+ !
+ integer     ::i1,ib,ibp,ik,i_sp_pol
+ !
+ ! Debug
+ !
+ complex(SP), allocatable :: Vh_test(:,:,:,:)
+ logical ::l_write_Vxc
+ !
+ call parser('WriteVxc',l_write_Vxc) ! only for testing purposes
+ !
+ if (l_write_Vxc) then
+  open(3, file='Hbare_TEST.dat')
+  open(4, file='Hplus_TEST.dat')
+  open(5, file='Eref_TEST.dat')
+  open(6, file='QPVxc_TEST.dat')
+  open(7, file='VHartree_TEST.dat')
+ endif
+ !
+ call timing('XCo_from_H_minus_Hbare',OPR="start")
+ !
+ ! Allocation 
+ !
+ H_ref_bands(1)=minval(QP_table(:,1))
+ H_ref_bands(2)=maxval(QP_table(:,1))
+ !
+ ! Max G-vectors for FFT-vloc, WF for Hbare and VHartree
+ WF_G_max=ng_vec
+ !
+ ! Allocate only quantities not in real space, because
+ ! fft_size might change inside Bare_Hamiltonian_from_Scratch
+ ! when calling WF_load with new WF_G_max
+ call H_alloc(E,.false.,.false.)
+ ! 
+ ! The P2 dipoles (needed for Hzero) are already switched on by 
+ ! the Hbare logical in DIPOLE_driver
+ !
+ ! Here we get Hzero in H space and V_Hartree_sc in real space
+ !
+ call Bare_Hamiltonian_from_Scratch(E,Xk,k,q,Dip)
+ !
+ ! PARALLEL EFFICIENCY FOR THE FOLLOWING LOOPS NEED TO BE CHECKED!
+ ! (may include call to V_real_space_to_H in QP loop but seems wasteful)
+ !
+ ! First we get Hzero => Hzero+Vhartree in H space
+ !
+ ![DEBUG>]
+ if (l_write_Vxc) then ! Debug: get V_Hartree alone
+   !
+   allocate(Vh_test(H_ref_bands(1):H_ref_bands(2),H_ref_bands(1):H_ref_bands(2),E%nk,n_sp_pol))
+   Vh_test=cZERO
+   !
+   do i_sp_pol=1,n_sp_pol
+    do ik=1,QP_nk
+    !
+    call V_real_space_to_H(ik,i_sp_pol,Vh_test(:,:,ik,i_sp_pol),WF,'def',V=V_hartree_sc)
+    !
+    write(7,*) "# Vh    (ik,i_sp_pol) ",ik,i_sp_pol
+    do ib=H_ref_bands(1),H_ref_bands(2)
+      write(7,*) Vh_test(ib,:,ik,i_sp_pol)
+    enddo
+    !
+    enddo
+   enddo
+   close(7)
+   !
+ endif
+ ![DEBUG<]
+ !
+ do i_sp_pol=1,n_sp_pol
+  do ik=1,QP_nk
+  !
+  ![DEBUG>]
+  if (l_write_Vxc) then
+   write(3,*) "# Ho    (ik,i_sp_pol) ",ik,i_sp_pol
+   write(4,*) "# Ho+ VH (ik,i_sp_pol) ",ik,i_sp_pol
+   write(5,*) "# E_reference (ik,i_sp_pol) ",ik,i_sp_pol
+   do ib=H_ref_bands(1),H_ref_bands(2)
+    write(3,*) Hzero(ib,:,ik,i_sp_pol)
+    write(5,*) E_reference%E(ib,ik,i_sp_pol)
+   enddo
+  endif
+  ![DEBUG<]
+  !
+  call V_real_space_to_H(ik,i_sp_pol,Hzero(:,:,ik,i_sp_pol),WF,'def',V=V_hartree_sc)
+  !
+  ![DEBUG>]
+  if (l_write_Vxc) then
+   do ib=H_ref_bands(1),H_ref_bands(2)
+    write(4,*) Hzero(ib,:,ik,i_sp_pol)
+   enddo
+  endif
+  ![DEBUG<]
+  !
+  enddo
+ enddo
+ !  
+ ! Then we get E_dft-Hzero-Vhartree in QP space
+ !
+ do i1=1,QP_n_states
+  !
+  !if (.not.PAR_IND_QP%element_1D(i1)) cycle
+  !
+  ib =QP_table(i1,1)
+  ibp=QP_table(i1,2)
+  ik =QP_table(i1,3)
+  i_sp_pol=spin(QP_table(i1,:))
+  !
+  QP_Vxc(i1)=E_reference%E(ib,ik,i_sp_pol)-Hzero(ib,ibp,ik,i_sp_pol)
+  !
+  ![DEBUG>]
+  if (l_write_Vxc) then
+   write(6,*) "#QP_Vxc (ik,i_sp_pol,ib,ibp) ",ik,i_sp_pol,ib,ibp
+   write(6,*) QP_Vxc(i1)
+  endif
+  ![DEBUG<]
+  !
+ enddo
+ !
+ !call PP_redux_wait(QP_Vxc)
+ !
+ ![DEBUG>]
+ if (l_write_Vxc) then
+  close(3)
+  close(4)
+  close(5)
+  close(6)
+  deallocate(Vh_test)
+ endif
+ ![DEBUG<]
+ !
+ call H_free()
+ !
+ call msg('rsn','[XC] H_BARE reconstruction used to calculate xc functional')
+ !
+ call timing('XCo_from_H_minus_Hbare',OPR="stop")
+ !
+ return
+ !
+end subroutine
diff --git a/src/qp/XCo_local.F b/src/qp/XCo_local.F
index c38222e995..3c7062c3c7 100644
--- a/src/qp/XCo_local.F
+++ b/src/qp/XCo_local.F
@@ -15,7 +15,7 @@ subroutine XCo_local(E,Xk)
  use R_lattice,     ONLY:bz_samp
  use FFT_m,         ONLY:fft_size
  use wave_func,     ONLY:WF
- use xc_functionals,ONLY:E_xc,V_xc,XC_potential_driver,magn,XC_potential_driver
+ use xc_functionals,ONLY:E_xc,V_xc,XC_potential_driver,magn
  use global_XC,     ONLY:WF_xc_functional,WF_exx_fraction,WF_exx_screening,WF_kind
  use wrapper_omp,   ONLY:Vstar_dot_V_omp
  use parallel_m,    ONLY:PAR_IND_WF_linear 
@@ -36,6 +36,7 @@ subroutine XCo_local(E,Xk)
  use SC,            ONLY:load_SC_components,SC_fft_size,compatible_SC_DB
 #endif
  use timing_m,      ONLY:timing
+ use parser_m,      ONLY:parser
  !
 #include<memory.h>
  !
@@ -54,7 +55,11 @@ subroutine XCo_local(E,Xk)
  !
  complex(SP), allocatable :: V_xc_mat(:,:,:)
  !
+ ! Debug
+ logical :: l_write_Vxc
+ !
  call timing('XCo_local',OPR="start")
+ call parser('WriteVxc',l_write_Vxc) ! only for testing purposes
  !
  ! Allocation 
  !
@@ -165,6 +170,24 @@ subroutine XCo_local(E,Xk)
    !
    call timing('XCo_local',OPR="stop")
    !
+   ![DEBUG>]
+   if (l_write_Vxc) then
+     open(4, file='QPVxc_REF.dat')
+     do i1=1,QP_n_states
+      !
+      ib =QP_table(i1,1)
+      ibp=QP_table(i1,2)
+      ik =QP_table(i1,3)
+      i_sp_pol=spin(QP_table(i1,:))
+      !
+      write(4,*) "#QP_Vxc (ik,i_sp_pol,ib,ibp) ",ik,i_sp_pol,ib,ibp
+      write(4,*) QP_Vxc(i1)
+      !
+     enddo
+     close(4)
+   endif
+   ![DEBUG<]
+   !
    return
    !
  endif
diff --git a/src/real_time_initialize/RT_start_and_restart.F b/src/real_time_initialize/RT_start_and_restart.F
index 53ccd06e63..4f91cc2743 100644
--- a/src/real_time_initialize/RT_start_and_restart.F
+++ b/src/real_time_initialize/RT_start_and_restart.F
@@ -92,7 +92,7 @@ subroutine RT_start_and_restart(E,k,q)
    call              el_density_matrix(G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2)),E,k,rho_reference,1)
    if(n_spin>1) call el_magnetization_matrix(G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2)),E,k,magn_reference,1)
    !
-   call Bare_Hamiltonian(E,k,k)
+   call Bare_Hamiltonian_as_E_ks_minus_Vxc(E,k,k)
    !
    ! Reference Hartree and XC
    !==========================
diff --git a/src/sc/SC_driver.F b/src/sc/SC_driver.F
index 73abd8d9ce..a804346215 100644
--- a/src/sc/SC_driver.F
+++ b/src/sc/SC_driver.F
@@ -47,6 +47,8 @@ subroutine SC_driver(X,Xw,Xk,E,k,q,Dip)
  use collision_ext,  ONLY:COH_COLL_element,HXC_COLL_element,COLLISIONS_have_HARTREE
  use electrons,      ONLY:Spin_magn
  use electric,       ONLY:ELECTRIC_alloc,ELECTRIC_free,W_electric
+ use parser_m,       ONLY:parser
+ use pseudo,         ONLY:l_use_p2_Vloc_becp 
  !
 #include<memory.h>
  !
@@ -132,6 +134,10 @@ subroutine SC_driver(X,Xw,Xk,E,k,q,Dip)
 #if defined _ELECTRIC
  l_load_dipoles=l_load_dipoles.or.l_sc_electric
 #endif
+ ! [FP]
+ call parser('BareHfromScratch', l_use_p2_Vloc_becp)
+ !l_load_dipoles=l_load_dipoles.or.l_use_p2_Vloc_becp
+ ! [FP]
  if ( l_load_dipoles ) then
    call DIPOLE_dimensions(E,Dip,SC_bands,(/0._SP,0._SP,0._SP/))
    call DIPOLE_IO(k,E,Dip,'read ',io_DIP_err,"SC")
@@ -185,7 +191,15 @@ subroutine SC_driver(X,Xw,Xk,E,k,q,Dip)
  !
  !  Hzero (kinetic + potential)
  !=============================
- call Bare_Hamiltonian(E,Xk,k)
+ !
+ ! [FP]
+ if (l_use_p2_Vloc_becp) then 
+   call Bare_Hamiltonian_from_Scratch(E,Xk,k,Dip)
+ else
+   call Bare_Hamiltonian_as_E_ks_minus_Vxc(E,Xk,k)
+ endif
+ l_use_p2_Vloc_becp=.false.
+ ! [FP]
  !
  call OCCUPATIONS_Fermi(E,k,"E",mode="OCCUPATIONS") 
  !
diff --git a/src/setup/G_shells_finder.F b/src/setup/G_shells_finder.F
index 8a6640f61c..f30262cfcf 100644
--- a/src/setup/G_shells_finder.F
+++ b/src/setup/G_shells_finder.F
@@ -183,9 +183,15 @@ subroutine G_shells_finder()
  enddo
  !
  if (n_g_shells_no_holes/=n_g_shells) then
-   call warning(' Found non closed shells. Max cutoff will be reduced.')
-   call warning(' Set Gthresh>1.E-5 in input to avoid this. Too big Gthresh will pack shells together')
-   call msg('rn','Full and reduced cutoff',(/E_of_shell_TMP(n_g_shells_no_holes),E_of_shell_TMP(n_g_shells)/)*1000._SP,'[mHa]')
+   !
+   if(G_mod_zero/=1.E-5) then
+     call warning(' Found non closed shells. Max cutoff will be reduced.')
+     call warning(' Try to change Gthresh in input to avoid this. Too big Gthresh will pack shells together')
+     call msg('rn','Full and reduced cutoff',(/E_of_shell_TMP(n_g_shells_no_holes),E_of_shell_TMP(n_g_shells)/)*1000._SP,'[mHa]')
+   else
+     call error(' Found non closed shells. Set Gthresh>1.E-5 in input to avoid this.')
+   endif
+   !
  endif
  !
  n_g_shells=n_g_shells_no_holes
diff --git a/src/wf_and_fft/PP_compute_becp.F b/src/wf_and_fft/PP_compute_becp.F
index 14454d820e..f4bae23737 100644
--- a/src/wf_and_fft/PP_compute_becp.F
+++ b/src/wf_and_fft/PP_compute_becp.F
@@ -37,6 +37,7 @@ subroutine PP_compute_becp(becp, npwk, wf_nb, wf_c, wf_b_indx)
  !
  ! checks
  !
+ !write (*,*) 'PP_compute_becp: pp_is_uspp is ',pp_is_uspp 
  if (.not.pp_is_uspp) return
  if (.not.qe_pseudo_alloc) call error(' [PP] qe_pseudo not alloc in PP_compute_becp')
  call timing("PP_compute_becp","start")
diff --git a/src/wf_and_fft/PP_uspp_init.F b/src/wf_and_fft/PP_uspp_init.F
index 4bbd29b9f5..05af778a94 100644
--- a/src/wf_and_fft/PP_uspp_init.F
+++ b/src/wf_and_fft/PP_uspp_init.F
@@ -1,6 +1,7 @@
 !
 ! License-Identifier: GPL
 !
+! Authors (see AUTHORS file for details): AF, IM, FP
 ! Copyright (C) 2018 The Yambo Team
 !
 ! Authors (see AUTHORS file for details): AF IM
@@ -16,7 +17,8 @@ subroutine PP_uspp_init()
  use vec_operate,   ONLY:c2a
  use D_lattice,     ONLY:nsym,dl_sop,atom_mapper
  use R_lattice,     ONLY:b,g_vec,ng_vec
- use pseudo,        ONLY:pp_is_uspp,qe_pseudo_alloc,PP_uspp_free,qe_atoms_map
+ use pseudo,        ONLY:pp_is_uspp,qe_pseudo_alloc,PP_uspp_free,qe_atoms_map,&
+&                        l_use_p2_Vloc_becp,PP_uspp_alloc
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:OP_RD_CL,REP
  !
@@ -24,26 +26,36 @@ subroutine PP_uspp_init()
  use qe_pseudo_m,   ONLY:nkb,qe_nsym=>nsym,d1,d2,d3
  use gvect,         ONLY:qe_eigts1=>eigts1, qe_eigts2=>eigts2, qe_eigts3=>eigts3,&
 &                        qe_g=>g, qe_gg=>gg, qe_ngm=>ngm, qe_mill=>mill
+ use vlocal,        ONLY:strf,vloc_alloc,vloc_dealloc
  use timing_m,      ONLY:timing
+ use parser_m,      ONLY:parser
  !
 #include<memory.h>
  !
  ! Work Space 
  !
- integer  :: io_err,ID
+ integer  :: io_err,ID,ID_vloc
  integer  :: gv_min,gv_max,i,ig,nfft(3)
  real(SP) :: v1(3)
  real(DP) :: qe_b(3,3)
- complex(DP), allocatable :: strf(:,:)
+ !complex(DP), allocatable :: strf(:,:)
+ logical :: l_will_need_becp
  !
  integer, external :: io_USPP_pwscf
+ integer, external :: io_VLOC_pwscf
 
+ call parser('BareHfromScratch', l_will_need_becp)
+ if (.not.l_will_need_becp) return
  !
  ! in case, assume qe_pseudo already alloc and init
  !
  if (qe_pseudo_alloc) return
  call timing("PP_uspp_init","start")
-
+ ! [FP]
+ if (l_use_p2_Vloc_becp) then
+   call PP_uspp_alloc() ! projectors for nonlocal part
+ endif
+ ! [FP] 
  !
  ! perform main data IO
  !
@@ -52,22 +64,34 @@ subroutine PP_uspp_init()
  !
  pp_is_uspp=(io_err==0)
  !
+ ! [FP] 
 #ifndef _USPP
- if (pp_is_uspp) call error("[PPs] Ultrasoft PP not supported")
+ if (pp_is_uspp.and..not.l_will_need_becp) then
+   call error("[PPs] Ultrasoft PP not supported. [Is ns.uspp_pp_pwscf present but should not?]")
+ endif
 #endif
  !
- if (.not.pp_is_uspp.or.nkb<=0) then
+ ! atom mapping
+ !
+ call atom_mapper(qe_nat,real(qe_tau*qe_alat,SP),"cc",qe_atoms_map)
+ !
+ if ((pp_is_uspp.and.l_will_need_becp.and..not.l_use_p2_Vloc_becp).or.&
+    (.not.pp_is_uspp.or.nkb<=0)) then
+ !if (.not.pp_is_uspp.or.nkb<=0) then
+   ! [FP]
    pp_is_uspp=.false.
    call PP_uspp_free()
    call timing("PP_uspp_init","stop")
    return
  endif
  
+
  !
  ! checks
- !
+ ! [FP]
  if (.not.( l_setup.or.l_col_cut.or.l_rim.or.list_dbs.or. &
-            l_HF_and_locXC) ) then
+            l_HF_and_locXC.or.l_use_p2_Vloc_becp) ) then
+    ! [FP]
     call error("[PPs] USPP not implemented for current runlevel")
  endif
 
@@ -76,20 +100,17 @@ subroutine PP_uspp_init()
  !
  qe_nsym=nsym
  call d_matrix(nsym,real(dl_sop,DP),d1,d2,d3)
-
- !
- ! atom mapping
- !
- call atom_mapper(qe_nat,real(qe_tau*qe_alat,SP),"cc",qe_atoms_map)
  
  !
  ! init g vectors and structure factors
  !
  qe_ngm=ng_vec
  !
+
  YAMBO_ALLOC(qe_g,(3,qe_ngm))
  YAMBO_ALLOC(qe_gg,(qe_ngm))
  YAMBO_ALLOC(qe_mill,(3,qe_ngm))
+
  !
  do ig = 1, ng_vec
     !
@@ -104,6 +125,7 @@ subroutine PP_uspp_init()
     !
  enddo
  !
+
  do i = 1, 3
    gv_min=minval(qe_mill(i,:))
    gv_max=maxval(qe_mill(i,:))
@@ -113,16 +135,159 @@ subroutine PP_uspp_init()
  YAMBO_ALLOC(qe_eigts1,(-nfft(1):nfft(1),qe_nat))
  YAMBO_ALLOC(qe_eigts2,(-nfft(2):nfft(2),qe_nat))
  YAMBO_ALLOC(qe_eigts3,(-nfft(3):nfft(3),qe_nat))
- YAMBO_ALLOC(strf,(qe_ngm,qe_nsp))
+ !
+ ! [FP]
+ ! vlocal quantities: strf and - if needed - vloc_yambo and vloc_full
+ call vloc_alloc(l_use_p2_Vloc_becp)
+ ! [FP]
  !
  qe_b=transpose(b)/qe_tpiba
  !
  call struc_fact(qe_nat,qe_tau,qe_nsp,qe_ityp,qe_ngm,qe_g,qe_b,&
 &                nfft(1),nfft(2),nfft(3), &
-&                .false.,strf,.true.,qe_eigts1,qe_eigts2,qe_eigts3)
- YAMBO_FREE(strf)
+&                .true.,strf,.true.,qe_eigts1,qe_eigts2,qe_eigts3)
+ !
+ ! [FP]
+ if (l_use_p2_Vloc_becp) then
+   !
+   ! Now that we have the structure factors, read and compute vloc
+   call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),ID=ID_vloc)
+   io_err=io_VLOC_pwscf(ID_vloc)
+   !
+   call PP_compute_Vloc()
+   !
+   ! Compute D_ij coefficients for nonlocal part of PP 
+   call PP_compute_deeq()
+   !
+ else
+   !
+   ! Dealloc strf here
+   call vloc_dealloc()
+   !
+ endif
+ ! [FP]
  !
  call timing("PP_uspp_init","stop")
  return
  !
 end subroutine PP_uspp_init
+
+subroutine PP_compute_deeq()
+ !
+ ! This subroutine gets the D-term needed to compute the
+ ! nonlocal part of the Hamiltonian together with the KB
+ ! projectors.
+ !
+ ! Adapted from subroutine newd() in PW/src/newd.f90 in QE (ovkan=.false. case).
+ !
+ ! USPP part (Q function) NOT implemented.
+ !
+ use pars,                   ONLY:SP,DP
+ ! data already stored by p2y
+ use uspp,                   ONLY:deeq,deeq_nc,dvan,dvan_so
+ use uspp_param,             ONLY:nh
+ use ions_base,              ONLY:nat,ityp
+ use lsda_mod,               ONLY:nspin
+ use noncollin_module,       ONLY: noncolin
+ ! qe_pseudo internal modules
+ use spin_orb,               ONLY:qe_lspinorb=>lspinorb
+ !
+ ! Work Space
+ !
+ integer :: it, ia, is, nht
+ ! counters atom type, atoms, spin, aux
+ !
+ do ia = 1, nat
+   !
+   it  = ityp(ia)
+   nht = nh(it)
+   !
+   if ( qe_lspinorb ) then
+     !
+     deeq_nc(1:nht,1:nht,ia,1:nspin) = dvan_so(1:nht,1:nht,1:nspin,it)
+     !
+   elseif ( noncolin ) then
+     !
+     deeq_nc(1:nht,1:nht,ia,1) = dvan(1:nht,1:nht,it)
+     deeq_nc(1:nht,1:nht,ia,2) = ( 0.D0, 0.D0 )
+     deeq_nc(1:nht,1:nht,ia,3) = ( 0.D0, 0.D0 )
+     deeq_nc(1:nht,1:nht,ia,4) = dvan(1:nht,1:nht,it)
+     !
+   else
+     !
+     do is = 1, nspin
+       !
+       deeq(1:nht,1:nht,ia,is) = dvan(1:nht,1:nht,it)
+       !
+     enddo
+     !
+   endif
+   !
+ enddo
+ !
+end subroutine
+
+subroutine PP_compute_Vloc()
+ !
+ ! This subroutine multiplies the structure factors to the Vloc in G-space:
+ !
+ ! vloc_full(G) = sum_sp vloc(G,sp) * struct_factor(G,sp)
+ !
+ ! vloc (qe: vloc_of_g / vloc_yambo) is read from database 
+ ! [use p2y -p when generating SAVE]
+ !
+ ! Adapted from subroutine setlocal() in PW/src/setlocal.f90 in QE
+ !
+ ! 2D cutoff and ESM not supported.
+ !
+ ! qe_pseudo internal modules
+ use pars,           ONLY: DP
+ use constants,      ONLY: eps8
+ !use ions_base,      ONLY: zv, ntyp => nsp
+ use cell_base,      ONLY: omega
+ use gvect,          ONLY: igtongl, gg, qe_ngm => ngm
+ use qe_pseudo_m,    ONLY: qe_nsp => nsp
+ use vlocal,         ONLY: vloc_yambo, vloc_full, strf
+ !use fft_base,       ONLY: dfftp
+ use FFT_m,          ONLY:fft_size
+ use control_flags,  ONLY: gamma_only
+ !use mp_bands,       ONLY: intra_bgrp_comm
+ !use mp,             ONLY: mp_sum
+ !
+ ! - vloc_yambo contains the G-components per atomic species
+ ! - vloc_full is the actual vloc(G) including the structure factors
+ !   (called 'aux' in original QE routine since they just used it for invfft)
+ !
+ ! Work space
+ !
+ INTEGER :: it, ig
+ ! counter on atom types
+ ! counter on g vectors
+ !
+ vloc_full(:) = (0.d0,0.d0)
+ !
+ ! Compute vloc_full
+ !
+ do it = 1, qe_nsp
+    do ig = 1, qe_ngm ! NOTA BENE: qe_ngm = dense G-vectors (per proc. in QE) > wf_ng 
+       vloc_full(ig) = vloc_full(ig) + vloc_yambo(ig,it) * strf(ig,it)
+    enddo
+ enddo
+ !
+ if (gamma_only) then
+    do ig = 1, qe_ngm
+       vloc_full(ig) = CONJG( vloc_full(ig) )
+    enddo
+ endif
+ ! ... vloc_full = potential in G-space.
+ !
+ ! [FP] The following QE part should not be needed. Commented.
+ !
+ ! ... v_of_0 is (Vloc)(G=0)
+ !
+ !v_of_0 = 0.0_DP
+ !IF (gg(1) < eps8) v_of_0 = DBLE( vloc_full(dfftp%nl(1)) )
+ !
+ !CALL mp_sum( v_of_0, intra_bgrp_comm )
+ !
+end subroutine PP_compute_Vloc
diff --git a/src/wf_and_fft/WF_load.F b/src/wf_and_fft/WF_load.F
index 0f42af3ce4..eeb71d62e6 100644
--- a/src/wf_and_fft/WF_load.F
+++ b/src/wf_and_fft/WF_load.F
@@ -42,13 +42,15 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  use IO_m,          ONLY:OP_RD,NONE,VERIFY,RD,RD_CL,DUMP
  use wrapper,       ONLY:Vstar_dot_V
 #if defined _SC
- use SC,            ONLY:compatible_SC_DB,load_SC_components,SC_bands,found_SC_DB,SC_bands
+ use SC,            ONLY:compatible_SC_DB,load_SC_components,SC_bands,found_SC_DB,&
+&                        SC_bands
  use global_XC,     ONLY:WF_kind,WF_xc_functional,WF_perturbation,global_XC_string
 #endif
- use pseudo,        ONLY:pp_is_uspp,becp
+ use pseudo,        ONLY:pp_is_uspp,becp,l_use_p2_Vloc_becp
  use qe_pseudo_m,   ONLY:vkb,tpiba
  use timing_m,      ONLY:timing
  use cuda_m,        ONLY:have_cuda
+ use wvfct,         ONLY:npwx
  !
 #include<dev_defs.h>
 #include<memory.h>
@@ -72,7 +74,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
 &                   npwk
  real(SP)         ::mndp,mxdp,xk(3)
  complex(SP)      ::c
- logical          ::loaded_bounds_ok,free_and_alloc,buffer_is_ok,clean_up_states,force_WFo_,QUIET_wf,SIZE_msg
+ logical          ::loaded_bounds_ok,free_and_alloc,buffer_is_ok,clean_up_states,force_WFo_,QUIET_wf,SIZE_msg,do_becp
  complex(SP), allocatable :: wf_disk(:,:,:)
  complex(DP), allocatable :: wf_DP(:)
  !
@@ -85,6 +87,12 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  !
  integer ::io_err,ID
  !
+ ! [FP]
+ ! Setting logicals for becp computation
+ do_becp=pp_is_uspp
+ if (l_use_p2_Vloc_becp) do_becp=.true.
+ ! [FP]
+ !
  ! Close iG/iGo_max to the nearest shell
  !
  iG_max=iG_in
@@ -172,7 +180,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
        !
        fft_dim=fft_dim_loaded
        fft_size=product(fft_dim)
-       return
+       if (.not.do_becp) return
      endif
    endif
    !
@@ -307,7 +315,9 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
      !
      ! USPP init
      !
-     if (pp_is_uspp) then
+     ! [FP]
+     if (do_becp) then
+       ! [FP]
        !
        call c2a(b,k_pt(ikibz,:),xk,mode="ki2c")
        xk=xk/real(tpiba,SP)
@@ -356,7 +366,9 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
            io_err=io_WF(ID,wf_disk)
          endif
          !
-         if (pp_is_uspp) then
+         ! [FP]
+         if (do_becp) then
+            ! [FP] 
             call PP_compute_becp( becp(ikibz,i_sp_pol), wf_nc_k(ikibz), wf_nb_to_load, wf_disk, wf_b_indx)
          endif
          !