From 7c36c492983a7eed1f70d4c981a1f400dc1ec01d Mon Sep 17 00:00:00 2001
From: jingan-181 <78459531+jingan-181@users.noreply.github.com>
Date: Wed, 11 Dec 2024 19:39:17 +0800
Subject: [PATCH 1/7] Fixed the bug of wannier90 interface (#5719)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update Wannier90 interface document.

* Fixed the bug of wannier90 interface

The class “toWannier90_LCAO_IN_PW” did not initialize tpiba and omega.
---
 source/module_esolver/esolver_ks_lcao.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index 63a1201bef..e515c23903 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -1224,7 +1224,7 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
                                              PARAM.inp.out_wannier_wvfn_formatted,
                                              PARAM.inp.nnkpfile,
                                              PARAM.inp.wannier_spin);
-
+            myWannier.set_tpiba_omega(ucell.tpiba, ucell.omega);
             myWannier.calculate(ucell,
                                 this->pelec->ekb, 
                                 this->pw_wfc, 

From 7b33110f29be6e996b1b30f536b6c77b04319a94 Mon Sep 17 00:00:00 2001
From: Qianrui Liu <76200646+Qianruipku@users.noreply.github.com>
Date: Wed, 11 Dec 2024 20:23:40 +0800
Subject: [PATCH 2/7] add check for smearing_method (#5718)

---
 source/module_io/read_input_item_elec_stru.cpp       | 12 ++++++++++++
 .../module_io/test_serial/read_input_item_test.cpp   |  8 ++++++++
 2 files changed, 20 insertions(+)

diff --git a/source/module_io/read_input_item_elec_stru.cpp b/source/module_io/read_input_item_elec_stru.cpp
index 44bc76d691..75026089c8 100644
--- a/source/module_io/read_input_item_elec_stru.cpp
+++ b/source/module_io/read_input_item_elec_stru.cpp
@@ -330,6 +330,18 @@ void ReadInput::item_elec_stru()
         Input_Item item("smearing_method");
         item.annotation = "type of smearing_method: gauss; fd; fixed; mp; mp2; mv";
         read_sync_string(input.smearing_method);
+        item.check_value = [](const Input_Item& item, const Parameter& para) { // abort on an unrecognized smearing_method
+            const std::vector<std::string> methods = {"gauss", "gaussian",
+                                                      "fd", "fermi-dirac",
+                                                      "fixed",
+                                                      "mp", "mp2", "mp3", // comma required: adjacent literals would merge into one entry
+                                                      "marzari-vanderbilt", "cold", "mv"};
+            if (std::find(methods.begin(), methods.end(), para.input.smearing_method) == methods.end())
+            {
+                const std::string warningstr = nofound_str(methods, "smearing_method"); // message built from the accepted names
+                ModuleBase::WARNING_QUIT("ReadInput", warningstr);
+            }
+        };
         this->add_item(item);
     }
     {
diff --git a/source/module_io/test_serial/read_input_item_test.cpp b/source/module_io/test_serial/read_input_item_test.cpp
index b83e2df05a..e8da3f478b 100644
--- a/source/module_io/test_serial/read_input_item_test.cpp
+++ b/source/module_io/test_serial/read_input_item_test.cpp
@@ -99,6 +99,14 @@ TEST_F(InputTest, Item_test)
         output = testing::internal::GetCapturedStdout();
         EXPECT_THAT(output, testing::HasSubstr("NOTICE"));
     }
+    { // smearing_method
+        auto it = find_label("smearing_method", readinput.input_lists);
+        param.input.smearing_method = "fix";
+        testing::internal::CaptureStdout();
+        EXPECT_EXIT(it->second.check_value(it->second, param), ::testing::ExitedWithCode(1), "");
+        output = testing::internal::GetCapturedStdout();
+        EXPECT_THAT(output, testing::HasSubstr("NOTICE"));
+    }
     { // kspacing
         auto it = find_label("kspacing", readinput.input_lists);
         it->second.str_values = {"1"};

From 3b3466eae588a866a709dfaa6547614f6f7bd497 Mon Sep 17 00:00:00 2001
From: Liang Sun <50293369+sunliang98@users.noreply.github.com>
Date: Thu, 12 Dec 2024 08:54:20 +0800
Subject: [PATCH 3/7] Fix: Fix the Ewald force and stress when atom number of
 some elements are zero.  (#5721)

* Fix: Fix the Ewald force when atom number is zero.

* Fix: Fix the Ewald stress when atom number is zero.

* Test: Add an integrate test 123_PW_zero_atom
---
 source/module_cell/atom_spec.cpp              | 89 ++++++++++---------
 source/module_cell/read_atoms.cpp             | 19 +++-
 .../module_hamilt_pw/hamilt_pwdft/forces.cpp  | 76 ++++++++--------
 .../hamilt_pwdft/stress_func_ewa.cpp          | 35 ++++----
 tests/integrate/123_PW_zero_atom/INPUT        | 28 ++++++
 tests/integrate/123_PW_zero_atom/KPT          |  4 +
 tests/integrate/123_PW_zero_atom/STRU         | 23 +++++
 tests/integrate/123_PW_zero_atom/jd           |  1 +
 tests/integrate/123_PW_zero_atom/result.ref   |  5 ++
 tests/integrate/CASES_CPU.txt                 |  1 +
 10 files changed, 186 insertions(+), 95 deletions(-)
 create mode 100644 tests/integrate/123_PW_zero_atom/INPUT
 create mode 100644 tests/integrate/123_PW_zero_atom/KPT
 create mode 100644 tests/integrate/123_PW_zero_atom/STRU
 create mode 100644 tests/integrate/123_PW_zero_atom/jd
 create mode 100644 tests/integrate/123_PW_zero_atom/result.ref

diff --git a/source/module_cell/atom_spec.cpp b/source/module_cell/atom_spec.cpp
index f00a062c1d..0bf5045f2d 100644
--- a/source/module_cell/atom_spec.cpp
+++ b/source/module_cell/atom_spec.cpp
@@ -102,51 +102,54 @@ void Atom::bcast_atom(void)
     Parallel_Common::bcast_bool(this->flag_empty_element);
     Parallel_Common::bcast_double(mass);
 
-    if (GlobalV::MY_RANK != 0)
+    if (na > 0)
     {
-        assert(na != 0);
-        this->tau.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
-        this->dis.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
-        this->taud.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
-        this->vel.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
-        this->mag.resize(na, 0);
-        this->angle1.resize(na, 0);
-        this->angle2.resize(na, 0);
-        this->m_loc_.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
-        this->mbl.resize(na, ModuleBase::Vector3<int>(0, 0, 0));
-        this->lambda.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
-        this->constrain.resize(na, ModuleBase::Vector3<int>(0, 0, 0));
-    }
+        if (GlobalV::MY_RANK != 0)
+        {
+            assert(na != 0);
+            this->tau.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
+            this->dis.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
+            this->taud.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
+            this->vel.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
+            this->mag.resize(na, 0);
+            this->angle1.resize(na, 0);
+            this->angle2.resize(na, 0);
+            this->m_loc_.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
+            this->mbl.resize(na, ModuleBase::Vector3<int>(0, 0, 0));
+            this->lambda.resize(na, ModuleBase::Vector3<double>(0, 0, 0));
+            this->constrain.resize(na, ModuleBase::Vector3<int>(0, 0, 0));
+        }
 
-    for (int i = 0; i < na; i++)
-    {
-        Parallel_Common::bcast_double(tau[i].x);
-        Parallel_Common::bcast_double(tau[i].y);
-        Parallel_Common::bcast_double(tau[i].z);
-        Parallel_Common::bcast_double(taud[i].x);
-        Parallel_Common::bcast_double(taud[i].y);
-        Parallel_Common::bcast_double(taud[i].z);
-        Parallel_Common::bcast_double(dis[i].x);
-        Parallel_Common::bcast_double(dis[i].y);
-        Parallel_Common::bcast_double(dis[i].z);
-        Parallel_Common::bcast_double(vel[i].x);
-        Parallel_Common::bcast_double(vel[i].y);
-        Parallel_Common::bcast_double(vel[i].z);
-        Parallel_Common::bcast_double(mag[i]);
-        Parallel_Common::bcast_double(angle1[i]);
-        Parallel_Common::bcast_double(angle2[i]);
-        Parallel_Common::bcast_double(m_loc_[i].x);
-        Parallel_Common::bcast_double(m_loc_[i].y);
-        Parallel_Common::bcast_double(m_loc_[i].z);
-        Parallel_Common::bcast_int(mbl[i].x);
-        Parallel_Common::bcast_int(mbl[i].y);
-        Parallel_Common::bcast_int(mbl[i].z);
-        Parallel_Common::bcast_double(lambda[i].x);
-        Parallel_Common::bcast_double(lambda[i].y);
-        Parallel_Common::bcast_double(lambda[i].z);
-        Parallel_Common::bcast_int(constrain[i].x);
-        Parallel_Common::bcast_int(constrain[i].y);
-        Parallel_Common::bcast_int(constrain[i].z);
+        for (int i = 0; i < na; i++)
+        {
+            Parallel_Common::bcast_double(tau[i].x);
+            Parallel_Common::bcast_double(tau[i].y);
+            Parallel_Common::bcast_double(tau[i].z);
+            Parallel_Common::bcast_double(taud[i].x);
+            Parallel_Common::bcast_double(taud[i].y);
+            Parallel_Common::bcast_double(taud[i].z);
+            Parallel_Common::bcast_double(dis[i].x);
+            Parallel_Common::bcast_double(dis[i].y);
+            Parallel_Common::bcast_double(dis[i].z);
+            Parallel_Common::bcast_double(vel[i].x);
+            Parallel_Common::bcast_double(vel[i].y);
+            Parallel_Common::bcast_double(vel[i].z);
+            Parallel_Common::bcast_double(mag[i]);
+            Parallel_Common::bcast_double(angle1[i]);
+            Parallel_Common::bcast_double(angle2[i]);
+            Parallel_Common::bcast_double(m_loc_[i].x);
+            Parallel_Common::bcast_double(m_loc_[i].y);
+            Parallel_Common::bcast_double(m_loc_[i].z);
+            Parallel_Common::bcast_int(mbl[i].x);
+            Parallel_Common::bcast_int(mbl[i].y);
+            Parallel_Common::bcast_int(mbl[i].z);
+            Parallel_Common::bcast_double(lambda[i].x);
+            Parallel_Common::bcast_double(lambda[i].y);
+            Parallel_Common::bcast_double(lambda[i].z);
+            Parallel_Common::bcast_int(constrain[i].x);
+            Parallel_Common::bcast_int(constrain[i].y);
+            Parallel_Common::bcast_int(constrain[i].z);
+        }
     }
 
     return;
diff --git a/source/module_cell/read_atoms.cpp b/source/module_cell/read_atoms.cpp
index 81608ce609..30ae5c504b 100644
--- a/source/module_cell/read_atoms.cpp
+++ b/source/module_cell/read_atoms.cpp
@@ -504,7 +504,18 @@ bool UnitCell::read_atom_positions(std::ifstream &ifpos, std::ofstream &ofs_runn
                 ModuleBase::WARNING("read_atom_positions", " atom number < 0.");
                 return false;
             }
-            if (na > 0)
+            else if (na == 0)
+            {
+                std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl;
+                std::cout << " Warning: atom number is 0 for atom type: " << atoms[it].label << std::endl;
+                std::cout << " If you are confident that this is not a mistake, please ignore this warning." << std::endl;
+                std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl;
+                ofs_running << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl;
+                ofs_running << " Warning: atom number is 0 for atom type: " << atoms[it].label << std::endl;
+                ofs_running << " If you are confident that this is not a mistake, please ignore this warning." << std::endl;
+                ofs_running << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl;
+            }
+            else if (na > 0)
             {
                 atoms[it].tau.resize(na, ModuleBase::Vector3<double>(0,0,0));
                 atoms[it].dis.resize(na, ModuleBase::Vector3<double>(0,0,0));
@@ -891,6 +902,12 @@ bool UnitCell::read_atom_positions(std::ifstream &ifpos, std::ofstream &ofs_runn
     ofs_running << std::endl;
     ModuleBase::GlobalFunc::OUT(ofs_running,"TOTAL ATOM NUMBER",nat);
 
+    if (nat == 0)
+    {
+        ModuleBase::WARNING("read_atom_positions","no atom in the system!");
+        return false;
+    }
+
     // mohan add 2010-06-30    
     //xiaohui modify 2015-03-15, cancel outputfile "STRU_READIN.xyz"
     //this->print_cell_xyz("STRU_READIN.xyz");
diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces.cpp
index c1fcd2299c..dd24ce82c7 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/forces.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/forces.cpp
@@ -595,21 +595,24 @@ void Forces<FPTYPE, Device>::cal_force_ew(const UnitCell& ucell,
         }
         for (int it = 0; it < ucell.ntype; it++)
         {
-            double dzv;
-            if (PARAM.inp.use_paw)
+            if (ucell.atoms[it].na != 0)
             {
-#ifdef USE_PAW
-                dzv = GlobalC::paw_cell.get_val(it);
-#endif
-            }
-            else
-            {
-                dzv = ucell.atoms[it].ncpp.zv;
-            }
+                double dzv;
+                if (PARAM.inp.use_paw)
+                {
+    #ifdef USE_PAW
+                    dzv = GlobalC::paw_cell.get_val(it);
+    #endif
+                }
+                else
+                {
+                    dzv = ucell.atoms[it].ncpp.zv;
+                }
 
-            for (int ig = igb; ig < ig_end; ++ig)
-            { // accumulate aux
-                aux[ig] += dzv * conj(p_sf->strucFac(it, ig));
+                for (int ig = igb; ig < ig_end; ++ig)
+                { // accumulate aux
+                    aux[ig] += dzv * conj(p_sf->strucFac(it, ig));
+                }
             }
         }
     }
@@ -714,30 +717,33 @@ void Forces<FPTYPE, Device>::cal_force_ew(const UnitCell& ucell,
                 last_it = it;
             }
 
-            const auto ig_loop = [&](int ig_beg, int ig_end) {
-                for (int ig = ig_beg; ig < ig_end; ig++)
-                {
-                    const ModuleBase::Vector3<double> gcar = rho_basis->gcar[ig];
-                    const double arg = ModuleBase::TWO_PI * (gcar * ucell.atoms[it].tau[ia]);
-                    double sinp, cosp;
-                    ModuleBase::libm::sincos(arg, &sinp, &cosp);
-                    double sumnb = -cosp * aux[ig].imag() + sinp * aux[ig].real();
-                    forceion(iat, 0) += gcar[0] * sumnb;
-                    forceion(iat, 1) += gcar[1] * sumnb;
-                    forceion(iat, 2) += gcar[2] * sumnb;
-                }
-            };
+            if (ucell.atoms[it].na != 0)
+            {
+                const auto ig_loop = [&](int ig_beg, int ig_end) {
+                    for (int ig = ig_beg; ig < ig_end; ig++)
+                    {
+                        const ModuleBase::Vector3<double> gcar = rho_basis->gcar[ig];
+                        const double arg = ModuleBase::TWO_PI * (gcar * ucell.atoms[it].tau[ia]);
+                        double sinp, cosp;
+                        ModuleBase::libm::sincos(arg, &sinp, &cosp);
+                        double sumnb = -cosp * aux[ig].imag() + sinp * aux[ig].real();
+                        forceion(iat, 0) += gcar[0] * sumnb;
+                        forceion(iat, 1) += gcar[1] * sumnb;
+                        forceion(iat, 2) += gcar[2] * sumnb;
+                    }
+                };
 
-            // skip ig_gge0 point by separating ig loop into two part
-            ig_loop(0, ig_gap);
-            ig_loop(ig_gap + 1, rho_basis->npw);
+                // skip ig_gge0 point by separating ig loop into two part
+                ig_loop(0, ig_gap);
+                ig_loop(ig_gap + 1, rho_basis->npw);
 
-            forceion(iat, 0) *= it_fact;
-            forceion(iat, 1) *= it_fact;
-            forceion(iat, 2) *= it_fact;
+                forceion(iat, 0) *= it_fact;
+                forceion(iat, 1) *= it_fact;
+                forceion(iat, 2) *= it_fact;
 
-            ++iat;
-            ucell.step_iait(&ia, &it);
+                ++iat;
+                ucell.step_iait(&ia, &it);
+            }
         }
 
         // means that the processor contains G=0 term.
@@ -771,7 +777,7 @@ void Forces<FPTYPE, Device>::cal_force_ew(const UnitCell& ucell,
                 int T2 = 0;
                 while (iat2 < this->nat)
                 {
-                    if (iat1 != iat2)
+                    if (iat1 != iat2 && ucell.atoms[T2].na != 0 && ucell.atoms[T1].na != 0)
                     {
                         ModuleBase::Vector3<double> d_tau
                             = ucell.atoms[T1].tau[I1] - ucell.atoms[T2].tau[I2];
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_ewa.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_ewa.cpp
index fd6bf8dc0a..5dbe2cd6b4 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_ewa.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_ewa.cpp
@@ -137,25 +137,28 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
 
 		while (ijat < ijat_end)
 		{
-			//calculate tau[na]-tau[nb]
-			d_tau = ucell.atoms[it].tau[i] - ucell.atoms[jt].tau[j];
-			//generates nearest-neighbors shells 
-			H_Ewald_pw::rgen(d_tau, rmax, irr, ucell.latvec, ucell.G, r, r2, nrm);
-			for(int nr=0 ; nr<nrm ; nr++)
+			if (ucell.atoms[it].na != 0 && ucell.atoms[jt].na != 0)
 			{
-				rr=sqrt(r2[nr]) * ucell.lat0;
-				fac = -ModuleBase::e2/2.0/ucell.omega*pow(ucell.lat0,2)*ucell.atoms[it].ncpp.zv * ucell.atoms[jt].ncpp.zv / pow(rr,3) * (erfc(sqa*rr)+rr * sq8a_2pi *  ModuleBase::libm::exp(-alpha * pow(rr,2)));
-				for(int l=0; l<3; l++)
+				//calculate tau[na]-tau[nb]
+				d_tau = ucell.atoms[it].tau[i] - ucell.atoms[jt].tau[j];
+				//generates nearest-neighbors shells 
+				H_Ewald_pw::rgen(d_tau, rmax, irr, ucell.latvec, ucell.G, r, r2, nrm);
+				for(int nr=0 ; nr<nrm ; nr++)
 				{
-					for(int m=0; m<l+1; m++)
+					rr=sqrt(r2[nr]) * ucell.lat0;
+					fac = -ModuleBase::e2/2.0/ucell.omega*pow(ucell.lat0,2)*ucell.atoms[it].ncpp.zv * ucell.atoms[jt].ncpp.zv / pow(rr,3) * (erfc(sqa*rr)+rr * sq8a_2pi *  ModuleBase::libm::exp(-alpha * pow(rr,2)));
+					for(int l=0; l<3; l++)
 					{
-						r0[0] = r[nr].x;
-						r0[1] = r[nr].y;
-						r0[2] = r[nr].z;
-						local_sigma(l,m) += fac * r0[l] * r0[m];
-					}//end m
-				}//end l
-			}//end nr
+						for(int m=0; m<l+1; m++)
+						{
+							r0[0] = r[nr].x;
+							r0[1] = r[nr].y;
+							r0[2] = r[nr].z;
+							local_sigma(l,m) += fac * r0[l] * r0[m];
+						}//end m
+					}//end l
+				}//end nr
+			}
 
 			++ijat;
 			ucell.step_jajtiait(&j, &jt, &i, &it);
diff --git a/tests/integrate/123_PW_zero_atom/INPUT b/tests/integrate/123_PW_zero_atom/INPUT
new file mode 100644
index 0000000000..8488f61585
--- /dev/null
+++ b/tests/integrate/123_PW_zero_atom/INPUT
@@ -0,0 +1,28 @@
+INPUT_PARAMETERS
+#Parameters (1.General)
+suffix			autotest
+calculation     	scf
+
+nbands			6
+symmetry		0
+pseudo_dir      	../../PP_ORB/
+
+#Parameters (2.Iteration)
+ecutwfc			20
+
+#Parameters (3.Basis)
+basis_type		pw
+
+#Parameters (4.Smearing)
+smearing_method		gauss
+smearing_sigma		0.0002
+
+#Parameters (5.Mixing)
+mixing_type		broyden
+mixing_beta		0.7
+cal_force		1
+test_force		1
+cal_stress		1
+test_stress		1
+
+pw_seed         1
\ No newline at end of file
diff --git a/tests/integrate/123_PW_zero_atom/KPT b/tests/integrate/123_PW_zero_atom/KPT
new file mode 100644
index 0000000000..f5f7f4ec34
--- /dev/null
+++ b/tests/integrate/123_PW_zero_atom/KPT
@@ -0,0 +1,4 @@
+K_POINTS
+0
+Gamma
+2 2 2 0 0 0
diff --git a/tests/integrate/123_PW_zero_atom/STRU b/tests/integrate/123_PW_zero_atom/STRU
new file mode 100644
index 0000000000..49a2ef9cb7
--- /dev/null
+++ b/tests/integrate/123_PW_zero_atom/STRU
@@ -0,0 +1,23 @@
+ATOMIC_SPECIES
+Al 13 Al_ONCV_PBE-1.0.upf upf201
+Si 14 Si_ONCV_PBE-1.0.upf upf201
+
+LATTICE_CONSTANT
+10.2  // add lattice constant
+
+LATTICE_VECTORS
+0.5 0.5 0.0
+0.5 0.0 0.5
+0.0 0.5 0.5
+
+ATOMIC_POSITIONS
+Direct 
+
+Al
+0.0
+0
+Si      // Element type
+0.0     // magnetism
+2
+0.00 0.00 0.00 1 1 1
+0.25 0.25 0.25 1 1 1
diff --git a/tests/integrate/123_PW_zero_atom/jd b/tests/integrate/123_PW_zero_atom/jd
new file mode 100644
index 0000000000..ebecbd81ec
--- /dev/null
+++ b/tests/integrate/123_PW_zero_atom/jd
@@ -0,0 +1 @@
+test calculation when atom number is zero.
diff --git a/tests/integrate/123_PW_zero_atom/result.ref b/tests/integrate/123_PW_zero_atom/result.ref
new file mode 100644
index 0000000000..c93cf82f0f
--- /dev/null
+++ b/tests/integrate/123_PW_zero_atom/result.ref
@@ -0,0 +1,5 @@
+etotref -211.8003327929410489
+etotperatomref -105.9001663965
+totalforceref 0.000014
+totalstressref 368.726447
+totaltimeref 0.61
diff --git a/tests/integrate/CASES_CPU.txt b/tests/integrate/CASES_CPU.txt
index 8277d51eab..1c76104ae0 100644
--- a/tests/integrate/CASES_CPU.txt
+++ b/tests/integrate/CASES_CPU.txt
@@ -98,6 +98,7 @@
 121_PW_KPAR
 121_PW_kspacing
 127_PW_15_PK_AF
+123_PW_zero_atom
 128_PW_zero_ntype
 133_PW_DJ_PK
 135_PW_15_PK

From d73b1d2b51cb59f09a90ece3b1d64dc96ec47f78 Mon Sep 17 00:00:00 2001
From: Erjie Wu <110683255+ErjieWu@users.noreply.github.com>
Date: Thu, 12 Dec 2024 08:55:37 +0800
Subject: [PATCH 4/7] Refactor: Combine gamma-only and multi-k versions of some
 functions in DeePKS. (#5717)

* Add support for INPUT deepks_v_delta>0 in multi-k points DeePKS calculations

* Refactor: Change LCAO_Deepks_Interface to template class.

* Remove the h_mat and h_mat_k variables in LCAO_Deepks and change H_V_delta to a form consistent with H_V_delta_k.

* Change functions in deepks_hmat to template.

* Combine gamma-only and multi-k for v_delta_precalc.

* Change functions about v_delta_precalc and psialpha in deepks_v_delta calculations to templates.

* Change save_npy_h to template.

* Change some functions in LCAO_deepks_io to templates.

* Remove ld.V_deltaR.

* Change cal_orbital_precalc to template.

* Remove orbital_precalc_k.cpp.

* Change cal_gdmx into template function.

* [pre-commit.ci lite] apply automatic fixes

* Update LCAO_deepks_interface.cpp

* Update FORCE_STRESS.cpp

* Update FORCE_gamma.cpp

* Update deepks_lcao.cpp

* Update LCAO_deepks.cpp

* Update LCAO_deepks.cpp

* Update LCAO_deepks.h

---------

Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
---
 source/Makefile.Objects                       |   3 -
 source/module_esolver/esolver_ks_lcao.cpp     |  50 +-
 .../hamilt_lcaodft/FORCE_STRESS.cpp           |  16 +-
 .../hamilt_lcaodft/FORCE_gamma.cpp            |   5 +-
 .../operator_lcao/deepks_lcao.cpp             |   6 +-
 .../module_deepks/CMakeLists.txt              |   3 -
 .../module_deepks/LCAO_deepks.cpp             |  45 +-
 .../module_deepks/LCAO_deepks.h               |  87 +---
 .../module_deepks/LCAO_deepks_interface.cpp   | 460 ++++++------------
 .../module_deepks/LCAO_deepks_interface.h     |  23 +-
 .../module_deepks/LCAO_deepks_io.cpp          | 226 ++++-----
 .../module_deepks/LCAO_deepks_io.h            |  95 ++--
 .../module_deepks/LCAO_deepks_odelta.cpp      |   6 +-
 .../module_deepks/LCAO_deepks_pdm.cpp         |   5 +-
 .../module_deepks/LCAO_deepks_torch.cpp       | 213 ++++----
 .../module_deepks/LCAO_deepks_vdelta.cpp      |   6 +-
 .../module_deepks/cal_gdmx.cpp                |  97 +++-
 .../module_deepks/cal_gdmx_k.cpp              | 249 ----------
 .../module_deepks/deepks_hmat.cpp             | 144 ++----
 .../module_deepks/deepks_hmat.h               |  32 +-
 .../module_deepks/orbital_precalc.cpp         | 290 ++++++++---
 .../module_deepks/orbital_precalc_k.cpp       | 350 -------------
 .../module_deepks/test/LCAO_deepks_test.cpp   |   4 +-
 .../module_deepks/v_delta_precalc.cpp         | 185 +++++--
 .../module_deepks/v_delta_precalc_k.cpp       | 234 ---------
 25 files changed, 946 insertions(+), 1888 deletions(-)
 delete mode 100644 source/module_hamilt_lcao/module_deepks/cal_gdmx_k.cpp
 delete mode 100644 source/module_hamilt_lcao/module_deepks/orbital_precalc_k.cpp
 delete mode 100644 source/module_hamilt_lcao/module_deepks/v_delta_precalc_k.cpp

diff --git a/source/Makefile.Objects b/source/Makefile.Objects
index 661db25611..ae07f24df8 100644
--- a/source/Makefile.Objects
+++ b/source/Makefile.Objects
@@ -201,14 +201,11 @@ OBJS_DEEPKS=LCAO_deepks.o\
         deepks_hmat.o\
         LCAO_deepks_interface.o\
         orbital_precalc.o\
-        orbital_precalc_k.o\
         cal_gdmx.o\
-        cal_gdmx_k.o\
         cal_gedm.o\
         cal_gvx.o\
         cal_descriptor.o\
         v_delta_precalc.o\
-        v_delta_precalc_k.o\
         
 
 OBJS_ELECSTAT=elecstate.o\
diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index e515c23903..3d089e568e 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -961,7 +961,7 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
     // 6) write Hamiltonian and Overlap matrix
     for (int ik = 0; ik < this->kv.get_nks(); ++ik)
     {
-        if (PARAM.inp.out_mat_hs[0] || PARAM.inp.deepks_v_delta)
+        if (PARAM.inp.out_mat_hs[0])
         {
             this->p_hamilt->updateHk(ik);
         }
@@ -1000,12 +1000,6 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
                                     this->pv,
                                     GlobalV::DRANK);
             }
-#ifdef __DEEPKS
-            if (PARAM.inp.deepks_out_labels && PARAM.inp.deepks_v_delta)
-            {
-                DeePKS_domain::save_h_mat(h_mat.p, this->pv.nloc, ik);
-            }
-#endif
         }
     }
 
@@ -1023,24 +1017,30 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
 
     //! 8) Write DeePKS information
 #ifdef __DEEPKS
-    std::shared_ptr<LCAO_Deepks> ld_shared_ptr(&GlobalC::ld, [](LCAO_Deepks*) {});
-    LCAO_Deepks_Interface LDI = LCAO_Deepks_Interface(ld_shared_ptr);
-    ModuleBase::timer::tick("ESolver_KS_LCAO", "out_deepks_labels");
-    LDI.out_deepks_labels(this->pelec->f_en.etot,
-                          this->pelec->klist->get_nks(),
-                          ucell.nat,
-                          PARAM.globalv.nlocal,
-                          this->pelec->ekb,
-                          this->pelec->klist->kvec_d,
-                          ucell,
-                          orb_,
-                          GlobalC::GridD,
-                          &(this->pv),
-                          *(this->psi),
-                          dynamic_cast<const elecstate::ElecStateLCAO<TK>*>(this->pelec)->get_DM(),
-                          PARAM.inp.deepks_v_delta);
-
-    ModuleBase::timer::tick("ESolver_KS_LCAO", "out_deepks_labels");
+    if (this->psi != nullptr && (istep % PARAM.inp.out_interval == 0))
+    {
+        hamilt::HamiltLCAO<TK, TR>* p_ham_deepks
+            = dynamic_cast<hamilt::HamiltLCAO<TK, TR>*>(this->p_hamilt);
+        std::shared_ptr<LCAO_Deepks> ld_shared_ptr(&GlobalC::ld, [](LCAO_Deepks*) {});
+        LCAO_Deepks_Interface<TK, TR> LDI(ld_shared_ptr);
+
+        ModuleBase::timer::tick("ESolver_KS_LCAO", "out_deepks_labels");
+        LDI.out_deepks_labels(this->pelec->f_en.etot,
+                            this->pelec->klist->get_nks(),
+                            ucell.nat,
+                            PARAM.globalv.nlocal,
+                            this->pelec->ekb,
+                            this->pelec->klist->kvec_d,
+                            ucell,
+                            orb_,
+                            GlobalC::GridD,
+                            &(this->pv),
+                            *(this->psi),
+                            dynamic_cast<const elecstate::ElecStateLCAO<TK>*>(this->pelec)->get_DM(),
+                            p_ham_deepks);
+
+        ModuleBase::timer::tick("ESolver_KS_LCAO", "out_deepks_labels");
+    }
 #endif
 
     //! 9) Perform RDMFT calculations
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
index 43a633daf2..4a262da0e4 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
@@ -522,7 +522,7 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
                     {
                         const std::vector<std::vector<double>>& dm_gamma
                             = dynamic_cast<const elecstate::ElecStateLCAO<double>*>(pelec)->get_DM()->get_DMK_vector();
-                        GlobalC::ld.cal_gdmx(dm_gamma[0], ucell, orb, GlobalC::GridD, isstress);
+                        GlobalC::ld.cal_gdmx(dm_gamma, ucell, orb, GlobalC::GridD, kv.get_nks(), kv.kvec_d, isstress);
                     }
                     else
                     {
@@ -531,13 +531,13 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
                                   ->get_DM()
                                   ->get_DMK_vector();
 
-                        GlobalC::ld.cal_gdmx_k(dm_k,
-                                               ucell,
-                                               orb,
-                                               GlobalC::GridD,
-                                               kv.get_nks(),
-                                               kv.kvec_d,
-                                               isstress);
+                        GlobalC::ld.cal_gdmx(dm_k,
+                                             ucell,
+                                             orb,
+                                             GlobalC::GridD,
+                                             kv.get_nks(),
+                                             kv.kvec_d,
+                                             isstress);
                     }
                     if (PARAM.inp.deepks_out_unittest)
                     {
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
index c7aad83123..38e5be5051 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
@@ -260,7 +260,8 @@ void Force_LCAO<double>::ftable(const bool isforce,
 
         if (PARAM.inp.deepks_out_unittest)
         {
-            LCAO_deepks_io::print_dm(dm_gamma[0], PARAM.globalv.nlocal, this->ParaV->nrow);
+            const int nks = 1; // 1 for gamma-only
+            LCAO_deepks_io::print_dm(nks, PARAM.globalv.nlocal, this->ParaV->nrow, dm_gamma); 
 
             GlobalC::ld.check_projected_dm();
 
@@ -268,7 +269,7 @@ void Force_LCAO<double>::ftable(const bool isforce,
 
             GlobalC::ld.check_gedm();
 
-            GlobalC::ld.cal_e_delta_band(dm_gamma);
+            GlobalC::ld.cal_e_delta_band(dm_gamma,nks);
 
             std::ofstream ofs("E_delta_bands.dat");
             ofs << std::setprecision(10) << GlobalC::ld.e_delta_band;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
index 95df523db2..47d2f57c9f 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
@@ -186,7 +186,7 @@ void DeePKS<OperatorLCAO<std::complex<double>, double>>::contributeHR()
     {
         ModuleBase::timer::tick("DeePKS", "contributeHR");
 
-        GlobalC::ld.cal_projected_DM_k(this->DM, *this->ucell, *ptr_orb_, GlobalC::GridD);
+        GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, GlobalC::GridD);
         GlobalC::ld.cal_descriptor(this->ucell->nat);
         // calculate dE/dD
         GlobalC::ld.cal_gedm(this->ucell->nat);
@@ -219,7 +219,7 @@ void DeePKS<OperatorLCAO<std::complex<double>, std::complex<double>>>::contribut
     {
         ModuleBase::timer::tick("DeePKS", "contributeHR");
 
-        GlobalC::ld.cal_projected_DM_k(this->DM, *this->ucell, *ptr_orb_, GlobalC::GridD);
+        GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, GlobalC::GridD);
         GlobalC::ld.cal_descriptor(this->ucell->nat);
         // calculate dE/dD
         GlobalC::ld.cal_gedm(this->ucell->nat);
@@ -497,7 +497,7 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::cal_HR_IJR(const double* hr_i
 
 inline void get_h_delta_k(int ik, double*& h_delta_k)
 {
-    h_delta_k = GlobalC::ld.H_V_delta.data();
+    h_delta_k = GlobalC::ld.H_V_delta[ik].data();
     return;
 }
 inline void get_h_delta_k(int ik, std::complex<double>*& h_delta_k)
diff --git a/source/module_hamilt_lcao/module_deepks/CMakeLists.txt b/source/module_hamilt_lcao/module_deepks/CMakeLists.txt
index e7e5110c6f..f32151bafe 100644
--- a/source/module_hamilt_lcao/module_deepks/CMakeLists.txt
+++ b/source/module_hamilt_lcao/module_deepks/CMakeLists.txt
@@ -13,14 +13,11 @@ if(ENABLE_DEEPKS)
       deepks_hmat.cpp
       LCAO_deepks_interface.cpp
       orbital_precalc.cpp
-      orbital_precalc_k.cpp
       cal_gdmx.cpp
-      cal_gdmx_k.cpp
       cal_gedm.cpp
       cal_gvx.cpp      
       cal_descriptor.cpp
       v_delta_precalc.cpp
-      v_delta_precalc_k.cpp
   )
 
   add_library(
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp
index 84eb2d6e6b..1676838282 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp
@@ -17,7 +17,6 @@
 //4. subroutines that are related to V_delta:
 //  - allocate_V_delta : allocates H_V_delta; if calculating force, it also calls
 //      init_gdmx, as well as allocating F_delta
-//  - allocate_V_deltaR : allcoates H_V_deltaR, for multi-k calculations
 
 #ifdef __DEEPKS
 
@@ -35,7 +34,6 @@ LCAO_Deepks::LCAO_Deepks()
     alpha_index = new ModuleBase::IntArray[1];
     inl_index = new ModuleBase::IntArray[1];
     inl_l = nullptr;
-    H_V_deltaR = nullptr;
     gedm = nullptr;
 }
 
@@ -45,7 +43,6 @@ LCAO_Deepks::~LCAO_Deepks()
     delete[] alpha_index;
     delete[] inl_index;
     delete[] inl_l;
-    delete[] H_V_deltaR;
 
     //=======1. to use deepks, pdm is required==========
     //delete pdm**
@@ -92,7 +89,10 @@ void LCAO_Deepks::init(
     
     int tot_inl = tot_inl_per_atom * nat;
 
-    if(PARAM.inp.deepks_equiv) tot_inl = nat;
+    if(PARAM.inp.deepks_equiv) 
+    {
+        tot_inl = nat;
+    }
 
     this->lmaxd = lm;
     this->nmaxd = nm;
@@ -143,25 +143,6 @@ void LCAO_Deepks::init(
 
     this->pv = &pv_in;
 
-    if(PARAM.inp.deepks_v_delta)
-    {
-        //allocate and init h_mat
-        if(PARAM.globalv.gamma_only_local)
-        {
-            int nloc=this->pv->nloc;
-            this->h_mat.resize(nloc,0.0);
-        }
-        else
-        {
-            int nloc=this->pv->nloc;
-            this->h_mat_k.resize(nks);
-            for (int ik = 0; ik < nks; ik++)
-            {
-                this->h_mat_k[ik].resize(nloc,std::complex<double>(0.0,0.0));
-            }
-        }
-    }
-
     return;
 }
 
@@ -335,8 +316,9 @@ void LCAO_Deepks::allocate_V_delta(const int nat, const int nks)
     //initialize the H matrix H_V_delta
     if(PARAM.globalv.gamma_only_local)
     {
-        this->H_V_delta.resize(pv->nloc);
-        ModuleBase::GlobalFunc::ZEROS(this->H_V_delta.data(), pv->nloc);
+        H_V_delta.resize(1); // the first dimension is for consistency with H_V_delta_k
+        this->H_V_delta[0].resize(pv->nloc);
+        ModuleBase::GlobalFunc::ZEROS(this->H_V_delta[0].data(), pv->nloc);
     }
     else
     {
@@ -387,15 +369,6 @@ void LCAO_Deepks::allocate_V_delta(const int nat, const int nks)
     return;
 }
 
-void LCAO_Deepks::allocate_V_deltaR(const int nnr)
-{
-    ModuleBase::TITLE("LCAO_Deepks", "allocate_V_deltaR");
-    GlobalV::ofs_running << nnr << std::endl;
-    delete[] H_V_deltaR;
-    H_V_deltaR = new double[nnr];
-    ModuleBase::GlobalFunc::ZEROS(H_V_deltaR, nnr);
-}
-
 void LCAO_Deepks::init_orbital_pdm_shell(const int nks)
 {
     
@@ -541,12 +514,12 @@ void LCAO_Deepks::del_v_delta_pdm_shell(const int nks,const int nlocal)
 
 void LCAO_Deepks::dpks_cal_e_delta_band(const std::vector<std::vector<double>>& dm, const int nks)
 {
-    this->cal_e_delta_band(dm);
+    this->cal_e_delta_band(dm, nks);
 }
 
 void LCAO_Deepks::dpks_cal_e_delta_band(const std::vector<std::vector<std::complex<double>>>& dm, const int nks)
 {
-    this->cal_e_delta_band_k(dm, nks);
+    this->cal_e_delta_band(dm, nks);
 }
 
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h
index 57b3609059..51d765f1b2 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h
@@ -54,18 +54,10 @@ class LCAO_Deepks
     ///\rho_{HL} = c_{L, \mu}c_{L,\nu} - c_{H, \mu}c_{H,\nu} \f$ (for gamma_only)
     ModuleBase::matrix o_delta;
 
-    ///(Unit: Ry) Hamiltonian matrix in k space
-    /// for gamma only
-    std::vector<double> h_mat;    
-    /// for multi-k
-    std::vector<std::vector<std::complex<double>>> h_mat_k;
-
     /// Correction term to the Hamiltonian matrix: \f$\langle\psi|V_\delta|\psi\rangle\f$ (for gamma only)
-    std::vector<double> H_V_delta;
+    /// The size of the first dimension is 1, which is used for consistency with H_V_delta_k
+    std::vector<std::vector<double>> H_V_delta; 
     /// Correction term to Hamiltonian, for multi-k
-    /// In R space:
-    double* H_V_deltaR;
-    /// In k space:
     std::vector<std::vector<std::complex<double>>> H_V_delta_k;
 
     // F_delta will be deleted soon, mohan 2024-07-25
@@ -213,7 +205,6 @@ class LCAO_Deepks
     // 3. subroutines that are related to V_delta:
     //   - allocate_V_delta : allocates H_V_delta; if calculating force, it also calls
     //       init_gdmx, as well as allocating F_delta
-    //   - allocate_V_deltaR : allcoates H_V_deltaR, for multi-k calculations
 
   public:
     explicit LCAO_Deepks();
@@ -231,8 +222,6 @@ class LCAO_Deepks
     /// Allocate memory for correction to Hamiltonian
     void allocate_V_delta(const int nat, const int nks = 1);
 
-    void allocate_V_deltaR(const int nnr);
-
     // array for storing gdmx, used for calculating gvx
     void init_gdmx(const int nat);
     // void del_gdmx(const int nat);
@@ -299,8 +288,7 @@ class LCAO_Deepks
     // 3. check_projected_dm, which prints pdm to descriptor.dat
 
     // 4. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) for gamma point
-    // 5. cal_gdmx_k, counterpart of 3, for multi-k
-    // 6. check_gdmx, which prints gdmx to a series of .dat files
+    // 5. check_gdmx, which prints gdmx to a series of .dat files
 
   public:
     /** 
@@ -316,7 +304,7 @@ class LCAO_Deepks
                           const LCAO_Orbitals& orb,
                           Grid_Driver& GridD);
 
-    void cal_projected_DM_k(const elecstate::DensityMatrix<std::complex<double>, double>* dm,
+    void cal_projected_DM(const elecstate::DensityMatrix<std::complex<double>, double>* dm,
                             const UnitCell& ucell,
                             const LCAO_Orbitals& orb,
                             Grid_Driver& GridD);
@@ -335,21 +323,16 @@ class LCAO_Deepks
 
     // calculate the gradient of pdm with regard to atomic positions
     // d/dX D_{Inl,mm'}
+    template <typename TK>
     void cal_gdmx( // const ModuleBase::matrix& dm,
-        const std::vector<double>& dm,
-        const UnitCell& ucell,
-        const LCAO_Orbitals& orb,
-        Grid_Driver& GridD,
-        const bool isstress);
-
-    void cal_gdmx_k( // const std::vector<ModuleBase::ComplexMatrix>& dm,
-        const std::vector<std::vector<std::complex<double>>>& dm,
+        const std::vector<std::vector<TK>>& dm,
         const UnitCell& ucell,
         const LCAO_Orbitals& orb,
         Grid_Driver& GridD,
         const int nks,
         const std::vector<ModuleBase::Vector3<double>>& kvec_d,
         const bool isstress);
+
     void check_gdmx(const int nat);
 
     /** 
@@ -381,10 +364,10 @@ class LCAO_Deepks
   public:
     /// calculate tr(\rho V_delta)
     // void cal_e_delta_band(const std::vector<ModuleBase::matrix>& dm/**<[in] density matrix*/);
-    void cal_e_delta_band(const std::vector<std::vector<double>>& dm /**<[in] density matrix*/);
+    void cal_e_delta_band(const std::vector<std::vector<double>>& dm /**<[in] density matrix*/, const int /*nks*/);
     // void cal_e_delta_band_k(const std::vector<ModuleBase::ComplexMatrix>& dm/**<[in] density matrix*/,
     //     const int nks);
-    void cal_e_delta_band_k(const std::vector<std::vector<std::complex<double>>>& dm /**<[in] density matrix*/,
+    void cal_e_delta_band(const std::vector<std::vector<std::complex<double>>>& dm /**<[in] density matrix*/,
                             const int nks);
 
     //! a temporary interface for cal_e_delta_band and cal_e_delta_band_k
@@ -404,8 +387,9 @@ class LCAO_Deepks
 
   public:
     void cal_o_delta(const std::vector<std::vector<ModuleBase::matrix>>&
-                         dm_hl /**<[in] modified density matrix that contains HOMO and LUMO only*/);
-    void cal_o_delta_k(const std::vector<std::vector<ModuleBase::ComplexMatrix>>&
+                         dm_hl /**<[in] modified density matrix that contains HOMO and LUMO only*/,
+                     const int nks);
+    void cal_o_delta(const std::vector<std::vector<ModuleBase::ComplexMatrix>>&
                            dm_hl /**<[in] modified density matrix that contains HOMO and LUMO only*/,
                        const int nks);
 
@@ -439,18 +423,12 @@ class LCAO_Deepks
     // 10. cal_orbital_precalc : orbital_precalc is usted for training with orbital label,
     //                          which equals gvdm * orbital_pdm_shell,
     //                          orbital_pdm_shell[1,Inl,nm*nm] = dm_hl * overlap * overlap
-    // 11. cal_orbital_precalc_k : orbital_precalc is usted for training with orbital label,
-    //                          for multi-k case, which equals gvdm * orbital_pdm_shell,
-    //                          orbital_pdm_shell[1,Inl,nm*nm] = dm_hl_k * overlap * overlap
-    // 12. cal_v_delta_precalc : v_delta_precalc is used for training with v_delta label,
+    // 11. cal_v_delta_precalc : v_delta_precalc is used for training with v_delta label,
     //                         which equals gvdm * v_delta_pdm_shell,
     //                         v_delta_pdm_shell = overlap * overlap
-    // 13. cal_v_delta_precalc_k : v_delta_precalc is used for training with v_delta label,
-    //                         for multi-k case, which equals ???
-    //                         ???
-    // 14. check_v_delta_precalc : check v_delta_precalc
-    // 15. prepare_psialpha : prepare psialpha for outputting npy file
-    // 16. prepare_gevdm : prepare gevdm for outputting npy file
+    // 12. check_v_delta_precalc : check v_delta_precalc
+    // 13. prepare_psialpha : prepare psialpha for outputting npy file
+    // 14. prepare_gevdm : prepare gevdm for outputting npy file
 
   public:
     /// Calculates descriptors
@@ -485,30 +463,18 @@ class LCAO_Deepks
     void cal_gedm_equiv(const int nat);
 
     // calculates orbital_precalc
-    void cal_orbital_precalc(const std::vector<std::vector<ModuleBase::matrix>>& dm_hl /**<[in] density matrix*/,
+    template <typename TK, typename TH>
+    void cal_orbital_precalc(const std::vector<std::vector<TH>>& dm_hl /**<[in] density matrix*/,
                              const int nat,
+                             const int nks,
+                             const std::vector<ModuleBase::Vector3<double>>& kvec_d,
                              const UnitCell& ucell,
                              const LCAO_Orbitals& orb,
                              Grid_Driver& GridD);
 
-    // calculates orbital_precalc for multi-k case
-    void cal_orbital_precalc_k(
-        const std::vector<std::vector<ModuleBase::ComplexMatrix>>& dm_hl /**<[in] density matrix*/,
-        const int nat,
-        const int nks,
-        const std::vector<ModuleBase::Vector3<double>>& kvec_d,
-        const UnitCell& ucell,
-        const LCAO_Orbitals& orb,
-        Grid_Driver& GridD);
-
     //calculates v_delta_precalc
+    template <typename TK>
     void cal_v_delta_precalc(const int nlocal,
-        const int nat,
-        const UnitCell &ucell,
-        const LCAO_Orbitals &orb,
-        Grid_Driver &GridD);
-
-    void cal_v_delta_precalc_k(const int nlocal,
         const int nat,
         const int nks,
         const std::vector<ModuleBase::Vector3<double>> &kvec_d,
@@ -516,21 +482,20 @@ class LCAO_Deepks
         const LCAO_Orbitals &orb,
         Grid_Driver &GridD);
 
-    void check_v_delta_precalc(const int nat, const int nks,const int nlocal);
+    template <typename TK>
+    void check_v_delta_precalc(const int nat, const int nks, const int nlocal);
 
     // prepare psialpha for outputting npy file
+    template <typename TK>
     void prepare_psialpha(const int nlocal,
-        const int nat,
-        const UnitCell &ucell,
-        const LCAO_Orbitals &orb,
-        Grid_Driver &GridD);
-    void prepare_psialpha_k(const int nlocal,
         const int nat,
         const int nks,
         const std::vector<ModuleBase::Vector3<double>> &kvec_d,
         const UnitCell &ucell,
         const LCAO_Orbitals &orb,
         Grid_Driver &GridD);
+
+    template <typename TK>
     void check_vdp_psialpha(const int nat, const int nks, const int nlocal);
     
     // prepare gevdm for outputting npy file
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp
index 52d3811692..266cac73ee 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp
@@ -6,12 +6,15 @@
 #include "module_base/global_variable.h"
 #include "module_base/tool_title.h"
 #include "module_elecstate/cal_dm.h"
+#include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
 
-LCAO_Deepks_Interface::LCAO_Deepks_Interface(std::shared_ptr<LCAO_Deepks> ld_in) : ld(ld_in)
+template <typename TK, typename TR>
+LCAO_Deepks_Interface<TK, TR>::LCAO_Deepks_Interface(std::shared_ptr<LCAO_Deepks> ld_in) : ld(ld_in)
 {
 }
-// gamma-only
-void LCAO_Deepks_Interface::out_deepks_labels(const double& etot,
+
+template<typename TK, typename TR>
+void LCAO_Deepks_Interface<TK,TR>::out_deepks_labels(const double& etot,
                                               const int& nks,
                                               const int& nat,
                                               const int& nlocal,
@@ -21,17 +24,20 @@ void LCAO_Deepks_Interface::out_deepks_labels(const double& etot,
                                               const LCAO_Orbitals& orb,
                                               Grid_Driver& GridD,
                                               const Parallel_Orbitals* ParaV,
-                                              const psi::Psi<double>& psid,
-                                              const elecstate::DensityMatrix<double, double>* dm,
-                                              const int& deepks_v_delta)
+                                              const psi::Psi<TK>& psi,
+                                              const elecstate::DensityMatrix<TK, double>* dm,
+                                              hamilt::HamiltLCAO<TK,TR>* p_ham)
 {
     ModuleBase::TITLE("LCAO_Deepks_Interface", "out_deepks_labels");
+    ModuleBase::timer::tick("LCAO_Deepks_Interface", "out_deepks_labels");
+
+    // define TH for different types
+    using TH = std::conditional_t<std::is_same<TK, double>::value, ModuleBase::matrix, ModuleBase::ComplexMatrix>;
 
     const int my_rank = GlobalV::MY_RANK;
     const int nspin = PARAM.inp.nspin;
 
-    // calculating deepks correction to bandgap
-    // and save the results
+    // calculating deepks correction to bandgap and save the results
     if (PARAM.inp.deepks_out_labels)
     {
         // mohan updated 2024-07-25
@@ -72,118 +78,163 @@ void LCAO_Deepks_Interface::out_deepks_labels(const double& etot,
             if (PARAM.inp.deepks_scf)
             {
                 ModuleBase::matrix wg_hl;
-                wg_hl.create(nspin, PARAM.inp.nbands);
+                std::vector<std::vector<TH>> dm_bandgap;
 
-                std::vector<std::vector<ModuleBase::matrix>> dm_bandgap_gamma;
+                // Calculate O_delta
+                if constexpr (std::is_same<TK, double>::value) // for gamma only
+                {
+                    wg_hl.create(nspin, PARAM.inp.nbands);
+                    dm_bandgap.resize(nspin);
 
-                dm_bandgap_gamma.resize(nspin);
-                for (int is = 0; is < nspin; ++is)
+                    for (int is = 0; is < nspin; ++is)
+                    {
+                        for (int ib = 0; ib < 1; ++ib)
+                        {
+                            wg_hl.zero_out();
+                            wg_hl(is, ib + nocc - 1) = -1.0;
+                            wg_hl(is, ib + nocc) = 1.0;
+                            dm_bandgap[ib].resize(nspin);
+                            elecstate::cal_dm(ParaV, wg_hl, psi, dm_bandgap[ib]);
+                        }
+                    }
+                }
+                else // for multi-k
                 {
-                    for (int ib = 0; ib < 1; ++ib)
+                    wg_hl.create(nks, PARAM.inp.nbands);
+                    dm_bandgap.resize(1);
+
+                    for (int ib = 0; ib < 1; ib++)
                     {
                         wg_hl.zero_out();
-                        wg_hl(is, ib + nocc - 1) = -1.0;
-                        wg_hl(is, ib + nocc) = 1.0;
-                        dm_bandgap_gamma[ib].resize(nspin);
-                        elecstate::cal_dm(ParaV, wg_hl, psid, dm_bandgap_gamma[ib]);
+                        for (int ik = 0; ik < nks; ik++)
+                        {
+                            wg_hl(ik, ib + nocc - 1) = -1.0;
+                            wg_hl(ik, ib + nocc) = 1.0;
+                        }
+                        dm_bandgap[ib].resize(nks);
+                        elecstate::cal_dm(ParaV, wg_hl, psi, dm_bandgap[ib]);
                     }
                 }
+                
+                ld->cal_orbital_precalc<TK,TH>(dm_bandgap, nat, nks, kvec_d, ucell, orb, GridD);
+                ld->cal_o_delta(dm_bandgap, nks);
 
-                ld->cal_orbital_precalc(dm_bandgap_gamma, nat, ucell, orb, GridD);
-
+                // save obase and orbital_precalc
                 LCAO_deepks_io::save_npy_orbital_precalc(nat, 
-                                                         nks, 
-                                                         ld->des_per_atom, 
-                                                         ld->orbital_precalc_tensor, 
-                                                         PARAM.globalv.global_out_dir,
-                                                         my_rank);
-
-                ld->cal_o_delta(dm_bandgap_gamma);
-
+                                                        nks, 
+                                                        ld->des_per_atom, 
+                                                        ld->orbital_precalc_tensor, 
+                                                        PARAM.globalv.global_out_dir,
+                                                        my_rank);
                 const std::string file_obase = PARAM.globalv.global_out_dir + "deepks_obase.npy";
-                LCAO_deepks_io::save_npy_o(deepks_bands - ld->o_delta, file_obase, nks, my_rank);
+                LCAO_deepks_io::save_npy_o(deepks_bands - ld->o_delta, file_obase, nks, my_rank);      
             }     // end deepks_scf == 1
             else  // deepks_scf == 0
             {
                 const std::string file_obase = PARAM.globalv.global_out_dir + "deepks_obase.npy";
                 LCAO_deepks_io::save_npy_o(deepks_bands, file_obase, nks, my_rank); // no scf, o_tot=o_base
-            }                                                    // end deepks_scf == 0
+            }     // end deepks_scf == 0
         } // end bandgap label
 
-        if(deepks_v_delta)//gamma only now
+        // save H(R) matrix
+        if (true) // should be modified later!
+        {
+            const std::string file_hr = PARAM.globalv.global_out_dir + "deepks_hr.npy";
+            const hamilt::HContainer<TR>& hR = *(p_ham->getHR());
+
+            // How to save H(R)?
+        }
+
+        if(PARAM.inp.deepks_v_delta)
         {
-            ModuleBase::matrix h_tot;
-            h_tot.create(nlocal,nlocal);
+            std::vector<TH> h_tot(nks);
+            std::vector<std::vector<TK>> h_mat(nks, std::vector<TK>(ParaV->nloc));
+            for (int ik = 0; ik < nks; ik++)
+            {
+                h_tot[ik].create(nlocal, nlocal);
+                p_ham->updateHk(ik);
+                const TK* hk_ptr = p_ham->getHk();
+                for (int i = 0; i < ParaV->nloc; i++)
+                {
+                    h_mat[ik][i] = hk_ptr[i];
+                }
+            }
 
-            DeePKS_domain::collect_h_mat(*ParaV, ld->h_mat,h_tot,nlocal);
+            DeePKS_domain::collect_h_mat<TK,TH>(*ParaV, h_mat, h_tot, nlocal, nks);
 
             const std::string file_htot = PARAM.globalv.global_out_dir + "deepks_htot.npy";
-            LCAO_deepks_io::save_npy_h(h_tot, file_htot, nlocal, my_rank);
+            LCAO_deepks_io::save_npy_h<TK,TH>(h_tot, file_htot, nlocal, nks, my_rank);
 
             if(PARAM.inp.deepks_scf)
             {
-                ModuleBase::matrix v_delta;
-                v_delta.create(nlocal,nlocal);
-                DeePKS_domain::collect_h_mat(*ParaV, ld->H_V_delta,v_delta,nlocal);
+                std::vector<TH> v_delta(nks);
+                std::vector<TH> h_base(nks);
+                for (int ik = 0; ik < nks; ik++)
+                {
+                    v_delta[ik].create(nlocal, nlocal);
+                    h_base[ik].create(nlocal, nlocal);
+                }
+                std::vector<std::vector<TK>>* H_V_delta = nullptr;
+                if constexpr (std::is_same<TK, double>::value)
+                {
+                    H_V_delta = &ld->H_V_delta;
+                }
+                else
+                {
+                    H_V_delta = &ld->H_V_delta_k;
+                }
+                DeePKS_domain::collect_h_mat<TK,TH>(*ParaV, *H_V_delta,v_delta,nlocal,nks); 
 
+                // save v_delta and h_base
                 const std::string file_hbase = PARAM.globalv.global_out_dir + "deepks_hbase.npy";
-                LCAO_deepks_io::save_npy_h(h_tot-v_delta, file_hbase, nlocal, my_rank);
+                for (int ik = 0; ik < nks; ik++)
+                {
+                    h_base[ik] = h_tot[ik] - v_delta[ik];
+                }
+                LCAO_deepks_io::save_npy_h<TK,TH>(h_base, file_hbase, nlocal, nks, my_rank);
 
                 const std::string file_vdelta = PARAM.globalv.global_out_dir + "deepks_vdelta.npy";
-                LCAO_deepks_io::save_npy_h(v_delta, file_vdelta, nlocal, my_rank);
+                LCAO_deepks_io::save_npy_h<TK,TH>(v_delta, file_vdelta, nlocal, nks, my_rank);
 
-                if(deepks_v_delta==1)//v_delta_precalc storage method 1
+                if(PARAM.inp.deepks_v_delta==1)//v_delta_precalc storage method 1
                 {
-                    ld->cal_v_delta_precalc(nlocal,
-                            nat,
-                            ucell,
-                            orb,
-                            GridD);
-                
-                    const int nks_gamma = 1;
-                    LCAO_deepks_io::save_npy_v_delta_precalc(
-                      nat, 
-                      nks_gamma, 
-                      nlocal,
-                      ld->des_per_atom,
-                      ld->v_delta_precalc_tensor,
-                      PARAM.globalv.global_out_dir,
-                      my_rank);
+                    ld->cal_v_delta_precalc<TK>(nlocal, nat, nks, kvec_d, ucell, orb, GridD);
+
+                    LCAO_deepks_io::save_npy_v_delta_precalc<TK>(nat, 
+                                                                 nks, 
+                                                                 nlocal,
+                                                                 ld->des_per_atom,
+                                                                 ld->v_delta_precalc_tensor,
+                                                                 PARAM.globalv.global_out_dir,
+                                                                 my_rank);
                 }
-                else if(deepks_v_delta==2)//v_delta_precalc storage method 2
+                else if(PARAM.inp.deepks_v_delta==2)//v_delta_precalc storage method 2
                 {
-                    ld->prepare_psialpha(nlocal,
-                                nat,
-                                ucell,
-                                orb,
-                                GridD);
-
-                    const int nks_gamma=1;
-                    LCAO_deepks_io::save_npy_psialpha(nat, 
-                                nks_gamma, 
-                                nlocal,
-                                ld->inlmax,
-                                ld->lmaxd,
-                                ld->psialpha_tensor,
-                                PARAM.globalv.global_out_dir,
-                                my_rank);
-
-                    ld->prepare_gevdm(
-                                nat,
-                                orb);
+                    ld->prepare_psialpha<TK>(nlocal, nat, nks, kvec_d, ucell, orb, GridD);
+
+                    LCAO_deepks_io::save_npy_psialpha<TK>(nat, 
+                                                          nks, 
+                                                          nlocal,
+                                                          ld->inlmax,
+                                                          ld->lmaxd,
+                                                          ld->psialpha_tensor,
+                                                          PARAM.globalv.global_out_dir,
+                                                          my_rank);
+
+                    ld->prepare_gevdm(nat, orb);
 
                     LCAO_deepks_io::save_npy_gevdm(nat,
-                      ld->inlmax,
-                      ld->lmaxd,
-                      ld->gevdm_tensor,
-                      PARAM.globalv.global_out_dir,
-                      my_rank);
+                                                   ld->inlmax,
+                                                   ld->lmaxd,
+                                                   ld->gevdm_tensor,
+                                                   PARAM.globalv.global_out_dir,
+                                                   my_rank);
                 }
             }
             else //deepks_scf == 0
             {
                 const std::string file_hbase = PARAM.globalv.global_out_dir + "deepks_hbase.npy";
-                LCAO_deepks_io::save_npy_h(h_tot, file_hbase, nlocal, my_rank);
+                LCAO_deepks_io::save_npy_h<TK,TH>(h_tot, file_hbase, nlocal, nks, my_rank);
             }
         }//end v_delta label
     
@@ -194,6 +245,7 @@ void LCAO_Deepks_Interface::out_deepks_labels(const double& etot,
     if (PARAM.inp.deepks_out_labels || PARAM.inp.deepks_scf)
     {
         // this part is for integrated test of deepks
+        // so it is printed even if deepks_out_labels is not used
         // when deepks_scf is on, the init pdm should be same as the out pdm, so we should not recalculate the pdm
 		if(!PARAM.inp.deepks_scf) 
 		{
@@ -216,258 +268,24 @@ void LCAO_Deepks_Interface::out_deepks_labels(const double& etot,
 					PARAM.inp.deepks_equiv, 
 					ld->d_tensor, 
                     PARAM.globalv.global_out_dir,
-					my_rank); // libnpy needed
+					GlobalV::MY_RANK); // libnpy needed
 		}
     }
     
     /// print out deepks information to the screen
     if (PARAM.inp.deepks_scf)
     {
-        ld->cal_e_delta_band(dm->get_DMK_vector());
+        ld->cal_e_delta_band(dm->get_DMK_vector(), nks);
         std::cout << "E_delta_band = " << std::setprecision(8) << ld->e_delta_band << " Ry"
                   << " = " << std::setprecision(8) << ld->e_delta_band * ModuleBase::Ry_to_eV << " eV"
                   << std::endl;
-        std::cout << "E_delta_NN= " << std::setprecision(8) << ld->E_delta << " Ry"
+        std::cout << "E_delta_NN = " << std::setprecision(8) << ld->E_delta << " Ry"
                   << " = " << std::setprecision(8) << ld->E_delta * ModuleBase::Ry_to_eV << " eV" << std::endl;
     }
 }
 
-// multi-k
-void LCAO_Deepks_Interface::out_deepks_labels(const double& etot,
-                                              const int& nks,
-                                              const int& nat,
-                                              const int& nlocal,
-                                              const ModuleBase::matrix& ekb,
-                                              const std::vector<ModuleBase::Vector3<double>>& kvec_d,
-                                              const UnitCell& ucell,
-                                              const LCAO_Orbitals& orb,
-                                              Grid_Driver& GridD,
-                                              const Parallel_Orbitals* ParaV,
-                                              const psi::Psi<std::complex<double>>& psi,
-                                              const elecstate::DensityMatrix<std::complex<double>, double>* dm,
-                                              const int& deepks_v_delta)
-{
-    ModuleBase::TITLE("LCAO_Deepks_Interface", "out_deepks_labels");
-    ModuleBase::timer::tick("LCAO_Deepks_Interface", "out_deepks_labels");
-
-    const int my_rank = GlobalV::MY_RANK;
-    const int nspin = PARAM.inp.nspin;
-
-    /// calculating deepks correction to bandgap and save the results
-    if (PARAM.inp.deepks_out_labels)
-    {
-        // mohan updated 2024-07-25
-        const std::string file_etot = PARAM.globalv.global_out_dir + "deepks_etot.npy";
-        const std::string file_ebase = PARAM.globalv.global_out_dir + "deepks_ebase.npy";
-
-        LCAO_deepks_io::save_npy_e(etot, file_etot, my_rank);
-
-        if (PARAM.inp.deepks_scf)
-        {
-            /// ebase :no deepks E_delta including
-            LCAO_deepks_io::save_npy_e(etot - ld->E_delta,
-                           file_ebase, my_rank);
-        }
-        else // deepks_scf = 0; base calculation
-        {
-            LCAO_deepks_io::save_npy_e(etot, file_ebase, my_rank);
-        }
-
-        if (PARAM.inp.deepks_bandgap)
-        {
-            int nocc = PARAM.inp.nelec / 2;
-            ModuleBase::matrix deepks_bands;
-            deepks_bands.create(nks, 1);
-            for (int iks = 0; iks < nks; iks++)
-            {
-                for (int hl = 0; hl < 1; hl++)
-                {
-                    deepks_bands(iks, hl) = ekb(iks, nocc + hl) - ekb(iks, nocc - 1 + hl);
-                }
-            }
-
-            const std::string file_otot = PARAM.globalv.global_out_dir + "deepks_otot.npy";
-            LCAO_deepks_io::save_npy_o(deepks_bands, file_otot, nks, my_rank);
-
-            if (PARAM.inp.deepks_scf)
-            {
-                int nocc = PARAM.inp.nelec / 2; // redundant!
-                ModuleBase::matrix wg_hl;
-                wg_hl.create(nks, PARAM.inp.nbands);
-                std::vector<std::vector<ModuleBase::ComplexMatrix>> dm_bandgap_k;
-                dm_bandgap_k.resize(1);
-
-                for (int ib = 0; ib < 1; ib++)
-                {
-                    wg_hl.zero_out();
-                    for (int ik = 0; ik < nks; ik++)
-                    {
-                        wg_hl(ik, ib + nocc - 1) = -1.0;
-                        wg_hl(ik, ib + nocc) = 1.0;
-                    }
-                    dm_bandgap_k[ib].resize(nks);
-                    elecstate::cal_dm(ParaV, wg_hl, psi, dm_bandgap_k[ib]);
-                }
-
-                // ld->cal_o_delta_k(dm_bandgap_k, ParaV, nks);
-                ld->cal_orbital_precalc_k(dm_bandgap_k, nat, nks, kvec_d, ucell, orb, GridD);
-
-				LCAO_deepks_io::save_npy_orbital_precalc(
-						nat, 
-						nks, 
-						ld->des_per_atom, 
-						ld->orbital_precalc_tensor, 
-                        PARAM.globalv.global_out_dir,
-						GlobalV::MY_RANK);
-
-                ld->cal_o_delta_k(dm_bandgap_k, nks);
-
-                const std::string file_obase = PARAM.globalv.global_out_dir + "deepks_obase.npy";
-                LCAO_deepks_io::save_npy_o(deepks_bands - ld->o_delta, file_obase, nks, my_rank);
-            }     // end deepks_scf == 1
-            else  // deepks_scf == 0
-            {
-                const std::string file_obase = PARAM.globalv.global_out_dir + "deepks_obase.npy";
-                LCAO_deepks_io::save_npy_o(deepks_bands, file_obase, nks, my_rank); // no scf, o_tot=o_base
-            }     // end deepks_scf == 0
-        } // end bandgap label
-        if(deepks_v_delta)
-        {
-            std::vector<ModuleBase::ComplexMatrix> h_tot(nks);
-            for (int ik = 0; ik < nks; ik++)
-            {
-                h_tot[ik].create(nlocal, nlocal);
-            }
-
-            DeePKS_domain::collect_h_mat(*ParaV, ld->h_mat_k,h_tot,nlocal,nks);
-
-            const std::string file_htot = PARAM.globalv.global_out_dir + "deepks_htot.npy";
-            LCAO_deepks_io::save_npy_h(h_tot, file_htot, nlocal, nks, my_rank);
-
-            if(PARAM.inp.deepks_scf)
-            {
-                std::vector<ModuleBase::ComplexMatrix> v_delta(nks);
-                std::vector<ModuleBase::ComplexMatrix> hbase(nks);
-                for (int ik = 0; ik < nks; ik++)
-                {
-                    v_delta[ik].create(nlocal, nlocal);
-                    hbase[ik].create(nlocal, nlocal);
-                }
-                DeePKS_domain::collect_h_mat(*ParaV, ld->H_V_delta_k,v_delta,nlocal,nks); 
-
-                const std::string file_hbase = PARAM.globalv.global_out_dir + "deepks_hbase.npy";
-                for (int ik = 0; ik < nks; ik++)
-                {
-                    hbase[ik] = h_tot[ik] - v_delta[ik];
-                }
-                LCAO_deepks_io::save_npy_h(hbase, file_hbase, nlocal, nks, my_rank);
-
-                const std::string file_vdelta = PARAM.globalv.global_out_dir + "deepks_vdelta.npy";
-                LCAO_deepks_io::save_npy_h(v_delta, file_vdelta, nlocal, nks, my_rank);
-
-                if(deepks_v_delta==1)//v_delta_precalc storage method 1
-                {
-                    ld->cal_v_delta_precalc_k(nlocal,
-                            nat,
-                            nks,
-                            kvec_d,
-                            ucell,
-                            orb,
-                            GridD);
-                
-                    LCAO_deepks_io::save_npy_v_delta_precalc(
-                      nat, 
-                      nks, 
-                      nlocal,
-                      ld->des_per_atom,
-                      ld->v_delta_precalc_tensor,
-                      PARAM.globalv.global_out_dir,
-                      my_rank);
-
-                }
-                else if(deepks_v_delta==2)//v_delta_precalc storage method 2
-                {
-                    ld->prepare_psialpha_k(nlocal,
-                                nat,
-                                nks,
-                                kvec_d,
-                                ucell,
-                                orb,
-                                GridD);
-                    
-                    LCAO_deepks_io::save_npy_psialpha(nat, 
-                                nks, 
-                                nlocal,
-                                ld->inlmax,
-                                ld->lmaxd,
-                                ld->psialpha_tensor,
-                                PARAM.globalv.global_out_dir,
-                                my_rank);
-
-                    ld->prepare_gevdm(
-                                nat,
-                                orb);
-
-                    LCAO_deepks_io::save_npy_gevdm(nat,
-                      ld->inlmax,
-                      ld->lmaxd,
-                      ld->gevdm_tensor,
-                      PARAM.globalv.global_out_dir,
-                      my_rank);
-                }
-            }
-            else //deepks_scf == 0
-            {
-                const std::string file_hbase = PARAM.globalv.global_out_dir + "deepks_hbase.npy";
-                LCAO_deepks_io::save_npy_h(h_tot, file_hbase, nlocal, nks, my_rank);
-            }
-        }
-    }  // end deepks_out_labels
-
-
-    // DeePKS PDM and descriptor
-    if (PARAM.inp.deepks_out_labels || PARAM.inp.deepks_scf)
-    {
-        // this part is for integrated test of deepks
-        // so it is printed no matter even if deepks_out_labels is not used
-        // when deepks_scf is on, the init pdm should be same as the out pdm, so we should not recalculate the pdm
-		if(!PARAM.inp.deepks_scf) 
-		{
-			ld->cal_projected_DM_k(dm, ucell, orb, GridD);
-		}
-
-        ld->check_projected_dm(); // print out the projected dm for NSCF calculaiton
-
-        ld->cal_descriptor(nat);     // final descriptor
-
-        ld->check_descriptor(ucell, PARAM.globalv.global_out_dir);
-
-        if (PARAM.inp.deepks_out_labels)
-        {
-            LCAO_deepks_io::save_npy_d(nat, 
-                                       ld->des_per_atom, 
-									   ld->inlmax, 
-									   ld->inl_l,
-									   PARAM.inp.deepks_equiv, 
-                                       ld->d_tensor, 
-                                       PARAM.globalv.global_out_dir,
-                                       GlobalV::MY_RANK); // libnpy needed
-        }
-    }
-    //
-    if (PARAM.inp.deepks_scf)
-    {
-        ld->cal_e_delta_band_k(dm->get_DMK_vector(), nks);
-
-        std::cout << "E_delta_band = " << std::setprecision(8) << ld->e_delta_band << " Ry"
-                  << " = " << std::setprecision(8) << ld->e_delta_band * ModuleBase::Ry_to_eV << " eV"
-                  << std::endl;
-
-        std::cout << "E_delta_NN= " << std::setprecision(8) << ld->E_delta << " Ry"
-                  << " = " << std::setprecision(8) << ld->E_delta * ModuleBase::Ry_to_eV << " eV" << std::endl;
-    }
-
-    ModuleBase::timer::tick("LCAO_Deepks_Interface", "out_deepks_labels");
-}
+template class LCAO_Deepks_Interface<double, double>;
+template class LCAO_Deepks_Interface<std::complex<double>, double>;
+template class LCAO_Deepks_Interface<std::complex<double>, std::complex<double>>;
 
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h
index cfb180ea19..1e657f48fe 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h
@@ -5,8 +5,10 @@
 #include "LCAO_deepks.h"
 #include "module_base/complexmatrix.h"
 #include "module_base/matrix.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 #include <memory>
 
+template <typename TK, typename TR>
 class LCAO_Deepks_Interface
 {
   public:
@@ -28,7 +30,6 @@ class LCAO_Deepks_Interface
     /// @param[in] psid
     /// @param[in] dm_gamma
     /// @param[in] dm_k
-    /// @param[in] deepks_v_delta
     // for Gamma-only
     void out_deepks_labels(const double& etot,
                            const int& nks,
@@ -40,23 +41,9 @@ class LCAO_Deepks_Interface
                            const LCAO_Orbitals& orb,
                            Grid_Driver& GridD,
                            const Parallel_Orbitals* ParaV,
-                           const psi::Psi<double>& psid,
-                           const elecstate::DensityMatrix<double, double>* dm,
-                           const int& deepks_v_delta);
-  // for multi-k
-  void out_deepks_labels(const double& etot,
-                           const int& nks,
-                           const int& nat,
-                           const int& nlocal,
-                           const ModuleBase::matrix& ekb,
-                           const std::vector<ModuleBase::Vector3<double>>& kvec_d,
-                           const UnitCell& ucell,
-                           const LCAO_Orbitals& orb,
-                           Grid_Driver& GridD,
-                           const Parallel_Orbitals* ParaV,
-                           const psi::Psi<std::complex<double>>& psi,
-                           const elecstate::DensityMatrix<std::complex<double>, double>* dm,
-                           const int& deepks_v_delta);
+                           const psi::Psi<TK>& psid,
+                           const elecstate::DensityMatrix<TK, double>* dm,
+                           hamilt::HamiltLCAO<TK, TR>* p_ham);
 
   private:
     std::shared_ptr<LCAO_Deepks> ld;
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp
index dd7d39227f..c1aa9cab0a 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp
@@ -28,28 +28,11 @@
 #include "LCAO_deepks_io.h"
 #include "npy.hpp"
 
-void LCAO_deepks_io::print_dm(const std::vector<double> &dm, 
-                              const int nlocal, 
-                              const int nrow)
-{
-    std::ofstream ofs("dm");
-    ofs << std::setprecision(15);
-
-    for (int mu=0; mu<nlocal; mu++)
-    {
-        for (int nu=0; nu<nlocal; nu++)
-        {
-            ofs << dm[mu * nrow + nu] << " ";
-        }
-        ofs << std::endl;
-    }
-}
-
-
-void LCAO_deepks_io::print_dm_k(const int nks, 
+template <typename TK>
+void LCAO_deepks_io::print_dm(const int nks, 
                                 const int nlocal,
                                 const int nrow,
-                                const std::vector<std::vector<std::complex<double>>>& dm)
+                                const std::vector<std::vector<TK>>& dm)
 {
     std::stringstream ss;
     for(int ik=0;ik<nks;ik++)
@@ -410,42 +393,8 @@ void LCAO_deepks_io::save_npy_orbital_precalc(const int nat,
     return;
 }
 
-
-//just for gamma only
-void LCAO_deepks_io::save_npy_h(const ModuleBase::matrix &hamilt,
-                                const std::string &h_file,
-                                const int nlocal,
-                                const int rank)
-{
-    ModuleBase::TITLE("LCAO_deepks_io", "save_npy_h");
-	if(rank!=0)
-	{
-		return;
-	}
-    int nks=1;
-
-    const long unsigned hshape[] = {static_cast<unsigned long>(nks),
-                                    static_cast<unsigned long>(nlocal), 
-                                    static_cast<unsigned long>(nlocal) };
-
-    std::vector<double> npy_h;
-    for(int k=0; k<nks; k++)
-    {
-        for (int i=0; i<nlocal; i++)
-        {
-            for (int j=0; j<nlocal; j++)
-            {
-                npy_h.push_back(hamilt(i,j));
-            }
-        }         
-    }
-
-    npy::SaveArrayAsNumpy(h_file, false, 3, hshape, npy_h);
-    return;    
-}
-
-// for multi-k, should be combined with gamma-only version in future
-void LCAO_deepks_io::save_npy_h(const std::vector<ModuleBase::ComplexMatrix> &hamilt,
+template <typename TK, typename TH>
+void LCAO_deepks_io::save_npy_h(const std::vector<TH> &hamilt,
                                 const std::string &h_file,
                                 const int nlocal,
                                 const int nks,
@@ -461,7 +410,7 @@ void LCAO_deepks_io::save_npy_h(const std::vector<ModuleBase::ComplexMatrix> &ha
                                     static_cast<unsigned long>(nlocal), 
                                     static_cast<unsigned long>(nlocal) };
 
-    std::vector<std::complex<double>> npy_h;
+    std::vector<TK> npy_h;
     for(int k=0; k<nks; k++)
     {
         for (int i=0; i<nlocal; i++)
@@ -477,6 +426,7 @@ void LCAO_deepks_io::save_npy_h(const std::vector<ModuleBase::ComplexMatrix> &ha
     return;    
 }
 
+template <typename TK>
 void LCAO_deepks_io::save_npy_v_delta_precalc(const int nat, 
                                               const int nks,
                                               const int nlocal, 
@@ -500,60 +450,39 @@ void LCAO_deepks_io::save_npy_v_delta_precalc(const int nat,
                                     static_cast<unsigned long>(nlocal),
                                     static_cast<unsigned long>(nat),
                                     static_cast<unsigned long>(des_per_atom)};
-    if (nks==1)
+
+    std::vector<TK> npy_v_delta_precalc;    
+    for (int iks = 0; iks < nks; ++iks)
     {
-        std::vector<double> npy_v_delta_precalc;    
-        for (int iks = 0; iks < nks; ++iks)
+        for (int mu = 0; mu < nlocal; ++mu)
         {
-            for (int mu = 0; mu < nlocal; ++mu)
+            for (int nu = 0; nu < nlocal; ++nu)
             {
-                for (int nu = 0; nu < nlocal; ++nu)
+                for (int iat = 0;iat < nat;++iat)
                 {
-                    for (int iat = 0;iat < nat;++iat)
+                    for(int p=0; p<des_per_atom; ++p)
                     {
-                        for(int p=0; p<des_per_atom; ++p)
+                        if constexpr (std::is_same<TK, double>::value)
                         {
                             npy_v_delta_precalc.push_back(v_delta_precalc_tensor.index({iks, mu, nu, iat, p }).item().toDouble());
                         }
-                    }                
-                }
-            }
-        }
-        const std::string file_vdpre = out_dir + "deepks_vdpre.npy";
-        npy::SaveArrayAsNumpy(file_vdpre, false, 5, gshape, npy_v_delta_precalc);
-        return;
-    }
-    else
-    {
-        std::vector<std::complex<double>> npy_v_delta_precalc;
-        for (int iks = 0; iks < nks; ++iks)
-        {
-            for (int mu = 0; mu < nlocal; ++mu)
-            {
-                for (int nu = 0; nu < nlocal; ++nu)
-                {
-                    for (int iat = 0;iat < nat;++iat)
-                    {
-                        for(int p=0; p<des_per_atom; ++p)
+                        else
                         {
-                            auto real_part = torch::real(v_delta_precalc_tensor.index({iks, mu, nu, iat, p})).item<double>();
-                            auto imag_part = torch::imag(v_delta_precalc_tensor.index({iks, mu, nu, iat, p})).item<double>();
-                            std::complex<double> value(real_part, imag_part);
+                            std::complex<double> value(torch::real(v_delta_precalc_tensor.index({iks, mu, nu, iat, p})).item<double>(),
+                                                        torch::imag(v_delta_precalc_tensor.index({iks, mu, nu, iat, p})).item<double>());
                             npy_v_delta_precalc.push_back(value);
                         }
-                    }                
-                }
+                    }
+                }                
             }
         }
-        const std::string file_vdpre = out_dir + "deepks_vdpre.npy";
-        npy::SaveArrayAsNumpy(file_vdpre, false, 5, gshape, npy_v_delta_precalc);
-        return;
     }
-
-    
+    const std::string file_vdpre = out_dir + "deepks_vdpre.npy";
+    npy::SaveArrayAsNumpy(file_vdpre, false, 5, gshape, npy_v_delta_precalc);
+    return;
 }
 
-
+template <typename TK>
 void LCAO_deepks_io::save_npy_psialpha(const int nat, 
                                        const int nks,
                                        const int nlocal,
@@ -578,55 +507,35 @@ void LCAO_deepks_io::save_npy_psialpha(const int nat,
                                     static_cast<unsigned long>(nks),
                                     static_cast<unsigned long>(nlocal),
                                     static_cast<unsigned long>(mmax)};
-    if(nks==1)
+    std::vector<TK> npy_psialpha;
+    for(int iat=0; iat< nat ; iat++) 
     {
-        std::vector<double> npy_psialpha;
-        for(int iat=0; iat< nat ; iat++) 
+        for(int nl = 0; nl < nlmax; nl++)
         {
-            for(int nl = 0; nl < nlmax; nl++)
+            for (int iks = 0; iks < nks ; iks++)
             {
-                for (int iks = 0; iks < nks ; iks++)
+                for(int mu = 0; mu < nlocal ; mu++)
                 {
-                    for(int mu = 0; mu < nlocal ; mu++)
+                    for(int m=0; m< mmax; m++)
                     {
-                        for(int m=0; m< mmax; m++)
+                        if constexpr (std::is_same<TK, double>::value)
                         {
                             npy_psialpha.push_back(psialpha_tensor.index({ iat,nl, iks, mu, m }).item().toDouble());
                         }
-                    }                
-                }
-            }
-        }
-        const std::string file_psialpha = out_dir + "deepks_psialpha.npy";
-        npy::SaveArrayAsNumpy(file_psialpha, false, 5, gshape, npy_psialpha);
-        return;
-    }
-    else
-    {
-        std::vector<std::complex<double>> npy_psialpha;
-        for(int iat=0; iat< nat ; iat++) 
-        {
-            for(int nl = 0; nl < nlmax; nl++)
-            {
-                for (int iks = 0; iks < nks ; iks++)
-                {
-                    for(int mu = 0; mu < nlocal ; mu++)
-                    {
-                        for(int m=0; m< mmax; m++)
+                        else
                         {
-                            std::complex<double> value(torch::real(psialpha_tensor.index({ iat,nl, iks, mu, m })).item<double>(), 
-                                                       torch::imag(psialpha_tensor.index({ iat,nl, iks, mu, m })).item<double>());
+                            std::complex<double> value(torch::real(psialpha_tensor.index({ iat, nl, iks, mu, m })).item<double>(), 
+                                                       torch::imag(psialpha_tensor.index({ iat, nl, iks, mu, m })).item<double>());
                             npy_psialpha.push_back(value);
                         }
-                    }                
-                }
+                    }
+                }                
             }
         }
-        const std::string file_psialpha = out_dir + "deepks_psialpha.npy";
-        npy::SaveArrayAsNumpy(file_psialpha, false, 5, gshape, npy_psialpha);
-        return;
     }
-
+    const std::string file_psialpha = out_dir + "deepks_psialpha.npy";
+    npy::SaveArrayAsNumpy(file_psialpha, false, 5, gshape, npy_psialpha);
+    return;
 }
 
 
@@ -676,4 +585,61 @@ void LCAO_deepks_io::save_npy_gevdm(const int nat,
     return;
 }
 
+
+template void LCAO_deepks_io::print_dm<double>(const int nks, 
+                                               const int nlocal,
+                                               const int nrow,
+                                               const std::vector<std::vector<double>>& dm);
+
+template void LCAO_deepks_io::print_dm<std::complex<double>>(const int nks, 
+                                                             const int nlocal,
+                                                             const int nrow,
+                                                             const std::vector<std::vector<std::complex<double>>>& dm);
+
+template void LCAO_deepks_io::save_npy_h<double>(const std::vector<ModuleBase::matrix> &hamilt,
+                                                 const std::string &h_file,
+                                                 const int nlocal,
+                                                 const int nks,
+                                                 const int rank);
+
+template void LCAO_deepks_io::save_npy_h<std::complex<double>>(const std::vector<ModuleBase::ComplexMatrix> &hamilt,
+                                                               const std::string &h_file,
+                                                               const int nlocal,
+                                                               const int nks,
+                                                               const int rank);
+
+template void LCAO_deepks_io::save_npy_v_delta_precalc<double>(const int nat, 
+                                                               const int nks,
+                                                               const int nlocal, 
+                                                               const int des_per_atom,
+                                                               const torch::Tensor& v_delta_precalc_tensor,
+                                                               const std::string& out_dir,
+                                                               const int rank);
+
+template void LCAO_deepks_io::save_npy_v_delta_precalc<std::complex<double>>(const int nat, 
+                                                                             const int nks,
+                                                                             const int nlocal, 
+                                                                             const int des_per_atom,
+                                                                             const torch::Tensor& v_delta_precalc_tensor,
+                                                                             const std::string& out_dir,
+                                                                             const int rank);
+
+template void LCAO_deepks_io::save_npy_psialpha<double>(const int nat,
+                                                        const int nks,
+                                                        const int nlocal,
+                                                        const int inlmax,
+                                                        const int lmaxd,
+                                                        const torch::Tensor &psialpha_tensor,
+                                                        const std::string& out_dir,
+                                                        const int rank);
+
+template void LCAO_deepks_io::save_npy_psialpha<std::complex<double>>(const int nat,
+                                                                      const int nks,
+                                                                      const int nlocal,
+                                                                      const int inlmax,
+                                                                      const int lmaxd,
+                                                                      const torch::Tensor &psialpha_tensor,
+                                                                      const std::string& out_dir,
+                                                                      const int rank);
+
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.h
index 8f88fa20d4..34398a21d7 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.h
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.h
@@ -20,33 +20,31 @@ namespace LCAO_deepks_io
     /// It also contains subroutines for printing density matrices
     /// which is used in unit tests
 
-    /// There are 2 subroutines for printing density matrices:
-    /// 1. print_dm : for gamma only
-    /// 2. print_dm_k : for multi-k
+    /// There are 2 subroutines for printing density matrices and loading .npy files:
+    /// 1. print_dm : print density matrices
+    /// 2. load_npy_gedm : load gedm from .npy file
 
     /// others print quantities in .npy format
 
-    /// 3. save_npy_d : descriptor ->dm_eig.npy
-    /// 4. save_npy_gvx : gvx ->grad_vx.npy
-    /// 5. save_npy_e : energy
-    /// 6. save_npy_f : force
+    /// 3. save_npy_d : descriptor -> deepks_dm_eig.npy
+    /// 4. save_npy_e : energy
+    /// 5. save_npy_f : force 
+    /// 6. save_npy_gvx : gvx -> deepks_gradvx.npy
     /// 7. save_npy_s : stress
-    /// 8. save_npy_o: orbital
-    /// 9. save_npy_orbital_precalc: orbital_precalc -> orbital_precalc.npy
-    /// 10. save_npy_h : Hamiltonian
-    /// 11. save_npy_v_delta_precalc : v_delta_precalc
-    /// 12. save_npy_psialpha : psialpha
-    /// 13. save_npy_gevdm : grav_evdm , can use psialpha and gevdm to calculate v_delta_precalc
+    /// 8. save_npy_gvepsl : gvepsl -> deepks_gvepsl.npy
+    /// 9. save_npy_o: orbital
+    /// 10. save_npy_orbital_precalc: orbital_precalc -> deepks_orbpre.npy
+    /// 11. save_npy_h : Hamiltonian
+    /// 12. save_npy_v_delta_precalc : v_delta_precalc -> deepks_vdpre.npy
+    /// 13. save_npy_psialpha : psialpha -> deepks_psialpha.npy
+    /// 14. save_npy_gevdm : grad_evdm -> deepks_gevdm.npy, can use psialpha and gevdm to calculate v_delta_precalc
 
 /// print density matrices
-void print_dm(const std::vector<double> &dm,
-		const int nlocal,
-		const int nrow);
-
-void print_dm_k(const int nks,
+template <typename TK>
+void print_dm(const int nks,
 		const int nlocal,
 		const int nrow,
-		const std::vector<std::vector<std::complex<double>>>& dm);
+		const std::vector<std::vector<TK>>& dm);
 
 void load_npy_gedm(const int nat,
 		const int des_per_atom,
@@ -54,18 +52,7 @@ void load_npy_gedm(const int nat,
 		double& e_delta,
 		const int rank);
 
-    ///----------------------------------------------------------------------
-    /// The following 4 functions save the `[dm_eig], [e_base], [f_base], [grad_vx]`
-    /// of current configuration as `.npy` file, when `deepks_scf = 1`.
-    /// After a full group of consfigurations are calculated,
-    /// we need a python script to `load` and `torch.cat` these `.npy` files,
-    /// and get `l_e_delta,npy` and `l_f_delta.npy` corresponding to the exact E, F data.
-    ///
-    /// Unit of energy: Ry
-    ///
-    /// Unit of force: Ry/Bohr
-    ///----------------------------------------------------------------------
-
+/// save descriptor
 void save_npy_d(const int nat,
 		const int des_per_atom,
 		const int inlmax,
@@ -75,33 +62,36 @@ void save_npy_d(const int nat,
 		const std::string& out_dir,
 		const int rank);
 
-void save_npy_gvx(const int nat,
-		const int des_per_atom,
-		const torch::Tensor &gvx_tensor,
-        const std::string& out_dir,
-		const int rank);
-
-void save_npy_gvepsl(const int nat,
-		const int des_per_atom,
-		const torch::Tensor &gvepsl_tensor,
-		const std::string& out_dir,
-		const int rank);
-
+// save energy
 void save_npy_e(const double &e,  /**<[in] \f$E_{base}\f$ or \f$E_{tot}\f$, in Ry*/
 		const std::string &e_file,
 		const int rank);
 
+// save force and gvx
 void save_npy_f(const ModuleBase::matrix &f, /**<[in] \f$F_{base}\f$ or \f$F_{tot}\f$, in Ry/Bohr*/
 		const std::string &f_file,
 		const int nat,
 		const int rank);
 
+void save_npy_gvx(const int nat,
+		const int des_per_atom,
+		const torch::Tensor &gvx_tensor,
+        const std::string& out_dir,
+		const int rank);
+
+// save stress and gvepsl
 void save_npy_s(const ModuleBase::matrix &stress, /**<[in] \f$S_{base}\f$ or \f$S_{tot}\f$, in Ry/Bohr^3*/
 		const std::string &s_file,
 		const double &omega,
 		const int rank);
 
-/// QO added on 2021-12-15
+void save_npy_gvepsl(const int nat,
+		const int des_per_atom,
+		const torch::Tensor &gvepsl_tensor,
+		const std::string& out_dir,
+		const int rank);
+
+/// save orbital and orbital_precalc
 void save_npy_o(const ModuleBase::matrix &bandgap, /**<[in] \f$E_{base}\f$ or \f$E_{tot}\f$, in Ry*/
 		const std::string &o_file,
 		const int nks,
@@ -114,20 +104,15 @@ void save_npy_orbital_precalc(const int nat,
         const std::string& out_dir,
 		const int rank);
 
-/// xinyuan added on 2023-2-20
-/// for gamma only
-void save_npy_h(const ModuleBase::matrix &hamilt,
+// save Hamiltonian, plus v_delta_precalc (for deepks_v_delta==1) or psialpha and gevdm (for deepks_v_delta==2)
+template <typename TK, typename TH>
+void save_npy_h(const std::vector<TH> &hamilt,
 		const std::string &h_file,
 		const int nlocal,
-		const int rank);
-
-/// for multi-k
-void save_npy_h(const std::vector<ModuleBase::ComplexMatrix> &hamilt,
-        const std::string &h_file,
-        const int nlocal,
         const int nks,
-        const int rank);
+		const int rank);
 
+template <typename TK>
 void save_npy_v_delta_precalc(const int nat,
 		const int nks,
 		const int nlocal,
@@ -136,6 +121,7 @@ void save_npy_v_delta_precalc(const int nat,
 		const std::string& out_dir,
 		const int rank);
 
+template <typename TK>
 void save_npy_psialpha(const int nat,
 		const int nks,
 		const int nlocal,
@@ -145,6 +131,7 @@ void save_npy_psialpha(const int nat,
 		const std::string& out_dir,
 		const int rank);
 
+// gevdm is always real, so no template parameter is needed for now
 void save_npy_gevdm(const int nat,
 		const int inlmax,
 		const int lmaxd,
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp
index 7f9885d76f..eae3c12220 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp
@@ -13,7 +13,7 @@
 #include "LCAO_deepks.h"
 #include "module_base/parallel_reduce.h"
 
-void LCAO_Deepks::cal_o_delta(const std::vector<std::vector<ModuleBase::matrix>>& dm_hl)
+void LCAO_Deepks::cal_o_delta(const std::vector<std::vector<ModuleBase::matrix>>& dm_hl, const int nks)
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_o_delta");
     this->o_delta.zero_out();
@@ -31,7 +31,7 @@ void LCAO_Deepks::cal_o_delta(const std::vector<std::vector<ModuleBase::matrix>>
                     const int index = nu * pv->nrow + mu;
                     for (int is = 0; is < PARAM.inp.nspin; ++is)
                     {
-                        this->o_delta(0,hl) += dm_hl[hl][is](nu, mu) * this->H_V_delta[index];
+                        this->o_delta(0,hl) += dm_hl[hl][is](nu, mu) * this->H_V_delta[0][index];
                     }
                 }
             }
@@ -44,7 +44,7 @@ void LCAO_Deepks::cal_o_delta(const std::vector<std::vector<ModuleBase::matrix>>
 
 //calculating the correction of (LUMO-HOMO) energies, i.e., band gap corrections
 //for multi_k calculations
-void LCAO_Deepks::cal_o_delta_k(const std::vector<std::vector<ModuleBase::ComplexMatrix>>& dm_hl,
+void LCAO_Deepks::cal_o_delta(const std::vector<std::vector<ModuleBase::ComplexMatrix>>& dm_hl,
     const int nks)
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_o_delta_k");
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp
index 2925dfc69a..29c1fd813e 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp
@@ -14,8 +14,7 @@
 //3. check_projected_dm, which prints pdm to descriptor.dat
 
 //4. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) for gamma point
-//5. cal_gdmx_k, counterpart of 3, for multi-k
-//6. check_gdmx, which prints gdmx to a series of .dat files
+//5. check_gdmx, which prints gdmx to a series of .dat files
 
 #ifdef __DEEPKS
 
@@ -317,7 +316,7 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<double, double
     return;
 }
 
-void LCAO_Deepks::cal_projected_DM_k(const elecstate::DensityMatrix<std::complex<double>, double>* dm,
+void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<std::complex<double>, double>* dm,
     const UnitCell &ucell,
     const LCAO_Orbitals &orb,
     Grid_Driver& GridD)
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp
index 97aef62322..5ec6a7acb4 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp
@@ -153,8 +153,11 @@ void LCAO_Deepks::load_model(const std::string& deepks_model) {
 }
 
 // prepare_psialpha and prepare_gevdm for deepks_v_delta = 2
+template <typename TK>
 void LCAO_Deepks::prepare_psialpha(const int nlocal,
     const int nat,
+    const int nks,
+    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
     const UnitCell &ucell,
     const LCAO_Orbitals &orb,
     Grid_Driver &GridD)
@@ -162,109 +165,15 @@ void LCAO_Deepks::prepare_psialpha(const int nlocal,
     ModuleBase::TITLE("LCAO_Deepks", "prepare_psialpha");
     int nlmax = this->inlmax/nat;
     int mmax = 2*this->lmaxd+1;
-    this->psialpha_tensor = torch::zeros({ nat, nlmax, 1, nlocal, mmax }, torch::TensorOptions().dtype(torch::kFloat64)); // support gamma-only
-
-    //cutoff for alpha is same for all types of atoms
-    const double Rcut_Alpha = orb.Alpha[0].getRcut();
-
-    for (int T0 = 0; T0 < ucell.ntype; T0++)
+    if constexpr (std::is_same<TK, double>::value)
     {
-		Atom* atom0 = &ucell.atoms[T0]; 
-        for (int I0 =0; I0< atom0->na; I0++)
-        {
-            //iat: atom index on which |alpha> is located
-            const int iat = ucell.itia2iat(T0,I0);
-			const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
-            GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0);
-
-            //outermost loop : find all adjacent atoms
-            for (int ad=0; ad<GridD.getAdjacentNum()+1 ; ++ad)
-            {
-                const int T1 = GridD.getType(ad);
-                const int I1 = GridD.getNatom(ad);
-                const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
-				const double Rcut_AO1 = orb.Phi[T1].getRcut();
-
-                const ModuleBase::Vector3<double> tau1 = GridD.getAdjacentTau(ad);
-				const Atom* atom1 = &ucell.atoms[T1];
-				const int nw1_tot = atom1->nw*PARAM.globalv.npol;
-
-				const double dist1 = (tau1-tau0).norm() * ucell.lat0;
-
-				if (dist1 > Rcut_Alpha + Rcut_AO1)
-				{
-					continue;
-				}
-
-                //middle loop : all atomic basis on the adjacent atom ad
-				for (int iw1=0; iw1<nw1_tot; ++iw1)
-				{
-					const int iw1_all = start1 + iw1;
-					const int iw1_local = pv->global2local_row(iw1_all);
-					const int iw2_local = pv->global2local_col(iw1_all);
-					if(iw1_local < 0 || iw2_local < 0) {continue;
-}
-					const int iw1_0 = iw1/PARAM.globalv.npol;
-					std::vector<double> nlm = this->nlm_save[iat][ad][iw1][0];
-                    
-                    int ib=0;
-                    int nl=0;
-                    for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0)
-                    {
-                        for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0)
-                        {
-                            const int nm = 2*L0+1;
-                            
-                            for (int m1=0; m1<nm; ++m1) // nm = 1 for s, 3 for p, 5 for d
-                            {
-                                this->psialpha_tensor[iat][nl][0][iw1_all][m1] = nlm[ib+m1];
-                            }
-                            ib+=nm;
-                            nl++;
-                        }
-                    }  
-				}//end iw
-			}//end ad
-		}//end I0
-	}//end T0
-
-#ifdef __MPI
-    double msg[mmax];
-    for(int iat=0; iat< nat ; iat++)
+        this->psialpha_tensor = torch::zeros({ nat, nlmax, nks, nlocal, mmax }, torch::kFloat64); // support gamma-only
+    }
+    else
     {
-        for(int nl = 0; nl < nlmax; nl++)
-        {
-            for(int mu = 0; mu < nlocal ; mu++)
-            {
-                for(int m=0;m<mmax;m++) 
-				{
-					msg[m] = this->psialpha_tensor[iat][nl][0][mu][m].item().toDouble();
-				}
-				Parallel_Reduce::reduce_all(msg,mmax);
-				for(int m=0;m<mmax;m++) 
-				{ 
-					this->psialpha_tensor[iat][nl][0][mu][m] = msg[m];
-				}
-			}
-        }
+        this->psialpha_tensor = torch::zeros({ nat, nlmax, nks, nlocal, mmax }, torch::kComplexDouble); // support multi-k
     }
 
-#endif  
-}
-
-void LCAO_Deepks::prepare_psialpha_k(const int nlocal,
-    const int nat,
-    const int nks,
-    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-    const UnitCell &ucell,
-    const LCAO_Orbitals &orb,
-    Grid_Driver &GridD)
-{
-    ModuleBase::TITLE("LCAO_Deepks", "prepare_psialpha");
-    int nlmax = this->inlmax/nat;
-    int mmax = 2*this->lmaxd+1;
-    this->psialpha_tensor = torch::zeros({ nat, nlmax, nks, nlocal, mmax }, torch::kComplexDouble); // support multi-k
-
     //cutoff for alpha is same for all types of atoms
     const double Rcut_Alpha = orb.Alpha[0].getRcut();
 
@@ -304,10 +213,13 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal,
 
                 key_tuple key(ibt, dR.x, dR.y, dR.z);
 
-                if (this->nlm_save_k[iat].find(key)
-                    == this->nlm_save_k[iat].end()) 
+                if constexpr (std::is_same<TK, std::complex<double>>::value)
                 {
-                    continue;
+                    if (this->nlm_save_k[iat].find(key)
+                        == this->nlm_save_k[iat].end()) 
+                    {
+                        continue;
+                    }
                 }
 
                 //middle loop : all atomic basis on the adjacent atom ad
@@ -318,14 +230,26 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal,
 					const int iw2_local = pv->global2local_col(iw1_all);
 					if(iw1_local < 0 || iw2_local < 0) {continue;}
 					const int iw1_0 = iw1/PARAM.globalv.npol;
-					std::vector<double> nlm = this->nlm_save_k[iat][key][iw1][0];
-                    
+					std::vector<double> nlm;
+                    if constexpr (std::is_same<TK, double>::value)
+                    {
+                        nlm = this->nlm_save[iat][ad][iw1][0];
+                    }
+                    else
+                    {
+                        nlm = this->nlm_save_k[iat][key][iw1][0];
+                    }
+
                     for (int ik = 0; ik <nks; ik++)
                     {
-                        const double arg = kvec_d[ik] * dR * ModuleBase::TWO_PI;
-                        double sinp, cosp;
-                        ModuleBase::libm::sincos(arg, &sinp, &cosp);
-                        const std::complex<double> kphase = std::complex<double>(cosp, sinp);
+                        std::complex<double> kphase = std::complex<double>(1.0, 0.0);
+                        if constexpr (std::is_same<TK, std::complex<double>>::value)
+                        {
+                            const double arg = - (kvec_d[ik] * dR) * ModuleBase::TWO_PI;
+                            double sinp, cosp;
+                            ModuleBase::libm::sincos(arg, &sinp, &cosp);
+                            kphase = std::complex<double>(cosp, sinp);
+                        }
                         int ib=0;
                         int nl=0;
                         for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0)
@@ -335,11 +259,18 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal,
                                 const int nm = 2*L0+1;
                                 
                                 for (int m1=0; m1<nm; ++m1) // nm = 1 for s, 3 for p, 5 for d
-                                {
-                                    std::complex<double> nlm_phase = nlm[ib + m1] * kphase;
-                                    torch::Tensor nlm_tensor = torch::tensor({nlm_phase.real(), nlm_phase.imag()}, torch::kDouble);
-                                    torch::Tensor complex_tensor = torch::complex(nlm_tensor[0], nlm_tensor[1]);
-                                    this->psialpha_tensor[iat][nl][ik][iw1_all][m1] = complex_tensor;
+                                {   
+                                    if constexpr (std::is_same<TK, double>::value)
+                                    {
+                                        this->psialpha_tensor[iat][nl][ik][iw1_all][m1] = nlm[ib+m1];
+                                    }
+                                    else
+                                    {
+                                        std::complex<double> nlm_phase = nlm[ib + m1] * kphase;
+                                        torch::Tensor nlm_tensor = torch::tensor({nlm_phase.real(), nlm_phase.imag()}, torch::kDouble);
+                                        torch::Tensor complex_tensor = torch::complex(nlm_tensor[0], nlm_tensor[1]);
+                                        this->psialpha_tensor[iat][nl][ik][iw1_all][m1] = complex_tensor;
+                                    }
                                 }
                                 ib+=nm;
                                 nl++;
@@ -352,7 +283,7 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal,
 	}//end T0
 
 #ifdef __MPI
-    std::complex<double> msg[mmax];
+    TK msg[mmax];
     for(int iat=0; iat< nat ; iat++)
     {
         for(int nl = 0; nl < nlmax; nl++)
@@ -363,15 +294,29 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal,
                 {
                     for(int m=0;m<mmax;m++) 
                     {
-                        auto tensor_value = this->psialpha_tensor.index({iat, nl, ik, mu, m});
-                        msg[m] = std::complex<double>(torch::real(tensor_value).item<double>(), torch::imag(tensor_value).item<double>());
+                        if constexpr (std::is_same<TK, double>::value)
+                        {
+                            msg[m] = this->psialpha_tensor[iat][nl][ik][mu][m].item().toDouble();
+                        }
+                        else
+                        {
+                            auto tensor_value = this->psialpha_tensor.index({iat, nl, ik, mu, m});
+                            msg[m] = std::complex<double>(torch::real(tensor_value).item<double>(), torch::imag(tensor_value).item<double>());
+                        }
                     }
                     Parallel_Reduce::reduce_all(msg,mmax);
                     for(int m=0;m<mmax;m++) 
                     {
-                        torch::Tensor msg_tensor = torch::tensor({msg[m].real(), msg[m].imag()}, torch::kDouble);
-                        torch::Tensor complex_tensor = torch::complex(msg_tensor[0], msg_tensor[1]);
-                        this->psialpha_tensor[iat][nl][ik][mu][m] = complex_tensor;
+                        if constexpr (std::is_same<TK, double>::value)
+                        {
+                            this->psialpha_tensor[iat][nl][ik][mu][m] = msg[m];
+                        }
+                        else
+                        {
+                            torch::Tensor msg_tensor = torch::tensor({msg[m].real(), msg[m].imag()}, torch::kDouble);
+                            torch::Tensor complex_tensor = torch::complex(msg_tensor[0], msg_tensor[1]);
+                            this->psialpha_tensor[iat][nl][ik][mu][m] = complex_tensor;
+                        }
                     }
                 }
             }
@@ -381,6 +326,7 @@ void LCAO_Deepks::prepare_psialpha_k(const int nlocal,
 #endif  
 }
 
+template <typename TK>
 void LCAO_Deepks::check_vdp_psialpha(const int nat, const int nks, const int nlocal)
 {
     std::ofstream ofs("vdp_psialpha.dat");
@@ -398,7 +344,15 @@ void LCAO_Deepks::check_vdp_psialpha(const int nat, const int nks, const int nlo
                 {
                     for(int m=0; m< mmax; m++)
                     {
-                        ofs << this->psialpha_tensor.index({ iat,nl, iks, mu, m }).item().toDouble() << " ";
+                        if constexpr (std::is_same<TK, double>::value)
+                        {
+                            ofs << this->psialpha_tensor.index({ iat, nl, iks, mu, m }).item().toDouble() << " ";
+                        }
+                        else
+                        {
+                            auto tensor_value = this->psialpha_tensor.index({iat, nl, iks, mu, m});
+                            ofs << std::complex<double>(torch::real(tensor_value).item<double>(), torch::imag(tensor_value).item<double>()) << " ";
+                        }
                     }
                 }                
             }
@@ -470,4 +424,23 @@ void LCAO_Deepks::check_vdp_gevdm(const int nat)
     ofs.close();
 }
 
+template void LCAO_Deepks::prepare_psialpha<double>(const int nlocal,
+                                                    const int nat,
+                                                    const int nks,
+                                                    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
+                                                    const UnitCell &ucell,
+                                                    const LCAO_Orbitals &orb,
+                                                    Grid_Driver &GridD);
+
+template void LCAO_Deepks::prepare_psialpha<std::complex<double>>(const int nlocal,
+                                                                  const int nat,
+                                                                  const int nks,
+                                                                  const std::vector<ModuleBase::Vector3<double>> &kvec_d,
+                                                                  const UnitCell &ucell,
+                                                                  const LCAO_Orbitals &orb,
+                                                                  Grid_Driver &GridD);
+
+template void LCAO_Deepks::check_vdp_psialpha<double>(const int nat, const int nks, const int nlocal);
+template void LCAO_Deepks::check_vdp_psialpha<std::complex<double>>(const int nat, const int nks, const int nlocal);
+
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp
index 459ff5c10d..9a1f151f70 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp
@@ -18,7 +18,7 @@
 
 //calculating sum of correction band energies
 //for gamma_only calculations
-void LCAO_Deepks::cal_e_delta_band(const std::vector<std::vector<double>>& dm)
+void LCAO_Deepks::cal_e_delta_band(const std::vector<std::vector<double>>& dm, const int /*nks*/)
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_e_delta_band");
     this->e_delta_band = 0;
@@ -35,7 +35,7 @@ void LCAO_Deepks::cal_e_delta_band(const std::vector<std::vector<double>>& dm)
                 for (int is = 0; is < dm.size(); ++is)  //dm.size() == PARAM.inp.nspin
                 {
                     //this->e_delta_band += dm[is](nu, mu) * this->H_V_delta[index];
-					this->e_delta_band += dm[is][nu*this->pv->nrow+mu] * this->H_V_delta[index];
+					this->e_delta_band += dm[is][nu*this->pv->nrow+mu] * this->H_V_delta[0][index];
                 }
             }
         }
@@ -48,7 +48,7 @@ void LCAO_Deepks::cal_e_delta_band(const std::vector<std::vector<double>>& dm)
 
 //calculating sum of correction band energies
 //for multi_k calculations
-void LCAO_Deepks::cal_e_delta_band_k(const std::vector<std::vector<std::complex<double>>>& dm,
+void LCAO_Deepks::cal_e_delta_band(const std::vector<std::vector<std::complex<double>>>& dm,
     const int nks)
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_e_delta_band");
diff --git a/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp b/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp
index 6950e030ea..e28af0685c 100644
--- a/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp
+++ b/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp
@@ -14,10 +14,13 @@
 /// be calculated: 
 /// gdm_epsl = d/d\epsilon_{ab} * 
 ///           sum_{mu,nu} rho_{mu,nu} <chi_mu|alpha_m><alpha_m'|chi_nu>
-void LCAO_Deepks::cal_gdmx(const std::vector<double>& dm,
+template <typename TK>
+void LCAO_Deepks::cal_gdmx(const std::vector<std::vector<TK>>& dm,
     const UnitCell &ucell,
     const LCAO_Orbitals &orb,
     Grid_Driver& GridD,
+    const int nks,
+    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
     const bool isstress)
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_gdmx");
@@ -70,6 +73,8 @@ void LCAO_Deepks::cal_gdmx(const std::vector<double>& dm,
 				const int nw1_tot = atom1->nw*PARAM.globalv.npol;
 				const double Rcut_AO1 = orb.Phi[T1].getRcut(); 
 
+                ModuleBase::Vector3<double> dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); 
+
 				for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++)
 				{
 					const int T2 = GridD.getType(ad2);
@@ -79,6 +84,7 @@ void LCAO_Deepks::cal_gdmx(const std::vector<double>& dm,
 					const ModuleBase::Vector3<double> tau2 = GridD.getAdjacentTau(ad2);
 					const Atom* atom2 = &ucell.atoms[T2];
 					const int nw2_tot = atom2->nw*PARAM.globalv.npol;
+					ModuleBase::Vector3<double> dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z);
 					
 					const double Rcut_AO2 = orb.Phi[T2].getRcut();
                 	const double dist1 = (tau1-tau0).norm() * ucell.lat0;
@@ -104,25 +110,68 @@ void LCAO_Deepks::cal_gdmx(const std::vector<double>& dm,
                     auto row_indexes = pv->get_indexes_row(ibt1);
                     auto col_indexes = pv->get_indexes_col(ibt2);
                     if(row_indexes.size() * col_indexes.size() == 0) continue;
-
-                    hamilt::AtomPair<double> dm_pair(ibt1, ibt2, 0, 0, 0, pv);
-                    dm_pair.allocate(nullptr, 1);
-                    if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver))
+                    
+                    double* dm_current;
+                    int dRx, dRy, dRz;
+                    if constexpr (std::is_same<TK, double>::value)
                     {
-                        dm_pair.add_from_matrix(dm.data(), pv->get_row_size(), 1.0, 1);
+                        dRx = 0;
+                        dRy = 0;
+                        dRz = 0;
                     }
                     else
                     {
-                        dm_pair.add_from_matrix(dm.data(), pv->get_col_size(), 1.0, 0);
+                        dRx = (dR2-dR1).x;
+                        dRy = (dR2-dR1).y;
+                        dRz = (dR2-dR1).z;
                     }
-                    const double* dm_current = dm_pair.get_pointer();
+                    hamilt::AtomPair<double> dm_pair(ibt1, ibt2, dRx, dRy, dRz, pv);
+                    dm_pair.allocate(nullptr, 1);
+                    for(int ik=0;ik<nks;ik++)
+                    {
+                        TK kphase;
+                        if constexpr (std::is_same<TK, double>::value)
+                        {
+                            kphase = 1.0;
+                        }
+                        else
+                        {
+                            const double arg = - (kvec_d[ik] * (dR2-dR1) ) * ModuleBase::TWO_PI;
+                            double sinp, cosp;
+                            ModuleBase::libm::sincos(arg, &sinp, &cosp);
+                            kphase = TK(cosp, sinp);
+                        }
+                        if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver))
+                        {
+                            dm_pair.add_from_matrix(dm[ik].data(), pv->get_row_size(), kphase, 1);
+                        }
+                        else
+                        {
+                            dm_pair.add_from_matrix(dm[ik].data(), pv->get_col_size(), kphase, 0);
+                        }
+                    }
+                    
+                    dm_current = dm_pair.get_pointer();
 
+                    key_tuple key_1(ibt1,dR1.x,dR1.y,dR1.z);
+                    key_tuple key_2(ibt2,dR2.x,dR2.y,dR2.z);
 					for (int iw1=0; iw1<row_indexes.size(); ++iw1)
                     {
                         for (int iw2=0; iw2<col_indexes.size(); ++iw2)
                         {
-                            std::vector<double> nlm1 = this->nlm_save[iat][ad1][row_indexes[iw1]][0];
-                            std::vector<std::vector<double>> nlm2 = this->nlm_save[iat][ad2][col_indexes[iw2]];
+                            std::vector<double> nlm1;
+                            std::vector<std::vector<double>> nlm2;
+
+                            if constexpr (std::is_same<TK, double>::value)
+                            {
+                                nlm1 = this->nlm_save[iat][ad1][row_indexes[iw1]][0];
+                                nlm2 = this->nlm_save[iat][ad2][col_indexes[iw2]];
+                            }
+                            else
+                            {
+                                nlm1 = this->nlm_save_k[iat][key_1][row_indexes[iw1]][0];
+                                nlm2 = this->nlm_save_k[iat][key_2][col_indexes[iw2]];
+                            }
 
                             assert(nlm1.size()==nlm2[0].size());
 
@@ -178,8 +227,16 @@ void LCAO_Deepks::cal_gdmx(const std::vector<double>& dm,
                             assert(ib==nlm1.size());
                             if  (isstress)
                             {
-                                nlm1 = this->nlm_save[iat][ad2][col_indexes[iw2]][0];
-                                nlm2 = this->nlm_save[iat][ad1][row_indexes[iw1]];
+                                if constexpr (std::is_same<TK, double>::value)
+                                {
+                                    nlm1 = this->nlm_save[iat][ad2][col_indexes[iw2]][0];
+                                    nlm2 = this->nlm_save[iat][ad1][row_indexes[iw1]];
+                                }
+                                else
+                                {
+                                    nlm1 = this->nlm_save_k[iat][key_2][col_indexes[iw2]][0];
+                                    nlm2 = this->nlm_save_k[iat][key_1][row_indexes[iw1]];
+                                }
 
                                 assert(nlm1.size()==nlm2[0].size());  
                                 int ib=0;
@@ -278,4 +335,20 @@ void LCAO_Deepks::check_gdmx(const int nat)
     }
 }
 
+template void LCAO_Deepks::cal_gdmx<double>(const std::vector<std::vector<double>>& dm,
+                                            const UnitCell &ucell,
+                                            const LCAO_Orbitals &orb,
+                                            Grid_Driver& GridD,
+                                            const int nks,
+                                            const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                                            const bool isstress);
+
+template void LCAO_Deepks::cal_gdmx<std::complex<double>>(const std::vector<std::vector<std::complex<double>>>& dm,
+                                                          const UnitCell &ucell,
+                                                          const LCAO_Orbitals &orb,
+                                                          Grid_Driver& GridD,
+                                                          const int nks,
+                                                          const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                                                          const bool isstress);
+
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/cal_gdmx_k.cpp b/source/module_hamilt_lcao/module_deepks/cal_gdmx_k.cpp
deleted file mode 100644
index 62a3a25c26..0000000000
--- a/source/module_hamilt_lcao/module_deepks/cal_gdmx_k.cpp
+++ /dev/null
@@ -1,249 +0,0 @@
-#ifdef __DEEPKS
-
-#include "module_parameter/parameter.h"
-#include "LCAO_deepks.h"
-#include "module_base/vector3.h"
-#include "module_base/timer.h"
-#include "module_base/constants.h"
-#include "module_hamilt_lcao/module_hcontainer/atom_pair.h"
-#include "module_base/libm/libm.h"
-
-void LCAO_Deepks::cal_gdmx_k(const std::vector<std::vector<std::complex<double>>>& dm,
-    const UnitCell &ucell,
-    const LCAO_Orbitals &orb,
-    Grid_Driver& GridD,
-    const int nks,
-    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-    const bool isstress)
-{
-    ModuleBase::TITLE("LCAO_Deepks", "cal_gdmx_k");
-    ModuleBase::timer::tick("LCAO_Deepks","cal_gdmx_k");
-
-    const int size = (2 * lmaxd + 1) * (2 * lmaxd + 1);
-
-    for (int iat = 0;iat < ucell.nat;iat++)
-    {
-        for (int inl = 0;inl < inlmax;inl++)
-        {
-            ModuleBase::GlobalFunc::ZEROS(gdmx[iat][inl], size);
-            ModuleBase::GlobalFunc::ZEROS(gdmy[iat][inl], size);
-            ModuleBase::GlobalFunc::ZEROS(gdmz[iat][inl], size);
-        }
-    }
-
-    if (isstress)
-    {
-        for (int ipol = 0;ipol < 6;ipol++)
-        {
-            for (int inl = 0;inl < inlmax;inl++)
-            {
-                ModuleBase::GlobalFunc::ZEROS(gdm_epsl[ipol][inl], size);
-            }
-        }
-    }
-
-    const double Rcut_Alpha = orb.Alpha[0].getRcut();
-    int nrow = this->pv->nrow;
-    for (int T0 = 0; T0 < ucell.ntype; T0++)
-    {
-		Atom* atom0 = &ucell.atoms[T0]; 
-        for (int I0 =0; I0< atom0->na; I0++)
-        {
-            const int iat = ucell.itia2iat(T0,I0);//on which alpha is located
-            const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
-            GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0);
-
-            for (int ad1=0; ad1<GridD.getAdjacentNum()+1 ; ++ad1)
-            {
-                const int T1 = GridD.getType(ad1);
-                const int I1 = GridD.getNatom(ad1);
-                const int ibt1 = ucell.itia2iat(T1,I1); //on which chi_mu is located
-                const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
-                
-                const ModuleBase::Vector3<double> tau1 = GridD.getAdjacentTau(ad1);
-				const Atom* atom1 = &ucell.atoms[T1];
-				const int nw1_tot = atom1->nw*PARAM.globalv.npol;
-				const double Rcut_AO1 = orb.Phi[T1].getRcut();
-
-                ModuleBase::Vector3<double> dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); 
-
-				for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++)
-				{
-					const int T2 = GridD.getType(ad2);
-					const int I2 = GridD.getNatom(ad2);
-					const int start2 = ucell.itiaiw2iwt(T2, I2, 0);
-                    const int ibt2 = ucell.itia2iat(T2,I2);
-					const ModuleBase::Vector3<double> tau2 = GridD.getAdjacentTau(ad2);
-					const Atom* atom2 = &ucell.atoms[T2];
-					const int nw2_tot = atom2->nw*PARAM.globalv.npol;
-                    ModuleBase::Vector3<double> dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z);
-					
-					const double Rcut_AO2 = orb.Phi[T2].getRcut();
-                	const double dist1 = (tau1-tau0).norm() * ucell.lat0;
-                	const double dist2 = (tau2-tau0).norm() * ucell.lat0;
-
-					if (dist1 > Rcut_Alpha + Rcut_AO1
-							|| dist2 > Rcut_Alpha + Rcut_AO2)
-					{
-						continue;
-					}
-
-                    double r0[3];
-                    double r1[3];
-                    if(isstress)
-                    {
-                        r1[0] = ( tau1.x - tau0.x) ;
-                        r1[1] = ( tau1.y - tau0.y) ;
-                        r1[2] = ( tau1.z - tau0.z) ;
-                        r0[0] = ( tau2.x - tau0.x) ;
-                        r0[1] = ( tau2.y - tau0.y) ;
-                        r0[2] = ( tau2.z - tau0.z) ;
-                    }
-
-                    auto row_indexes = pv->get_indexes_row(ibt1);
-                    auto col_indexes = pv->get_indexes_col(ibt2);
-                    if(row_indexes.size() * col_indexes.size() == 0) continue;
-
-                    hamilt::AtomPair<double> dm_pair(ibt1, ibt2, (dR2-dR1).x, (dR2-dR1).y, (dR2-dR1).z, pv);
-                    dm_pair.allocate(nullptr, 1);
-                    for(int ik=0;ik<nks;ik++)
-                    {
-                        const double arg = - (kvec_d[ik] * (dR2-dR1) ) * ModuleBase::TWO_PI;
-                        double sinp, cosp;
-                        ModuleBase::libm::sincos(arg, &sinp, &cosp);
-                        const std::complex<double> kphase = std::complex<double>(cosp, sinp);
-                        if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver))
-                        {
-                            dm_pair.add_from_matrix(dm[ik].data(), pv->get_row_size(), kphase, 1);
-                        }
-                        else
-                        {
-                            dm_pair.add_from_matrix(dm[ik].data(), pv->get_col_size(), kphase, 0);
-                        }
-                    }
-                    const double* dm_current = dm_pair.get_pointer();
-
-                    key_tuple key_1(ibt1,dR1.x,dR1.y,dR1.z);
-                    key_tuple key_2(ibt2,dR2.x,dR2.y,dR2.z);
-					for (int iw1l = 0; iw1l < row_indexes.size(); ++iw1l)
-                    {
-                        for (int iw2l = 0; iw2l < col_indexes.size(); ++iw2l)
-                        {
-                            std::vector<double> nlm1 = this->nlm_save_k[iat][key_1][row_indexes[iw1l]][0];
-                            std::vector<std::vector<double>> nlm2 = this->nlm_save_k[iat][key_2][col_indexes[iw2l]];
-
-                            assert(nlm1.size()==nlm2[0].size());
-
-                            int ib=0;
-                            for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0)
-                            {
-                                for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0)
-                                {
-                                    const int inl = this->inl_index[T0](I0, L0, N0);
-                                    const int nm = 2*L0+1;
-                                    for (int m1 = 0;m1 < 2 * L0 + 1;++m1)
-                                    {
-                                        for (int m2 = 0; m2 < 2 * L0 + 1; ++m2)
-                                        {
-                                            //(<d/dX chi_mu|alpha_m>)<chi_nu|alpha_m'>
-                                            gdmx[iat][inl][m1*nm+m2] += nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current;
-                                            gdmy[iat][inl][m1*nm+m2] += nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current;
-                                            gdmz[iat][inl][m1*nm+m2] += nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current;
-
-                                            //(<d/dX chi_nu|alpha_m'>)<chi_mu|alpha_m>
-                                            gdmx[iat][inl][m2*nm+m1] += nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current;
-                                            gdmy[iat][inl][m2*nm+m1] += nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current;
-                                            gdmz[iat][inl][m2*nm+m1] += nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current;                                            
-
-                                            //(<chi_mu|d/dX alpha_m>)<chi_nu|alpha_m'> = -(<d/dX chi_mu|alpha_m>)<chi_nu|alpha_m'>
-                                            gdmx[ibt2][inl][m1*nm+m2] -= nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current;         
-                                            gdmy[ibt2][inl][m1*nm+m2] -= nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current;         
-                                            gdmz[ibt2][inl][m1*nm+m2] -= nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current;
-
-                                            //(<chi_nu|d/dX alpha_m'>)<chi_mu|alpha_m> = -(<d/dX chi_nu|alpha_m'>)<chi_mu|alpha_m>
-                                            gdmx[ibt2][inl][m2*nm+m1] -= nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current; 
-                                            gdmy[ibt2][inl][m2*nm+m1] -= nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current;         
-                                            gdmz[ibt2][inl][m2*nm+m1] -= nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current; 
-
-                                            if (isstress)
-                                            {
-                                                int mm = 0;
-                                                for(int ipol=0;ipol<3;ipol++)
-                                                {
-                                                    for(int jpol=ipol;jpol<3;jpol++)
-                                                    {
-                                                        gdm_epsl[mm][inl][m2*nm+m1] += ucell.lat0 * 
-                                                        *dm_current * (nlm2[jpol+1][ib+m2] * nlm1[ib+m1] * r0[ipol]);
-                                                        mm++;
-                                                    }
-                                                }
-                                            }
-
-                                        }
-                                    }
-                                    ib+=nm;
-                                }
-                            }
-                            assert(ib==nlm1.size());
-
-                            if  (isstress)
-                            {
-                                nlm1 = this->nlm_save_k[iat][key_2][col_indexes[iw2l]][0];
-                                nlm2 = this->nlm_save_k[iat][key_1][row_indexes[iw1l]];
-
-                                assert(nlm1.size()==nlm2[0].size());  
-                                int ib=0;
-                                for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0)
-                                {
-                                    for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0)
-                                    {
-                                        const int inl = this->inl_index[T0](I0, L0, N0);
-                                        const int nm = 2*L0+1;
-                                        for (int m1 = 0;m1 < nm; ++m1)
-                                        {
-                                            for (int m2 = 0; m2 < nm; ++m2)
-                                            {
-                                                int mm = 0;
-                                                for(int ipol=0;ipol<3;ipol++)
-                                                {
-                                                    for(int jpol=ipol;jpol<3;jpol++)
-                                                    {
-                                                        gdm_epsl[mm][inl][m2*nm+m1]  += ucell.lat0 * 
-                                                        *dm_current * (nlm1[ib+m1] * nlm2[jpol+1][ib+m2] * r1[ipol]);
-                                                        mm++;
-                                                    }
-                                                }
-                                            }
-                                        }
-                                        ib+=nm;
-                                    }
-                                }
-                            }
-                            dm_current++;
-						}//iw2
-					}//iw1
-				}//ad2
-			}//ad1
-        }//I0
-    }//T0
-
-#ifdef __MPI
-    for(int iat=0;iat<ucell.nat;iat++)
-    {
-        allsum_deepks(this->inlmax,size,this->gdmx[iat]);
-        allsum_deepks(this->inlmax,size,this->gdmy[iat]);
-        allsum_deepks(this->inlmax,size,this->gdmz[iat]);
-    }
-    if (isstress)
-    {
-        for(int ipol=0;ipol<6;ipol++)
-        {
-            allsum_deepks(this->inlmax,size,this->gdm_epsl[ipol]);
-        }
-    }
-#endif
-    ModuleBase::timer::tick("LCAO_Deepks","cal_gdmx_k");
-    return;
-}
-
-#endif
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_hmat.cpp b/source/module_hamilt_lcao/module_deepks/deepks_hmat.cpp
index 66125bf042..54d143687c 100644
--- a/source/module_hamilt_lcao/module_deepks/deepks_hmat.cpp
+++ b/source/module_hamilt_lcao/module_deepks/deepks_hmat.cpp
@@ -4,98 +4,11 @@
 #include "LCAO_deepks.h"
 #include "module_base/parallel_reduce.h"
 
-
-void DeePKS_domain::save_h_mat(
-		const double *h_mat_in,
-		const int nloc,
-        const int ik)
-{
-	for(int i=0;i<nloc;i++)
-	{
-		GlobalC::ld.h_mat[i]=h_mat_in[i];
-	}  
-}
-
-void DeePKS_domain::save_h_mat(
-		const std::complex<double> *h_mat_in,
-		const int nloc,
-        const int ik)
-{
-    for(int i=0;i<nloc;i++)
-    {
-        GlobalC::ld.h_mat_k[ik][i]=h_mat_in[i];
-    }  
-}
-
-
+template <typename TK, typename TH>
 void DeePKS_domain::collect_h_mat(
         const Parallel_Orbitals &pv,
-		const std::vector<double>& h_in,
-		ModuleBase::matrix &h_out,
-		const int nlocal)
-{
-    ModuleBase::TITLE("DeePKS_domain", "collect_h_tot");
-
-    //construct the total H matrix
-#ifdef __MPI
-    int ir=0;
-    int ic=0;
-    for (int i=0; i<nlocal; i++)
-    {
-        std::vector<double> lineH(nlocal-i,0.0);
-
-        ir = pv.global2local_row(i);
-        if (ir>=0)
-        {
-            // data collection
-            for (int j=i; j<nlocal; j++)
-            {
-                ic = pv.global2local_col(j);
-                if (ic>=0)
-                {
-                    int iic=0;
-                    if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver))
-                    {
-                        iic=ir+ic*pv.nrow;
-                    }
-                    else
-                    {
-                        iic=ir*pv.ncol+ic;
-                    }
-                    lineH[j-i] = h_in[iic];
-                }
-            }
-        }
-        else
-        {
-            //do nothing
-        }
-
-        Parallel_Reduce::reduce_all(lineH.data(),nlocal-i);
-
-        for (int j=i; j<nlocal; j++)
-        {
-            h_out(i,j)=lineH[j-i];
-            h_out(j,i)=h_out(i,j);//H is a symmetric matrix
-        }
-    }
-#else
-    for (int i=0; i<nlocal; i++)
-    {
-        for (int j=i; j<nlocal; j++)
-        {
-            h_out(i,j)=h_in[i*nlocal+j];
-            h_out(j,i)=h_out(i,j);//H is a symmetric matrix
-        }
-    }
-#endif
-}
-
-// for multi-k
-void DeePKS_domain::collect_h_mat(
-        const Parallel_Orbitals &pv,
-		const std::vector<std::vector<std::complex<double>>>& h_in,
-		std::vector<ModuleBase::ComplexMatrix> &h_out,
+		const std::vector<std::vector<TK>>& h_in,
+		std::vector<TH> &h_out,
 		const int nlocal,
         const int nks)
 {
@@ -109,7 +22,7 @@ void DeePKS_domain::collect_h_mat(
         int ic=0;
         for (int i=0; i<nlocal; i++)
         {
-            std::vector<std::complex<double>> lineH(nlocal-i, std::complex<double>(0.0, 0.0));
+            std::vector<TK> lineH(nlocal-i, TK(0.0));
 
             ir = pv.global2local_row(i);
             if (ir>=0)
@@ -159,28 +72,9 @@ void DeePKS_domain::collect_h_mat(
     }
 }
 
-//just for gamma-only now
-void DeePKS_domain::check_h_mat(
-		const ModuleBase::matrix &H,
-		const std::string &h_file,
-		const int nlocal)
-{
-    std::ofstream ofs(h_file.c_str());
-    ofs << std::setprecision(10);
-    for (int i=0; i<nlocal; i++)
-    {
-        for (int j=0; j<nlocal; j++)
-        {
-            ofs << H(i,j) << " ";
-        }
-        ofs << std::endl;
-    }
-    ofs.close();
-}
-
-// for multi-k
+template <typename TH>
 void DeePKS_domain::check_h_mat(
-		const std::vector<ModuleBase::ComplexMatrix> &H,
+		const std::vector<TH> &H,
 		const std::string &h_file,
 		const int nlocal,
         const int nks)
@@ -202,4 +96,30 @@ void DeePKS_domain::check_h_mat(
     ofs.close();
 }
 
+template void DeePKS_domain::collect_h_mat<double, ModuleBase::matrix>(
+        const Parallel_Orbitals &pv,
+        const std::vector<std::vector<double>>& h_in,
+        std::vector<ModuleBase::matrix> &h_out,
+        const int nlocal,
+        const int nks);
+
+template void DeePKS_domain::collect_h_mat<std::complex<double>, ModuleBase::ComplexMatrix>(
+        const Parallel_Orbitals &pv,
+        const std::vector<std::vector<std::complex<double>>>& h_in,
+        std::vector<ModuleBase::ComplexMatrix> &h_out,
+        const int nlocal,
+        const int nks);
+
+template void DeePKS_domain::check_h_mat<ModuleBase::matrix>(
+        const std::vector<ModuleBase::matrix> &H,
+        const std::string &h_file,
+        const int nlocal,
+        const int nks);
+
+template void DeePKS_domain::check_h_mat<ModuleBase::ComplexMatrix>(
+        const std::vector<ModuleBase::ComplexMatrix> &H,
+        const std::string &h_file,
+        const int nlocal,
+        const int nks);
+
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_hmat.h b/source/module_hamilt_lcao/module_deepks/deepks_hmat.h
index fe4d51dea5..9668d5ede9 100644
--- a/source/module_hamilt_lcao/module_deepks/deepks_hmat.h
+++ b/source/module_hamilt_lcao/module_deepks/deepks_hmat.h
@@ -14,37 +14,19 @@
 
 namespace DeePKS_domain
 {
-	void save_h_mat(
-			const double *h_mat_in,
-			const int nloc,
-            const int ik);
-
-	void save_h_mat(
-			const std::complex<double> *h_mat_in,
-			const int nloc,
-            const int ik);
-
     //Collect data in h_in to matrix h_out. Note that left lower trianger in h_out is filled
+    template <typename TK, typename TH>
     void collect_h_mat(
         const Parallel_Orbitals &pv,
-        const std::vector<double>& h_in,
-        ModuleBase::matrix &h_out,
-        const int nlocal);
-
-    void collect_h_mat(
-        const Parallel_Orbitals &pv,
-		const std::vector<std::vector<std::complex<double>>>& h_in,
-		std::vector<ModuleBase::ComplexMatrix> &h_out,
-		const int nlocal,
+        const std::vector<std::vector<TK>>& h_in,
+        std::vector<TH> &h_out,
+        const int nlocal,
         const int nks);
 
+    // Write the matrices in H to the file h_file for checking; not used in the code at present.
+    template <typename TH>
     void check_h_mat(
-        const ModuleBase::matrix &H,
-        const std::string &h_file,
-        const int nlocal);
-
-    void check_h_mat(
-		const std::vector<ModuleBase::ComplexMatrix> &H,
+		const std::vector<TH> &H,
 		const std::string &h_file,
 		const int nlocal,
         const int nks);
diff --git a/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp b/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp
index 0e01573331..5f202d1532 100644
--- a/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp
+++ b/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp
@@ -3,9 +3,6 @@
 /// cal_orbital_precalc : orbital_precalc is usted for training with orbital label,
 ///                          which equals gvdm * orbital_pdm_shell,
 ///                          orbital_pdm_shell[1,Inl,nm*nm] = dm_hl * overlap * overlap
-/// cal_orbital_precalc_k : orbital_precalc is usted for training with orbital label,
-///                          for multi-k case, which equals gvdm * orbital_pdm_shell,
-///                          orbital_pdm_shell[1,Inl,nm*nm] = dm_hl_k * overlap * overlap
 
 #include "LCAO_deepks.h"
 #include "LCAO_deepks_io.h" // mohan add 2024-07-22
@@ -18,21 +15,24 @@
 
 // calculates orbital_precalc[1,NAt,NDscrpt] = gvdm * orbital_pdm_shell;
 // orbital_pdm_shell[2,Inl,nm*nm] = dm_hl * overlap * overlap;
+template <typename TK, typename TH>
 void LCAO_Deepks::cal_orbital_precalc(
-    const std::vector<std::vector<ModuleBase::matrix>>& dm_hl,
+    const std::vector<std::vector<TH>>& dm_hl,
     const int nat,
+    const int nks,
+    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
     const UnitCell& ucell,
     const LCAO_Orbitals& orb,
     Grid_Driver& GridD) 
 {
-
     ModuleBase::TITLE("LCAO_Deepks", "cal_orbital_precalc");
+    ModuleBase::timer::tick("LCAO_Deepks", "calc_orbital_precalc");
 
     this->cal_gvdm(nat);
 
     const double Rcut_Alpha = orb.Alpha[0].getRcut();
 
-    this->init_orbital_pdm_shell(1);
+    this->init_orbital_pdm_shell(nks);
 
     for (int T0 = 0; T0 < ucell.ntype; T0++) 
     {
@@ -85,6 +85,21 @@ void LCAO_Deepks::cal_orbital_precalc(
                     continue;
                 }
 
+                ModuleBase::Vector3<double> dR1(GridD.getBox(ad1).x,
+                                                GridD.getBox(ad1).y,
+                                                GridD.getBox(ad1).z);
+
+                key_tuple key_1(ibt1, dR1.x, dR1.y, dR1.z);
+
+                if constexpr (std::is_same<TK, std::complex<double>>::value)
+                {
+                    if (this->nlm_save_k[iat].find(key_1)
+                        == this->nlm_save_k[iat].end()) 
+                    {
+                        continue;
+                    }
+                }
+
                 auto row_indexes = pv->get_indexes_row(ibt1);
 
                 const int row_size = row_indexes.size();
@@ -96,12 +111,19 @@ void LCAO_Deepks::cal_orbital_precalc(
 
                 std::vector<double> s_1t(trace_alpha_size * row_size);
 
-				std::vector<double> g_1dmt(trace_alpha_size * row_size, 0.0);
+				std::vector<double> g_1dmt(nks * trace_alpha_size * row_size, 0.0);
 
 				for (int irow = 0; irow < row_size; irow++) 
 				{
-                    const double* row_ptr
-                        = this->nlm_save[iat][ad1][row_indexes[irow]][0].data();
+                    double* row_ptr;
+                    if constexpr (std::is_same<TK, double>::value)
+                    {
+                        row_ptr = this->nlm_save[iat][ad1][row_indexes[irow]][0].data();
+                    }
+                    else
+                    {
+                        row_ptr = this->nlm_save_k[iat][key_1][row_indexes[irow]][0].data();
+                    }
                     for (int i = 0; i < trace_alpha_size; i++) 
                     {
                         s_1t[i * row_size + irow] = row_ptr[trace_alpha_row[i]];
@@ -113,6 +135,13 @@ void LCAO_Deepks::cal_orbital_precalc(
                     const int T2 = GridD.getType(ad2);
                     const int I2 = GridD.getNatom(ad2);
                     const int ibt2 = ucell.itia2iat(T2, I2);
+                    if constexpr (std::is_same<TK, std::complex<double>>::value) // Why only for multi-k?
+                    {
+                        if (ibt1 > ibt2) 
+                        {
+                            continue;
+                        }
+                    }
                     const ModuleBase::Vector3<double> tau2
                         = GridD.getAdjacentTau(ad2);
                     const Atom* atom2 = &ucell.atoms[T2];
@@ -126,6 +155,21 @@ void LCAO_Deepks::cal_orbital_precalc(
                         continue;
                     }
 
+                    ModuleBase::Vector3<double> dR2(GridD.getBox(ad2).x,
+                                                    GridD.getBox(ad2).y,
+                                                    GridD.getBox(ad2).z);
+
+                    key_tuple key_2(ibt2, dR2.x, dR2.y, dR2.z);
+                    
+                    if constexpr (std::is_same<TK, std::complex<double>>::value)
+                    {
+                        if (this->nlm_save_k[iat].find(key_2)
+                                == this->nlm_save_k[iat].end()) 
+                        {
+                            continue;
+                        }
+                    }
+
                     auto col_indexes = pv->get_indexes_col(ibt2);
                     const int col_size = col_indexes.size();
 					if (col_size == 0) 
@@ -136,8 +180,15 @@ void LCAO_Deepks::cal_orbital_precalc(
                     std::vector<double> s_2t(trace_alpha_size * col_size);
                     for (int icol = 0; icol < col_size; icol++) 
                     {
-                        const double* col_ptr
-                            = this->nlm_save[iat][ad2][col_indexes[icol]][0].data();
+                        double* col_ptr;
+                        if constexpr (std::is_same<TK, double>::value)
+                        {
+                           col_ptr = this->nlm_save[iat][ad2][col_indexes[icol]][0].data();
+                        }
+                        else
+                        {
+                            col_ptr = this->nlm_save_k[iat][key_2][col_indexes[icol]][0].data();
+                        }
                         for (int i = 0; i < trace_alpha_size; i++) 
                         {
                             s_2t[i * col_size + icol]
@@ -145,40 +196,94 @@ void LCAO_Deepks::cal_orbital_precalc(
                         }
                     }
 
-                    std::vector<double> dm_array(row_size * col_size, 0.0);
-                    for (int is = 0; is < PARAM.inp.nspin; is++) 
-                    {
-                        hamilt::AtomPair<double> dm_pair(ibt1,
-                                                         ibt2,
-                                                         0,
-                                                         0,
-                                                         0,
-                                                         pv);
+                    std::vector<double> dm_array(row_size * nks * col_size, 0.0);
 
-                        dm_pair.allocate(dm_array.data(), 0);
+                    const int row_size_nks = row_size * nks;
 
-                        if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) 
+                    if constexpr (std::is_same<TK, double>::value)
+                    {
+                        for (int is = 0; is < PARAM.inp.nspin; is++) 
                         {
-                            dm_pair.add_from_matrix(dm_hl[0][is].c,
-                                                    pv->get_row_size(),
-                                                    1.0,
-                                                    1);
-                        } 
-                        else 
+                            hamilt::AtomPair<double> dm_pair(ibt1,
+                                                            ibt2,
+                                                            0,
+                                                            0,
+                                                            0,
+                                                            pv);
+
+                            dm_pair.allocate(dm_array.data(), 0);
+
+                            if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) 
+                            {
+                                dm_pair.add_from_matrix(dm_hl[0][is].c,
+                                                        pv->get_row_size(),
+                                                        1.0,
+                                                        1);
+                            } 
+                            else 
+                            {
+                                dm_pair.add_from_matrix(dm_hl[0][is].c,
+                                                        pv->get_col_size(),
+                                                        1.0,
+                                                        0);
+                            }
+                        } // is
+                    }
+                    else
+                    {
+                        for (int ik = 0; ik < nks; ik++) 
                         {
-                            dm_pair.add_from_matrix(dm_hl[0][is].c,
-                                                    pv->get_col_size(),
-                                                    1.0,
-                                                    0);
-                        }
-                    } // is
+                            hamilt::AtomPair<double> dm_pair(ibt1,
+                                                            ibt2,
+                                                            (dR2 - dR1).x,
+                                                            (dR2 - dR1).y,
+                                                            (dR2 - dR1).z,
+                                                            pv);
+
+                            dm_pair.allocate(&dm_array[ik * row_size * col_size], 0);
+
+                            const double arg
+                                = -(kvec_d[ik] * (dR2 - dR1)) * ModuleBase::TWO_PI;
+
+                            double sinp, cosp;
+
+                            ModuleBase::libm::sincos(arg, &sinp, &cosp);
+
+                            const std::complex<double> kphase
+                                = std::complex<double>(cosp, sinp);
+
+                            if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) 
+                            {
+                                dm_pair.add_from_matrix(dm_hl[0][ik].c,
+                                        pv->get_row_size(),
+                                        kphase,
+                                        1);
+                            } 
+                            else 
+                            {
+                                dm_pair.add_from_matrix(dm_hl[0][ik].c,
+                                        pv->get_col_size(),
+                                        kphase,
+                                        0);
+                            }
+                        } // ik
+                    }
 
                     // dgemm for s_2t and dm_array to get g_1dmt
                     constexpr char transa = 'T', transb = 'N';
-                    const double gemm_alpha = 1.0, gemm_beta = 1.0;
+                    double gemm_alpha = 1.0, gemm_beta = 1.0;
+
+                    if constexpr (std::is_same<TK, std::complex<double>>::value) 
+                    {
+                        if (ibt1 < ibt2) 
+                        {
+                            gemm_alpha = 2.0;
+                        }
+                    }
+
                     dgemm_(&transa,
                            &transb,
-                           &row_size,
+                           &row_size_nks,
                            &trace_alpha_size,
                            &col_size,
                            &gemm_alpha,
@@ -188,46 +293,54 @@ void LCAO_Deepks::cal_orbital_precalc(
                            &col_size,
                            &gemm_beta,
                            g_1dmt.data(),
-                           &row_size);
+                           &row_size_nks);
                 } // ad2
 
-                // do dot of g_1dmt and s_1t to get orbital_pdm_shell
-                const double* p_g1dmt = g_1dmt.data();
+                for (int ik = 0; ik < nks; ik++)
+                {
+                    // do dot of g_1dmt and s_1t to get orbital_pdm_shell
+                    const double* p_g1dmt = g_1dmt.data() + ik * row_size;
 
-                int ib = 0, index = 0, inc = 1;
+                    int ib = 0, index = 0, inc = 1;
 
-                for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) 
-                {
-                    for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) 
+                    for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) 
                     {
-                        const int inl = this->inl_index[T0](I0, L0, N0);
-                        const int nm = 2 * L0 + 1;
-
-                        for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d
+                        for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) 
                         {
-                            for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d
+                            const int inl = this->inl_index[T0](I0, L0, N0);
+                            const int nm = 2 * L0 + 1;
+
+                            for (int m1 = 0; m1 < nm; ++m1) // nm = 1 for s, 3 for p, 5 for d
                             {
-                                orbital_pdm_shell[0][0][inl][m1 * nm + m2]
-                                    += ddot_(&row_size,
-                                             p_g1dmt + index * row_size,
-                                             &inc,
-                                             s_1t.data() + index * row_size,
-                                             &inc);
-                                index++;
+                                for (int m2 = 0; m2 < nm; ++m2) // nm = 1 for s, 3 for p, 5 for d
+                                {
+                                    orbital_pdm_shell[ik][0][inl][m1 * nm + m2]
+                                        += ddot_(&row_size,
+                                                p_g1dmt + index * row_size * nks,
+                                                &inc,
+                                                s_1t.data() + index * row_size,
+                                                &inc);
+                                    index++;
+                                }
                             }
+                            ib += nm;
                         }
-                        ib += nm;
                     }
                 }
             } // ad1
         }
     }
 #ifdef __MPI
-    for (int inl = 0; inl < this->inlmax; inl++) 
+    for (int iks = 0; iks < nks; iks++) 
     {
-        Parallel_Reduce::reduce_all(this->orbital_pdm_shell[0][0][inl],
-                                    (2 * this->lmaxd + 1)
-                                        * (2 * this->lmaxd + 1));
+        for (int hl = 0; hl < 1; hl++)
+        {
+            for (int inl = 0; inl < this->inlmax; inl++) 
+            {
+                Parallel_Reduce::reduce_all(this->orbital_pdm_shell[iks][hl][inl],
+                                            (2 * this->lmaxd + 1) * (2 * this->lmaxd + 1));
+            }
+        }
     }
 #endif
 
@@ -239,33 +352,35 @@ void LCAO_Deepks::cal_orbital_precalc(
     for (int nl = 0; nl < nlmax; ++nl) 
     {
         std::vector<torch::Tensor> kiammv;
-        for (int iks = 0; iks < 1; ++iks) 
+        for (int iks = 0; iks < nks; ++iks) 
         {
             std::vector<torch::Tensor> iammv;
-            std::vector<torch::Tensor> ammv;
-            for (int iat = 0; iat < nat; ++iat) 
+            for (int hl = 0; hl < 1; ++hl)
             {
-                int inl = iat * nlmax + nl;
-                int nm = 2 * this->inl_l[inl] + 1;
-                std::vector<double> mmv;
-
-                for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d
+                std::vector<torch::Tensor> ammv;
+                for (int iat = 0; iat < nat; ++iat) 
                 {
-                    for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d
+                    int inl = iat * nlmax + nl;
+                    int nm = 2 * this->inl_l[inl] + 1;
+                    std::vector<double> mmv;
+
+                    for (int m1 = 0; m1 < nm; ++m1) // nm = 1 for s, 3 for p, 5 for d
                     {
-                        mmv.push_back(
-                            this->orbital_pdm_shell[iks][0][inl][m1 * nm + m2]);
+                        for (int m2 = 0; m2 < nm; ++m2) // nm = 1 for s, 3 for p, 5 for d
+                        {
+                            mmv.push_back(
+                                this->orbital_pdm_shell[iks][hl][inl][m1 * nm + m2]);
+                        }
                     }
-                }
-                torch::Tensor mm = torch::tensor(mmv,
-                     torch::TensorOptions().dtype(torch::kFloat64)).reshape({nm, nm}); // nm*nm
+                    torch::Tensor mm = torch::tensor(mmv,
+                        torch::TensorOptions().dtype(torch::kFloat64)).reshape({nm, nm}); // nm*nm
 
-                ammv.push_back(mm);
+                    ammv.push_back(mm);
+                }
+                torch::Tensor amm = torch::stack(ammv, 0);
+                iammv.push_back(amm);
             }
-            torch::Tensor amm = torch::stack(ammv, 0);
-            iammv.push_back(amm);
             torch::Tensor iamm = torch::stack(iammv, 0); // inl*nm*nm
-            // orbital_pdm_shell_vector.push_back(iamm);
             kiammv.push_back(iamm);
         }
         torch::Tensor kiamm = torch::stack(kiammv, 0);
@@ -284,8 +399,25 @@ void LCAO_Deepks::cal_orbital_precalc(
     }
 
     this->orbital_precalc_tensor = torch::cat(orbital_precalc_vector, -1);
-    this->del_orbital_pdm_shell(1);
+    this->del_orbital_pdm_shell(nks);
+    ModuleBase::timer::tick("LCAO_Deepks", "calc_orbital_precalc");
     return;
 }
 
+template void LCAO_Deepks::cal_orbital_precalc<double, ModuleBase::matrix>(const std::vector<std::vector<ModuleBase::matrix>>& dm_hl,
+                                                                           const int nat,
+                                                                           const int nks,
+                                                                           const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                                                                           const UnitCell& ucell,
+                                                                           const LCAO_Orbitals& orb,
+                                                                           Grid_Driver& GridD);
+
+
+template void LCAO_Deepks::cal_orbital_precalc<std::complex<double>, ModuleBase::ComplexMatrix>(const std::vector<std::vector<ModuleBase::ComplexMatrix>>& dm_hl,
+                                                                                                const int nat,
+                                                                                                const int nks,
+                                                                                                const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                                                                                                const UnitCell& ucell,
+                                                                                                const LCAO_Orbitals& orb,
+                                                                                                Grid_Driver& GridD);
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/orbital_precalc_k.cpp b/source/module_hamilt_lcao/module_deepks/orbital_precalc_k.cpp
deleted file mode 100644
index fd8b24e0c7..0000000000
--- a/source/module_hamilt_lcao/module_deepks/orbital_precalc_k.cpp
+++ /dev/null
@@ -1,350 +0,0 @@
-#ifdef __DEEPKS
-
-#include "LCAO_deepks.h"
-#include "LCAO_deepks_io.h" // mohan add 2024-07-22
-#include "module_base/blas_connector.h"
-#include "module_base/constants.h"
-#include "module_base/libm/libm.h"
-#include "module_base/parallel_reduce.h"
-#include "module_hamilt_lcao/module_hcontainer/atom_pair.h"
-#include "module_parameter/parameter.h"
-
-// calculates orbital_precalc[nks,2,NAt,NDscrpt] = gvdm * orbital_pdm_shell for
-// multi-k case; orbital_pdm_shell[nks,2,Inl,nm*nm] = dm_hl_k * overlap *
-// overlap;
-void LCAO_Deepks::cal_orbital_precalc_k(
-    const std::vector<std::vector<ModuleBase::ComplexMatrix>>& dm_hl_k,
-    const int nat,
-    const int nks,
-    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
-    const UnitCell& ucell,
-    const LCAO_Orbitals& orb,
-    Grid_Driver& GridD) {
-    ModuleBase::TITLE("LCAO_Deepks", "calc_orbital_precalc_k");
-    ModuleBase::timer::tick("LCAO_Deepks", "calc_orbital_precalc_k");
-
-    this->cal_gvdm(nat);
-    const double Rcut_Alpha = orb.Alpha[0].getRcut();
-    this->init_orbital_pdm_shell(nks);
-
-    for (int T0 = 0; T0 < ucell.ntype; T0++) 
-    {
-        Atom* atom0 = &ucell.atoms[T0];
-
-        for (int I0 = 0; I0 < atom0->na; I0++) 
-        {
-            const int iat = ucell.itia2iat(T0, I0);
-            const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
-            GridD.Find_atom(ucell, atom0->tau[I0], T0, I0);
-
-            // trace alpha orbital
-            std::vector<int> trace_alpha_row;
-            std::vector<int> trace_alpha_col;
-            int ib = 0;
-            for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) 
-            {
-                for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) 
-                {
-                    const int inl = this->inl_index[T0](I0, L0, N0);
-                    const int nm = 2 * L0 + 1;
-
-                    for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d
-                    {
-                        for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d
-                        {
-                            trace_alpha_row.push_back(ib + m1);
-                            trace_alpha_col.push_back(ib + m2);
-                        }
-                    }
-                    ib += nm;
-                }
-            }
-            const int trace_alpha_size = trace_alpha_row.size();
-
-            for (int ad1 = 0; ad1 < GridD.getAdjacentNum() + 1; ++ad1) 
-            {
-                const int T1 = GridD.getType(ad1);
-                const int I1 = GridD.getNatom(ad1);
-                const int ibt1 = ucell.itia2iat(T1, I1);
-                const ModuleBase::Vector3<double> tau1
-                    = GridD.getAdjacentTau(ad1);
-                const double dist1 = (tau1 - tau0).norm() * ucell.lat0;
-                const double Rcut_AO1 = orb.Phi[T1].getRcut();
-                if (dist1 >= Rcut_Alpha + Rcut_AO1) 
-                {
-                    continue;
-                }
-
-                const Atom* atom1 = &ucell.atoms[T1];
-                const int nw1_tot = atom1->nw * PARAM.globalv.npol;
-
-                ModuleBase::Vector3<double> dR1(GridD.getBox(ad1).x,
-                                                GridD.getBox(ad1).y,
-                                                GridD.getBox(ad1).z);
-
-                auto row_indexes = pv->get_indexes_row(ibt1);
-                const int row_size = row_indexes.size();
-                if (row_size == 0) 
-                {
-                    continue;
-                }
-
-                key_tuple key_1(ibt1, dR1.x, dR1.y, dR1.z);
-                if (this->nlm_save_k[iat].find(key_1)
-                    == this->nlm_save_k[iat].end()) 
-                {
-                    continue;
-                }
-
-                std::vector<double> s_1t(trace_alpha_size * row_size);
-
-                std::vector<double> g_1dmt(nks * trace_alpha_size * row_size, 0.0);
-
-                for (int irow = 0; irow < row_size; irow++) 
-                {
-                    const double* row_ptr
-                        = this->nlm_save_k[iat][key_1][row_indexes[irow]][0].data();
-
-                    for (int i = 0; i < trace_alpha_size; i++) 
-                    {
-                        s_1t[i * row_size + irow] = row_ptr[trace_alpha_row[i]];
-                    }
-                }
-
-                for (int ad2 = 0; ad2 < GridD.getAdjacentNum() + 1; ad2++) 
-                {
-                    const int T2 = GridD.getType(ad2);
-                    const int I2 = GridD.getNatom(ad2);
-                    const int ibt2 = ucell.itia2iat(T2, I2);
-                    if (ibt1 > ibt2) 
-                    {
-                        continue;
-                    }
-
-                    const ModuleBase::Vector3<double> tau2
-                        = GridD.getAdjacentTau(ad2);
-
-                    const Atom* atom2 = &ucell.atoms[T2];
-
-                    const int nw2_tot = atom2->nw * PARAM.globalv.npol;
-
-                    ModuleBase::Vector3<double> dR2(GridD.getBox(ad2).x,
-                                                    GridD.getBox(ad2).y,
-                                                    GridD.getBox(ad2).z);
-
-                    const double Rcut_AO2 = orb.Phi[T2].getRcut();
-                    const double dist2 = (tau2 - tau0).norm() * ucell.lat0;
-
-                    if (dist2 >= Rcut_Alpha + Rcut_AO2) 
-                    {
-                        continue;
-                    }
-
-                    auto col_indexes = pv->get_indexes_col(ibt2);
-                    const int col_size = col_indexes.size();
-					if (col_size == 0) 
-					{
-						continue;
-					}
-
-                    std::vector<double> s_2t(trace_alpha_size * col_size);
-                    key_tuple key_2(ibt2, dR2.x, dR2.y, dR2.z);
-
-					if (this->nlm_save_k[iat].find(key_2)
-							== this->nlm_save_k[iat].end()) 
-					{
-						continue;
-					}
-
-                    for (int icol = 0; icol < col_size; icol++) 
-                    {
-                        const double* col_ptr
-                            = this->nlm_save_k[iat][key_2][col_indexes[icol]][0].data();
-
-                        for (int i = 0; i < trace_alpha_size; i++) 
-                        {
-                            s_2t[i * col_size + icol]
-                                = col_ptr[trace_alpha_col[i]];
-                        }
-                    }
-
-                    std::vector<double> dm_array(row_size * nks * col_size, 0.0);
-
-                    const int row_size_nks = row_size * nks;
-
-                    for (int ik = 0; ik < nks; ik++) 
-                    {
-                        hamilt::AtomPair<double> dm_pair(ibt1,
-                                                         ibt2,
-                                                         (dR2 - dR1).x,
-                                                         (dR2 - dR1).y,
-                                                         (dR2 - dR1).z,
-                                                         pv);
-
-                        dm_pair.allocate(&dm_array[ik * row_size * col_size], 0);
-
-                        const double arg
-                            = -(kvec_d[ik] * (dR2 - dR1)) * ModuleBase::TWO_PI;
-
-                        double sinp, cosp;
-
-                        ModuleBase::libm::sincos(arg, &sinp, &cosp);
-
-                        const std::complex<double> kphase
-                            = std::complex<double>(cosp, sinp);
-
-						if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) 
-						{
-							dm_pair.add_from_matrix(dm_hl_k[0][ik].c,
-									pv->get_row_size(),
-									kphase,
-									1);
-						} 
-						else 
-						{
-							dm_pair.add_from_matrix(dm_hl_k[0][ik].c,
-									pv->get_col_size(),
-									kphase,
-									0);
-						}
-                    } // ik
-
-                    // dgemm for s_2t and dm_array to get g_1dmt
-                    constexpr char transa = 'T', transb = 'N';
-                    double gemm_alpha = 1.0, gemm_beta = 1.0;
-
-					if (ibt1 < ibt2) 
-					{
-						gemm_alpha = 2.0;
-					}
-
-					dgemm_(&transa,
-                           &transb,
-                           &row_size_nks,
-                           &trace_alpha_size,
-                           &col_size,
-                           &gemm_alpha,
-                           dm_array.data(),
-                           &col_size,
-                           s_2t.data(),
-                           &col_size,
-                           &gemm_beta,
-                           g_1dmt.data(),
-                           &row_size_nks);
-                } // ad2
-
-				for (int ik = 0; ik < nks; ik++) 
-				{
-					// do dot of g_1dmt and s_1t to get orbital_pdm_shell
-                    const double* p_g1dmt = g_1dmt.data() + ik * row_size;
-
-                    int ib = 0, index = 0, inc = 1;
-
-					for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) 
-					{
-						for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) 
-						{
-                            const int inl = this->inl_index[T0](I0, L0, N0);
-                            const int nm = 2 * L0 + 1;
-
-							for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d
-							{
-								for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d
-								{
-									orbital_pdm_shell[ik][0][inl][m1 * nm + m2]
-										+= ddot_(&row_size,
-												p_g1dmt
-												+ index * row_size * nks,
-												&inc,
-												s_1t.data() + index * row_size,
-												&inc);
-									index++;
-								}
-							}
-							ib += nm;
-						}
-                    }
-                }
-            } // ad1
-        }
-    }
-
-#ifdef __MPI
-	for (int iks = 0; iks < nks; iks++) 
-	{
-		for (int hl = 0; hl < 1; hl++) 
-		{
-			for (int inl = 0; inl < this->inlmax; inl++) 
-			{
-				Parallel_Reduce::reduce_all(
-						this->orbital_pdm_shell[iks][hl][inl],
-						(2 * this->lmaxd + 1) * (2 * this->lmaxd + 1));
-			}
-		}
-	}
-#endif
-
-    // transfer orbital_pdm_shell to orbital_pdm_shell_vector
-
-    int nlmax = this->inlmax / nat;
-
-    std::vector<torch::Tensor> orbital_pdm_shell_vector;
-
-	for (int nl = 0; nl < nlmax; ++nl) 
-	{
-		std::vector<torch::Tensor> kiammv;
-		for (int iks = 0; iks < nks; ++iks) 
-		{
-			std::vector<torch::Tensor> iammv;
-			for (int hl = 0; hl < 1; ++hl) 
-			{
-				std::vector<torch::Tensor> ammv;
-				for (int iat = 0; iat < nat; ++iat) 
-				{
-                    int inl = iat * nlmax + nl;
-                    int nm = 2 * this->inl_l[inl] + 1;
-                    std::vector<double> mmv;
-
-                    for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d
-                    {
-                        for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d
-                        {
-                            mmv.push_back(
-                                this->orbital_pdm_shell[iks][hl][inl][m1 * nm + m2]);
-                        }
-                    }
-                    torch::Tensor mm
-                        = torch::tensor(
-                              mmv,
-                              torch::TensorOptions().dtype(torch::kFloat64))
-                              .reshape({nm, nm}); // nm*nm
-                    ammv.push_back(mm);
-                }
-                torch::Tensor amm = torch::stack(ammv, 0);
-                iammv.push_back(amm);
-            }
-            torch::Tensor iamm = torch::stack(iammv, 0); // inl*nm*nm
-            kiammv.push_back(iamm);
-        }
-        torch::Tensor kiamm = torch::stack(kiammv, 0);
-        orbital_pdm_shell_vector.push_back(kiamm);
-    }
-
-    assert(orbital_pdm_shell_vector.size() == nlmax);
-
-    // einsum for each nl:
-    std::vector<torch::Tensor> orbital_precalc_vector;
-	for (int nl = 0; nl < nlmax; ++nl) 
-	{
-		orbital_precalc_vector.push_back(
-				at::einsum("kiamn, avmn->kiav",
-					{orbital_pdm_shell_vector[nl], this->gevdm_vector[nl]}));
-    }
-    this->orbital_precalc_tensor = torch::cat(orbital_precalc_vector, -1);
-
-    this->del_orbital_pdm_shell(nks);
-    ModuleBase::timer::tick("LCAO_Deepks", "calc_orbital_precalc_k");
-    return;
-}
-
-#endif
diff --git a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp
index 010da2073d..c3d1abe61f 100644
--- a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp
+++ b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp
@@ -126,11 +126,11 @@ void test_deepks::check_gdmx()
     this->ld.init_gdmx(ucell.nat);
     if (PARAM.sys.gamma_only_local)
     {
-        this->ld.cal_gdmx(dm_new[0], ucell, ORB, Test_Deepks::GridD, 0);
+        this->ld.cal_gdmx(dm_new, ucell, ORB, Test_Deepks::GridD, kv.get_nkstot(), kv.kvec_d, 0);
     }
     else
     {
-        this->ld.cal_gdmx_k(dm_k_new, ucell, ORB, Test_Deepks::GridD, kv.get_nkstot(), kv.kvec_d, 0);
+        this->ld.cal_gdmx(dm_k_new, ucell, ORB, Test_Deepks::GridD, kv.get_nkstot(), kv.kvec_d, 0);
     }
     this->ld.check_gdmx(ucell.nat);
 
diff --git a/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp b/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp
index 0c91180d9a..25eebd7490 100644
--- a/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp
+++ b/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp
@@ -18,8 +18,11 @@
 // calculates v_delta_precalc[nks,nlocal,nlocal,NAt,NDscrpt] = gvdm * v_delta_pdm_shell;
 // v_delta_pdm_shell[nks,nlocal,nlocal,Inl,nm*nm] = overlap * overlap;
 // for deepks_v_delta = 1
+template <typename TK>
 void LCAO_Deepks::cal_v_delta_precalc(const int nlocal,
     const int nat,
+    const int nks,
+    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
     const UnitCell &ucell,
     const LCAO_Orbitals &orb,
     Grid_Driver &GridD)
@@ -30,7 +33,7 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal,
 
     this->cal_gvdm(nat);
     const double Rcut_Alpha = orb.Alpha[0].getRcut();
-    this->init_v_delta_pdm_shell(1,nlocal); // 1 for gamma-only
+    this->init_v_delta_pdm_shell(nks,nlocal); 
    
     for (int T0 = 0; T0 < ucell.ntype; T0++)
     {
@@ -46,6 +49,7 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal,
             {
                 const int T1 = GridD.getType(ad1);
                 const int I1 = GridD.getNatom(ad1);
+                const int ibt1 = ucell.itia2iat(T1, I1); 
                 const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
                 const ModuleBase::Vector3<double> tau1 = GridD.getAdjacentTau(ad1);
 				const Atom* atom1 = &ucell.atoms[T1];
@@ -57,11 +61,27 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal,
                 {
                     continue;
                 }
+                
+                ModuleBase::Vector3<double> dR1(GridD.getBox(ad1).x,
+                                            GridD.getBox(ad1).y,
+                                            GridD.getBox(ad1).z);
+
+                key_tuple key_1(ibt1, dR1.x, dR1.y, dR1.z); 
+
+                if constexpr (std::is_same<TK, std::complex<double>>::value)
+                {
+                    if (this->nlm_save_k[iat].find(key_1)
+                        == this->nlm_save_k[iat].end()) 
+                    {
+                        continue;
+                    }
+                }
 
 				for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++)
 				{
 					const int T2 = GridD.getType(ad2);
 					const int I2 = GridD.getNatom(ad2);
+                    const int ibt2 = ucell.itia2iat(T2, I2);
 					const int start2 = ucell.itiaiw2iwt(T2, I2, 0);
 					const ModuleBase::Vector3<double> tau2 = GridD.getAdjacentTau(ad2);
 					const Atom* atom2 = &ucell.atoms[T2];
@@ -75,43 +95,83 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal,
 						continue;
 					}
 
+                    ModuleBase::Vector3<double> dR2(GridD.getBox(ad2).x,
+                                                        GridD.getBox(ad2).y,
+                                                        GridD.getBox(ad2).z);
+                    key_tuple key_2(ibt2, dR2.x, dR2.y, dR2.z);
+
+                    if constexpr (std::is_same<TK, std::complex<double>>::value)
+                    {
+                        if (this->nlm_save_k[iat].find(key_2)
+                            == this->nlm_save_k[iat].end()) 
+                        {
+                            continue;
+                        }
+                    }
+
 					for (int iw1=0; iw1<nw1_tot; ++iw1)
 					{
 						const int iw1_all = start1 + iw1; // this is \mu
 						const int iw1_local = pv->global2local_row(iw1_all);
-						if(iw1_local < 0) {continue;
-}
+						if(iw1_local < 0) {continue;}
 						const int iw1_0 = iw1/PARAM.globalv.npol;
 						for (int iw2=0; iw2<nw2_tot; ++iw2)
 						{
 							const int iw2_all = start2 + iw2; // this is \nu
 							const int iw2_local = pv->global2local_col(iw2_all);
-							if(iw2_local < 0) {continue;
-}
+							if(iw2_local < 0) {continue;}
 							const int iw2_0 = iw2/PARAM.globalv.npol;
 
-                            std::vector<double> nlm1 = this->nlm_save[iat][ad1][iw1][0];
-                            std::vector<double> nlm2 = this->nlm_save[iat][ad2][iw2][0];
+                            std::vector<double> nlm1;
+                            std::vector<double> nlm2;
+                            if constexpr (std::is_same<TK, double>::value)
+                            {
+                                nlm1 = this->nlm_save[iat][ad1][iw1][0];
+                                nlm2 = this->nlm_save[iat][ad2][iw2][0];
+                            }
+                            else
+                            {
+                                nlm1 = this->nlm_save_k[iat][key_1][iw1][0];
+                                nlm2 = this->nlm_save_k[iat][key_2][iw2][0];
+                            }
 
                             assert(nlm1.size()==nlm2.size());
-                            int ib=0;
-                            for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0)
+                            for (int ik = 0; ik < nks; ik++)
                             {
-                                for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0)
+                                int ib=0;
+                                std::complex<double> kphase = std::complex<double>(1.0, 0.0);
+                                if constexpr (std::is_same<TK, std::complex<double>>::value)
                                 {
-                                    const int inl = this->inl_index[T0](I0, L0, N0);
-                                    const int nm = 2*L0+1;
-                                    
-                                    for (int m1=0; m1<nm; ++m1) // nm = 1 for s, 3 for p, 5 for d
+                                    const double arg = - (kvec_d[ik] * (dR1-dR2) ) * ModuleBase::TWO_PI;
+                                    double sinp, cosp;
+                                    ModuleBase::libm::sincos(arg, &sinp, &cosp);
+                                    kphase = std::complex<double>(cosp, sinp);
+                                }
+                                for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0)
+                                {
+                                    for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0)
                                     {
-                                        for (int m2=0; m2<nm; ++m2) // nm = 1 for s, 3 for p, 5 for d
+                                        const int inl = this->inl_index[T0](I0, L0, N0);
+                                        const int nm = 2*L0+1;
+                                        
+                                        for (int m1=0; m1<nm; ++m1) // nm = 1 for s, 3 for p, 5 for d
                                         {
-                                            v_delta_pdm_shell[0][iw1_all][iw2_all][inl][m1*nm+m2] += nlm1[ib+m1]*nlm2[ib+m2];
+                                            for (int m2=0; m2<nm; ++m2) // nm = 1 for s, 3 for p, 5 for d
+                                            {
+                                                if constexpr (std::is_same<TK, double>::value)
+                                                {
+                                                    this->v_delta_pdm_shell[ik][iw1_all][iw2_all][inl][m1*nm+m2] += nlm1[ib+m1]*nlm2[ib+m2];
+                                                }
+                                                else
+                                                {
+                                                    this->v_delta_pdm_shell_complex[ik][iw1_all][iw2_all][inl][m1*nm+m2] += nlm1[ib+m1]*nlm2[ib+m2]*kphase;
+                                                }
+                                            }
                                         }
+                                        ib+=nm;
                                     }
-                                    ib+=nm;
-                                }
-                            }                            
+                                }  
+                            } //ik                         
 						}//iw2
 					}//iw1
 				}//ad2
@@ -121,13 +181,23 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal,
     }
 #ifdef __MPI
     const int mn_size=(2 * this->lmaxd + 1) * (2 * this->lmaxd + 1);
-    for(int inl = 0; inl < this->inlmax; inl++)
+    for (int ik = 0; ik < nks; ik++)
     {
-        for(int mu = 0; mu < nlocal ; mu++)
+        for(int inl = 0; inl < this->inlmax; inl++)
         {
-            for(int nu=0; nu< nlocal ; nu++)
+            for(int mu = 0; mu < nlocal ; mu++)
             {
-                Parallel_Reduce::reduce_all(this->v_delta_pdm_shell[0][mu][nu][inl],mn_size);
+                for(int nu=0; nu< nlocal ; nu++)
+                {
+                    if constexpr (std::is_same<TK, double>::value)
+                    {
+                        Parallel_Reduce::reduce_all(this->v_delta_pdm_shell[ik][mu][nu][inl],mn_size);
+                    }
+                    else
+                    {
+                        Parallel_Reduce::reduce_all(this->v_delta_pdm_shell_complex[ik][mu][nu][inl],mn_size);
+                    }
+                }
             }
         }
     }
@@ -141,7 +211,7 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal,
     for(int nl = 0; nl < nlmax; ++nl)
     {
         std::vector<torch::Tensor> kuuammv;
-        for(int iks = 0; iks < 1; ++iks)
+        for(int iks = 0; iks < nks; ++iks)
         {
             std::vector<torch::Tensor> uuammv;
             for(int mu = 0; mu < nlocal; ++mu)
@@ -154,17 +224,32 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal,
                     {
                         int inl = iat*nlmax+nl;
                         int nm = 2*this->inl_l[inl]+1;
-                        std::vector<double> mmv;
+                        std::vector<TK> mmv;
                     
                         for (int m1=0; m1<nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d
                         {
                             for (int m2=0; m2<nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d
                             {
-                                mmv.push_back(this->v_delta_pdm_shell[iks][mu][nu][inl][m1*nm+m2]);
+                                if constexpr (std::is_same<TK, double>::value)
+                                {
+                                    mmv.push_back(this->v_delta_pdm_shell[iks][mu][nu][inl][m1*nm+m2]);
+                                }
+                                else
+                                {
+                                    mmv.push_back(this->v_delta_pdm_shell_complex[iks][mu][nu][inl][m1*nm+m2]);
+                                }
                             }
                         }
-                        torch::Tensor mm = torch::tensor(mmv, torch::TensorOptions().dtype(torch::kFloat64) ).reshape({nm, nm});    //nm*nm
-                        ammv.push_back(mm);
+                        if constexpr (std::is_same<TK, double>::value)
+                        {
+                            torch::Tensor mm = torch::tensor(mmv, torch::TensorOptions().dtype(torch::kFloat64) ).reshape({nm, nm});    //nm*nm
+                            ammv.push_back(mm);
+                        }
+                        else
+                        {
+                            torch::Tensor mm = torch::from_blob(mmv.data(), {nm, nm}, torch::TensorOptions().dtype(torch::kComplexDouble)).clone();    //nm*nm
+                            ammv.push_back(mm);
+                        }
                     }
                     torch::Tensor amm = torch::stack(ammv, 0); 
                     uammv.push_back(amm);                    
@@ -184,12 +269,20 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal,
     //einsum for each nl: 
     std::vector<torch::Tensor> v_delta_precalc_vector;
     for (int nl = 0; nl<nlmax; ++nl)
-    {
-        v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_shell_vector[nl], this->gevdm_vector[nl]}));
+    {   
+        if constexpr (std::is_same<TK, double>::value)
+        {
+            v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_shell_vector[nl], this->gevdm_vector[nl]}));
+        }
+        else
+        {
+            torch::Tensor gevdm_vector_complex = this->gevdm_vector[nl].to(torch::kComplexDouble);
+            v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_shell_vector[nl], gevdm_vector_complex}));
+        }
     }
 
     this->v_delta_precalc_tensor = torch::cat(v_delta_precalc_vector, -1);
-    this->del_v_delta_pdm_shell(1,nlocal);
+    this->del_v_delta_pdm_shell(nks,nlocal);
 
     //check_v_delta_precalc(nlocal,nat);
     // timeval t_end;
@@ -198,6 +291,7 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal,
     return;
 }
 
+template <typename TK>
 void LCAO_Deepks::check_v_delta_precalc(const int nat, const int nks,const int nlocal)
 {
     std::ofstream ofs("v_delta_precalc.dat");
@@ -212,7 +306,15 @@ void LCAO_Deepks::check_v_delta_precalc(const int nat, const int nks,const int n
                 {
                     for(int p=0; p<this->des_per_atom; ++p)
                     {
-                        ofs<<this->v_delta_precalc_tensor.index({iks, mu, nu, iat, p }).item().toDouble()<<" ";
+                        if constexpr (std::is_same<TK, double>::value)
+                        {
+                            ofs << this->v_delta_precalc_tensor.index({iks, mu, nu, iat, p }).item().toDouble() << " " ;
+                        }
+                        else
+                        {
+                            auto tensor_value = this->v_delta_precalc_tensor.index({iks, mu, nu, iat, p});
+                            ofs << std::complex<double>(torch::real(tensor_value).item<double>(), torch::imag(tensor_value).item<double>()) << " " ;
+                        }
                     }
                 }
                 ofs << std::endl;                
@@ -222,4 +324,23 @@ void LCAO_Deepks::check_v_delta_precalc(const int nat, const int nks,const int n
     ofs.close();
 }
 
+template void LCAO_Deepks::cal_v_delta_precalc<double>(const int nlocal,
+                                                       const int nat,
+                                                       const int nks,
+                                                       const std::vector<ModuleBase::Vector3<double>> &kvec_d,
+                                                       const UnitCell &ucell,
+                                                       const LCAO_Orbitals &orb,
+                                                       Grid_Driver &GridD);
+
+template void LCAO_Deepks::cal_v_delta_precalc<std::complex<double>>(const int nlocal,
+                                                                    const int nat,
+                                                                    const int nks,
+                                                                    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
+                                                                    const UnitCell &ucell,
+                                                                    const LCAO_Orbitals &orb,
+                                                                    Grid_Driver &GridD);
+
+template void LCAO_Deepks::check_v_delta_precalc<double>(const int nat, const int nks, const int nlocal);
+template void LCAO_Deepks::check_v_delta_precalc<std::complex<double>>(const int nat, const int nks, const int nlocal);
+
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/v_delta_precalc_k.cpp b/source/module_hamilt_lcao/module_deepks/v_delta_precalc_k.cpp
deleted file mode 100644
index 1a1d3b4006..0000000000
--- a/source/module_hamilt_lcao/module_deepks/v_delta_precalc_k.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
-#ifdef __DEEPKS
-
-#include "LCAO_deepks.h"
-#include "LCAO_deepks_io.h" 
-#include "module_base/blas_connector.h"
-#include "module_base/constants.h"
-#include "module_base/libm/libm.h"
-#include "module_base/parallel_reduce.h"
-#include "module_hamilt_lcao/module_hcontainer/atom_pair.h"
-#include "module_parameter/parameter.h"
-
-
-// calculates v_delta_precalc[nks,nlocal,nlocal,NAt,NDscrpt] = gvdm * v_delta_pdm_shell;
-// v_delta_pdm_shell[nks,nlocal,nlocal,Inl,nm*nm] = overlap * overlap;
-// for deepks_v_delta = 1
-void LCAO_Deepks::cal_v_delta_precalc_k(const int nlocal,
-    const int nat,
-    const int nks,
-    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-    const UnitCell &ucell,
-    const LCAO_Orbitals &orb,
-    Grid_Driver &GridD)
-{
-    ModuleBase::TITLE("LCAO_Deepks", "calc_v_delta_precalc");
-    // timeval t_start;
-    // gettimeofday(&t_start,NULL);
-
-    this->cal_gvdm(nat);
-    const double Rcut_Alpha = orb.Alpha[0].getRcut();
-    this->init_v_delta_pdm_shell(nks,nlocal); // multi-k
-   
-    for (int T0 = 0; T0 < ucell.ntype; T0++)
-    {
-		Atom* atom0 = &ucell.atoms[T0]; 
-        
-        for (int I0 =0; I0< atom0->na; I0++)
-        {
-            const int iat = ucell.itia2iat(T0,I0);
-            const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
-            GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0);
-
-            for (int ad1=0; ad1<GridD.getAdjacentNum()+1 ; ++ad1)
-            {
-                const int T1 = GridD.getType(ad1);
-                const int I1 = GridD.getNatom(ad1);
-                const int ibt1 = ucell.itia2iat(T1, I1); 
-                const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
-                const ModuleBase::Vector3<double> tau1 = GridD.getAdjacentTau(ad1);
-				const Atom* atom1 = &ucell.atoms[T1];
-				const int nw1_tot = atom1->nw*PARAM.globalv.npol;
-				const double Rcut_AO1 = orb.Phi[T1].getRcut(); 
-
-                const double dist1 = (tau1-tau0).norm() * ucell.lat0;
-                if (dist1 >= Rcut_Alpha + Rcut_AO1)
-                {
-                    continue;
-                }
-
-                ModuleBase::Vector3<double> dR1(GridD.getBox(ad1).x,
-                                                GridD.getBox(ad1).y,
-                                                GridD.getBox(ad1).z);
-                
-                key_tuple key_1(ibt1, dR1.x, dR1.y, dR1.z);
-                
-                if (this->nlm_save_k[iat].find(key_1)
-                    == this->nlm_save_k[iat].end()) 
-                {
-                    continue;
-                }
-
-				for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++)
-				{
-					const int T2 = GridD.getType(ad2);
-					const int I2 = GridD.getNatom(ad2);
-                    const int ibt2 = ucell.itia2iat(T2, I2);
-					const int start2 = ucell.itiaiw2iwt(T2, I2, 0);
-					const ModuleBase::Vector3<double> tau2 = GridD.getAdjacentTau(ad2);
-					const Atom* atom2 = &ucell.atoms[T2];
-					const int nw2_tot = atom2->nw*PARAM.globalv.npol;
-					
-					const double Rcut_AO2 = orb.Phi[T2].getRcut();
-                	const double dist2 = (tau2-tau0).norm() * ucell.lat0;
-
-					if (dist2 >= Rcut_Alpha + Rcut_AO2)
-					{
-						continue;
-					}
-
-                    ModuleBase::Vector3<double> dR2(GridD.getBox(ad2).x,
-                                                    GridD.getBox(ad2).y,
-                                                    GridD.getBox(ad2).z);
-
-                    key_tuple key_2(ibt2, dR2.x, dR2.y, dR2.z);
-
-                    if (this->nlm_save_k[iat].find(key_2)
-                        == this->nlm_save_k[iat].end()) 
-                    {
-                        continue;
-                    }
-
-					for (int iw1=0; iw1<nw1_tot; ++iw1)
-					{
-						const int iw1_all = start1 + iw1; // this is \mu
-						const int iw1_local = pv->global2local_row(iw1_all);
-						if(iw1_local < 0) {continue;}
-						const int iw1_0 = iw1/PARAM.globalv.npol;
-						for (int iw2=0; iw2<nw2_tot; ++iw2)
-						{
-							const int iw2_all = start2 + iw2; // this is \nu
-							const int iw2_local = pv->global2local_col(iw2_all);
-							if(iw2_local < 0) {continue;}
-							const int iw2_0 = iw2/PARAM.globalv.npol;
-                            // Should use nlm_save_k, to be modified here!!!
-                            std::vector<double> nlm1 = this->nlm_save_k[iat][key_1][iw1][0];
-                            std::vector<double> nlm2 = this->nlm_save_k[iat][key_2][iw2][0];
-                            assert(nlm1.size()==nlm2.size());
-                            for (int ik = 0; ik < nks; ik++)
-                            {
-                                int ib=0;
-                                const double arg = - (kvec_d[ik] * (dR1-dR2) ) * ModuleBase::TWO_PI;
-                                double sinp, cosp;
-                                ModuleBase::libm::sincos(arg, &sinp, &cosp);
-                                const std::complex<double> kphase = std::complex<double>(cosp, sinp);
-                                for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0)
-                                {
-                                    for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0)
-                                    {
-                                        const int inl = this->inl_index[T0](I0, L0, N0);
-                                        const int nm = 2*L0+1;
-                                        
-                                        for (int m1=0; m1<nm; ++m1) // nm = 1 for s, 3 for p, 5 for d
-                                        {
-                                            for (int m2=0; m2<nm; ++m2) // nm = 1 for s, 3 for p, 5 for d
-                                            {
-                                                v_delta_pdm_shell_complex[ik][iw1_all][iw2_all][inl][m1*nm+m2] += nlm1[ib+m1]*nlm2[ib+m2]*kphase;
-                                            }
-                                        }
-
-                                        ib+=nm;
-                                    }
-                                }
-                            }
-                                                        
-						}//iw2
-					}//iw1
-				}//ad2
-			}//ad1   
-            
-        }
-    }
-
-#ifdef __MPI
-    const int mn_size=(2 * this->lmaxd + 1) * (2 * this->lmaxd + 1);
-    for(int ik = 0; ik < nks; ik++)
-    {
-        for(int inl = 0; inl < this->inlmax; inl++)
-        {
-            for(int mu = 0; mu < nlocal ; mu++)
-            {
-                for(int nu=0; nu< nlocal ; nu++)
-                {
-                    Parallel_Reduce::reduce_all(this->v_delta_pdm_shell_complex[ik][mu][nu][inl],mn_size);
-                }
-            }
-        }
-    }
-#endif    
-    // transfer v_delta_pdm_shell to v_delta_pdm_shell_vector
-    
-    int nlmax = this->inlmax/nat;
-   
-    std::vector<torch::Tensor> v_delta_pdm_shell_vector;
-    for(int nl = 0; nl < nlmax; ++nl)
-    {
-        std::vector<torch::Tensor> kuuammv;
-        for(int iks = 0; iks < nks; ++iks)
-        {
-            std::vector<torch::Tensor> uuammv;
-            for(int mu = 0; mu < nlocal; ++mu)
-            {
-                std::vector<torch::Tensor> uammv;
-                for(int nu =0 ; nu < nlocal; ++nu)
-                {
-                    std::vector<torch::Tensor> ammv;
-                    for (int iat=0; iat<nat; ++iat)
-                    {
-                        int inl = iat*nlmax+nl;
-                        int nm = 2*this->inl_l[inl]+1;
-                        std::vector<std::complex<double>> mmv;
-                    
-                        for (int m1=0; m1<nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d
-                        {
-                            for (int m2=0; m2<nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d
-                            {
-                                mmv.push_back(this->v_delta_pdm_shell_complex[iks][mu][nu][inl][m1*nm+m2]);
-                            }
-                        }
-                        torch::Tensor mm = torch::from_blob(mmv.data(), {nm, nm}, torch::TensorOptions().dtype(torch::kComplexDouble)).clone();    //nm*nm
-                        ammv.push_back(mm);
-                    }
-                    torch::Tensor amm = torch::stack(ammv, 0); 
-                    uammv.push_back(amm);                    
-                }
-                torch::Tensor uamm = torch::stack(uammv, 0); 
-                uuammv.push_back(uamm);
-            }
-            torch::Tensor uuamm = torch::stack(uuammv, 0); 
-            kuuammv.push_back(uuamm);
-        }
-        torch::Tensor kuuamm = torch::stack(kuuammv, 0);  
-        v_delta_pdm_shell_vector.push_back(kuuamm);
-    }
-
-    assert(v_delta_pdm_shell_vector.size() == nlmax);
-    
-    //einsum for each nl: 
-    std::vector<torch::Tensor> v_delta_precalc_vector;
-    for (int nl = 0; nl<nlmax; ++nl)
-    {
-        torch::Tensor gevdm_vector_complex = this->gevdm_vector[nl].to(torch::kComplexDouble);
-        v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_shell_vector[nl], gevdm_vector_complex}));
-    }
-
-    this->v_delta_precalc_tensor = torch::cat(v_delta_precalc_vector, -1);
-    this->del_v_delta_pdm_shell(nks,nlocal);
-
-    //check_v_delta_precalc(nlocal,nat);
-    // timeval t_end;
-    // gettimeofday(&t_end,NULL);
-    // std::cout<<"calculate v_delta_precalc time:\t"<<(double)(t_end.tv_sec-t_start.tv_sec) + (double)(t_end.tv_usec-t_start.tv_usec)/1000000.0<<std::endl;
-    return;
-}
-
-#endif
\ No newline at end of file

From 1f9779ded0bc91414219fcef587fad4880af172f Mon Sep 17 00:00:00 2001
From: Yu Liu <77716030+YuLiu98@users.noreply.github.com>
Date: Thu, 12 Dec 2024 16:41:22 +0800
Subject: [PATCH 5/7] Refactor: remove GlobalC::GridD (#5720)

* Refactor: add TITLE and timer::tick for esolver

* Refactor: remove GlobalC::GridD

* convert tool_title.cpp

* remove useless destructor and flag to enable the default move assignment

---------

Co-authored-by: maki49 <1579492865@qq.com>
---
 source/module_base/tool_title.cpp             |   4 +-
 .../module_neighbor/sltk_atom_arrange.cpp     |  28 +--
 .../module_neighbor/sltk_atom_arrange.h       |  11 -
 .../module_cell/module_neighbor/sltk_grid.cpp |  29 +--
 .../module_cell/module_neighbor/sltk_grid.h   |  32 +--
 .../module_neighbor/sltk_grid_driver.cpp      |   4 -
 .../module_neighbor/sltk_grid_driver.h        |   9 +-
 .../test/sltk_atom_arrange_test.cpp           |   1 -
 .../module_neighbor/test/sltk_grid_test.cpp   |   4 -
 .../module_dm/test/test_dm_R_init.cpp         |   2 +-
 source/module_esolver/esolver_fp.cpp          |   2 +
 source/module_esolver/esolver_gets.cpp        |  14 +-
 source/module_esolver/esolver_ks.cpp          |   6 +-
 source/module_esolver/esolver_ks_lcao.cpp     | 196 ++++++++-------
 source/module_esolver/esolver_ks_lcao.h       |   2 +
 .../module_esolver/esolver_ks_lcao_tddft.cpp  |   6 +
 source/module_esolver/esolver_ks_pw.cpp       |   8 +
 source/module_esolver/esolver_lj.cpp          |   9 -
 source/module_esolver/esolver_of.cpp          |  10 +
 source/module_esolver/esolver_sdft_pw.cpp     |  10 +
 source/module_esolver/lcao_before_scf.cpp     |  19 +-
 source/module_esolver/lcao_others.cpp         |  22 +-
 .../module_hamilt_lcao/hamilt_lcaodft/FORCE.h |   2 +
 .../hamilt_lcaodft/FORCE_STRESS.cpp           | 172 ++++++-------
 .../hamilt_lcaodft/FORCE_STRESS.h             |   2 +
 .../hamilt_lcaodft/FORCE_gamma.cpp            |  35 +--
 .../hamilt_lcaodft/FORCE_k.cpp                |  40 ++--
 .../hamilt_lcaodft/hamilt_lcao.cpp            |   2 +-
 .../operator_lcao/deepks_lcao.cpp             |  31 +--
 .../operator_lcao/deepks_lcao.h               |   2 +
 .../operator_lcao/test/tmp_mocks.cpp          |   3 +-
 .../hamilt_lcaodft/spar_dh.cpp                |  25 +-
 .../hamilt_lcaodft/spar_st.cpp                |   2 +-
 source/module_hamilt_lcao/module_dftu/dftu.h  |  86 ++++---
 .../module_dftu/dftu_folding.cpp              |  56 +++--
 .../module_dftu/dftu_force.cpp                |  20 +-
 source/module_io/berryphase.cpp               |   8 +-
 source/module_io/berryphase.h                 |   6 +-
 source/module_io/cal_r_overlap_R.cpp          |  14 +-
 source/module_io/cal_r_overlap_R.h            |   6 +-
 source/module_io/output_mat_sparse.cpp        |   2 +-
 source/module_io/output_mulliken.h            |  33 ++-
 source/module_io/td_current_io.cpp            |   3 +-
 source/module_io/td_current_io.h              |   1 +
 source/module_io/to_wannier90_lcao.cpp        |  19 +-
 source/module_io/to_wannier90_lcao.h          |   4 +-
 source/module_io/unk_overlap_lcao.cpp         |  18 +-
 source/module_io/unk_overlap_lcao.h           |   2 +-
 source/module_lr/esolver_lrtd_lcao.cpp        | 126 +++++++---
 source/module_lr/esolver_lrtd_lcao.h          |   1 +
 .../operator_casida/operator_lr_hxc.cpp       |   2 +-
 source/module_rdmft/rdmft.cpp                 |  18 +-
 source/module_rdmft/rdmft.h                   |  16 +-
 source/module_rdmft/rdmft_pot.cpp             | 226 ++++++++----------
 source/module_rdmft/update_state_rdmft.cpp    |   4 +-
 55 files changed, 708 insertions(+), 707 deletions(-)

diff --git a/source/module_base/tool_title.cpp b/source/module_base/tool_title.cpp
index e55f966f27..6d2716670e 100644
--- a/source/module_base/tool_title.cpp
+++ b/source/module_base/tool_title.cpp
@@ -19,7 +19,7 @@ void TITLE(const std::string &class_name,const std::string &function_name,const
 {
     if (disable)
     {
-        return;//no output
+        return; // no output
     }
 #ifdef __NORMAL
     std::cout<<" ==> "<<class_name<<"::"<<function_name<<"\t"
@@ -39,7 +39,7 @@ void TITLE(std::ofstream &ofs,const std::string &class_name,const std::string &f
 {
     if (disable)
     {
-        return;//no output
+        return; // no output
     }
 #ifdef __NORMAL
     std::cout<<"\n\n ==> "<<class_name<<"::"<<function_name<<"\t"
diff --git a/source/module_cell/module_neighbor/sltk_atom_arrange.cpp b/source/module_cell/module_neighbor/sltk_atom_arrange.cpp
index d4d020c5ff..8c259794f0 100644
--- a/source/module_cell/module_neighbor/sltk_atom_arrange.cpp
+++ b/source/module_cell/module_neighbor/sltk_atom_arrange.cpp
@@ -162,30 +162,4 @@ void atom_arrange::search(
     }
 
     return;
-}
-
-
-//2015-05-07
-void atom_arrange::delete_vector(
-	std::ofstream &ofs_in,
-	const bool pbc_flag, // GlobalV::SEARCH_PBC
-	Grid_Driver &grid_d, 
-	const UnitCell &ucell, 
-	const double &search_radius_bohr, 
-	const int &test_atom_in)
-{
-	const double radius_lat0unit2 = search_radius_bohr / ucell.lat0;
-
-	Atom_input at2(
-		ofs_in,
-		ucell, 
-		ucell.nat, 
-		ucell.ntype, 
-		pbc_flag, 
-		radius_lat0unit2, 
-		test_atom_in);
-
-	grid_d.delete_vector(at2.getGrid_layerX_minus(),at2.getGrid_layerY_minus(),at2.getGrid_layerZ_minus());
-
-	grid_d.delete_Cell();
-}
+}
\ No newline at end of file
diff --git a/source/module_cell/module_neighbor/sltk_atom_arrange.h b/source/module_cell/module_neighbor/sltk_atom_arrange.h
index 14a2771487..1d263adb3f 100644
--- a/source/module_cell/module_neighbor/sltk_atom_arrange.h
+++ b/source/module_cell/module_neighbor/sltk_atom_arrange.h
@@ -30,17 +30,6 @@ class atom_arrange
 		const double& rcutmax_Beta, 
 		const bool gamma_only_local);
 
-	//2015-05-07
-	static void delete_vector(
-		std::ofstream &ofs_in,
-		const bool pbc_flag,
-		Grid_Driver &grid_d, 
-		const UnitCell &ucell, 
-		const double &search_radius_bohr, 
-		const int &test_atom_in);
-
-private:
-
 };
 
 #endif
diff --git a/source/module_cell/module_neighbor/sltk_grid.cpp b/source/module_cell/module_neighbor/sltk_grid.cpp
index 117a5281e1..52d9cdb760 100644
--- a/source/module_cell/module_neighbor/sltk_grid.cpp
+++ b/source/module_cell/module_neighbor/sltk_grid.cpp
@@ -16,20 +16,9 @@ CellSet::CellSet()
     in_grid[2] = 0;
 }
 
-Grid::Grid(const int& test_grid_in) : test_grid(test_grid_in)
-{
-    //	ModuleBase::TITLE("Grid","Grid");
-    //----------------------------------------------------------
-    // EXPLAIN : init_cell_flag (use this flag in case memory
-    // leak)
-    //----------------------------------------------------------
-    init_cell_flag = false;
-}
+Grid::Grid(const int& test_grid_in) : test_grid(test_grid_in) {}
 
-Grid::~Grid()
-{
-    this->delete_Cell();
-}
+Grid::~Grid() {}
 
 void Grid::init(std::ofstream& ofs_in, const UnitCell& ucell, const Atom_input& input)
 {
@@ -49,7 +38,6 @@ void Grid::setMemberVariables(std::ofstream& ofs_in, //  output data to ofs
 {
     ModuleBase::TITLE("SLTK_Grid", "setMemberVariables");
 
-    this->delete_Cell();
     // mohan add 2010-09-05
     // AdjacentSet::call_times = 0;
 
@@ -96,8 +84,6 @@ void Grid::setMemberVariables(std::ofstream& ofs_in, //  output data to ofs
             Cell[i][j].resize(cell_nz);
         }
     }
-    this->init_cell_flag = true;
-
     this->true_cell_x = input.getGrid_layerX_minus();
     this->true_cell_y = input.getGrid_layerY_minus();
     this->true_cell_z = input.getGrid_layerZ_minus();
@@ -508,14 +494,3 @@ void Grid::Construct_Adjacent_final(const int i,
         fatom1.addAdjacent(fatom2);
     }
 }
-// 2015-05-07
-void Grid::delete_vector(int i, int j, int k)
-{
-    if (expand_flag)
-    {
-        if (this->pbc)
-        {
-            this->Cell[i][j][k].atom_map.clear();
-        }
-    }
-}
diff --git a/source/module_cell/module_neighbor/sltk_grid.h b/source/module_cell/module_neighbor/sltk_grid.h
index 56a8b6ee3c..6b6883abe8 100644
--- a/source/module_cell/module_neighbor/sltk_grid.h
+++ b/source/module_cell/module_neighbor/sltk_grid.h
@@ -41,10 +41,9 @@ class Grid
     Grid(const int& test_grid_in);
     virtual ~Grid();
 
-    void init(std::ofstream& ofs, const UnitCell& ucell, const Atom_input& input);
+    Grid& operator=(Grid&&) = default;
 
-    // 2015-05-07
-    void delete_vector(int i, int j, int k);
+    void init(std::ofstream& ofs, const UnitCell& ucell, const Atom_input& input);
 
     // Data
     bool pbc; // periodic boundary condition
@@ -64,28 +63,7 @@ class Grid
     int true_cell_z;
 
     std::vector<std::vector<std::vector<CellSet>>> Cell; // dx , dy ,dz is cell number in each direction,respectly.
-    void delete_Cell()                                   // it will replace by container soon!
-    {
-        if (this->init_cell_flag)
-        {
-            for (int i = 0; i < this->cell_nx; i++)
-            {
-                for (int j = 0; j < this->cell_ny; j++)
-                {
-                    this->Cell[i][j].clear();
-                }
-            }
-
-            for (int i = 0; i < this->cell_nx; i++)
-            {
-                this->Cell[i].clear();
-            }
-
-            this->Cell.clear();
-            this->init_cell_flag = false;
-        }
-    }
-    bool init_cell_flag = false;
+
     // LiuXh add 2019-07-15
     double getD_minX() const
     {
@@ -125,8 +103,8 @@ class Grid
         return true_cell_z;
     }
 
-  private:
-    const int test_grid;
+private:
+    int test_grid = 0;
     //==========================================================
     // MEMBER FUNCTIONS :
     // Three Main Steps:
diff --git a/source/module_cell/module_neighbor/sltk_grid_driver.cpp b/source/module_cell/module_neighbor/sltk_grid_driver.cpp
index 8d770ff1f9..b4f3616fed 100644
--- a/source/module_cell/module_neighbor/sltk_grid_driver.cpp
+++ b/source/module_cell/module_neighbor/sltk_grid_driver.cpp
@@ -8,10 +8,6 @@
 #include <omp.h>
 #endif
 
-namespace GlobalC
-{
-Grid_Driver GridD;
-}
 Grid_Driver::Grid_Driver(
 	const int &test_d_in, 
 	const int &test_grid_in)
diff --git a/source/module_cell/module_neighbor/sltk_grid_driver.h b/source/module_cell/module_neighbor/sltk_grid_driver.h
index fbc683cade..4ea18c70de 100644
--- a/source/module_cell/module_neighbor/sltk_grid_driver.h
+++ b/source/module_cell/module_neighbor/sltk_grid_driver.h
@@ -55,6 +55,8 @@ class Grid_Driver : public Grid
 
     ~Grid_Driver();
 
+    Grid_Driver& operator=(Grid_Driver&&) = default;
+
     //==========================================================
     // EXPLAIN FOR default parameter `adjs = nullptr`
     //
@@ -103,7 +105,7 @@ class Grid_Driver : public Grid
   private:
     mutable AdjacentAtomInfo adj_info;
 
-    const int test_deconstructor; // caoyu reconst 2021-05-24
+    int test_deconstructor = 0;
 
     //==========================================================
     // MEMBER FUNCTIONS :
@@ -125,9 +127,4 @@ class Grid_Driver : public Grid
                                                         const short box_y,
                                                         const short box_z) const;
 };
-
-namespace GlobalC
-{
-extern Grid_Driver GridD;
-}
 #endif
diff --git a/source/module_cell/module_neighbor/test/sltk_atom_arrange_test.cpp b/source/module_cell/module_neighbor/test/sltk_atom_arrange_test.cpp
index 369c332bf1..31709e540a 100644
--- a/source/module_cell/module_neighbor/test/sltk_atom_arrange_test.cpp
+++ b/source/module_cell/module_neighbor/test/sltk_atom_arrange_test.cpp
@@ -52,7 +52,6 @@ Magnetism::~Magnetism()
 void SetGlobalV()
 {
     PARAM.input.test_grid = false;
-    PARAM.input.test_deconstructor = false;
 }
 
 class SltkAtomArrangeTest : public testing::Test
diff --git a/source/module_cell/module_neighbor/test/sltk_grid_test.cpp b/source/module_cell/module_neighbor/test/sltk_grid_test.cpp
index af9492e4bc..25b5cf7204 100644
--- a/source/module_cell/module_neighbor/test/sltk_grid_test.cpp
+++ b/source/module_cell/module_neighbor/test/sltk_grid_test.cpp
@@ -84,7 +84,6 @@ TEST_F(SltkGridTest, Init)
     Atom_input Atom_inp(ofs, *ucell, ucell->nat, ucell->ntype, pbc, radius, test_atom_in);
     Grid LatGrid(PARAM.input.test_grid);
     LatGrid.init(ofs, *ucell, Atom_inp);
-    EXPECT_TRUE(LatGrid.init_cell_flag);
     EXPECT_EQ(LatGrid.getCellX(), 11);
     EXPECT_EQ(LatGrid.getCellY(), 11);
     EXPECT_EQ(LatGrid.getCellZ(), 11);
@@ -126,9 +125,6 @@ TEST_F(SltkGridTest, InitSmall)
     EXPECT_EQ(LatGrid.cell_nx, 4);
     EXPECT_EQ(LatGrid.cell_ny, 4);
     EXPECT_EQ(LatGrid.cell_nz, 4);
-    // init cell flag
-    EXPECT_TRUE(LatGrid.init_cell_flag);
-
     ofs.close();
     remove("test.out");
 }
diff --git a/source/module_elecstate/module_dm/test/test_dm_R_init.cpp b/source/module_elecstate/module_dm/test/test_dm_R_init.cpp
index ac2d7161b1..ef4ccef8ce 100644
--- a/source/module_elecstate/module_dm/test/test_dm_R_init.cpp
+++ b/source/module_elecstate/module_dm/test/test_dm_R_init.cpp
@@ -100,7 +100,7 @@ class DMTest : public testing::Test
 #endif
 };
 
-// test for construct DMR from GlobalC::GridD and UnitCell
+// test for construct DMR from GridD and UnitCell
 TEST_F(DMTest, DMInit1)
 {
     // initalize a kvectors
diff --git a/source/module_esolver/esolver_fp.cpp b/source/module_esolver/esolver_fp.cpp
index ba5770414f..94daacd37d 100644
--- a/source/module_esolver/esolver_fp.cpp
+++ b/source/module_esolver/esolver_fp.cpp
@@ -129,6 +129,8 @@ void ESolver_FP::before_all_runners(UnitCell& ucell, const Input_para& inp)
 //! Something to do after SCF iterations when SCF is converged or comes to the max iter step.
 void ESolver_FP::after_scf(UnitCell& ucell, const int istep)
 {
+    ModuleBase::TITLE("ESolver_FP", "after_scf");
+
     // 0) output convergence information
     ModuleIO::output_convergence_after_scf(this->conv_esolver, this->pelec->f_en.etot);
 
diff --git a/source/module_esolver/esolver_gets.cpp b/source/module_esolver/esolver_gets.cpp
index 3cdfcde99a..cf51f6bbbe 100644
--- a/source/module_esolver/esolver_gets.cpp
+++ b/source/module_esolver/esolver_gets.cpp
@@ -94,15 +94,17 @@ void ESolver_GetS::runner(UnitCell& ucell, const int istep)
                                             ucell.infoNL.get_rcutmax_Beta(),
                                             PARAM.globalv.gamma_only_local);
 
+    Grid_Driver gd;
+
     atom_arrange::search(PARAM.inp.search_pbc,
                          GlobalV::ofs_running,
-                         GlobalC::GridD,
+                         gd,
                          ucell,
                          search_radius,
                          PARAM.inp.test_atom_input);
 
     Record_adj RA;
-    RA.for_2d(ucell, GlobalC::GridD, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs());
+    RA.for_2d(ucell, gd, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs());
 
     if (this->p_hamilt == nullptr)
     {
@@ -110,7 +112,7 @@ void ESolver_GetS::runner(UnitCell& ucell, const int istep)
         {
             this->p_hamilt
                 = new hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>(ucell,
-                                                                                     GlobalC::GridD,
+                                                                                     gd,
                                                                                      &this->pv,
                                                                                      this->kv,
                                                                                      *(two_center_bundle_.overlap_orb),
@@ -121,7 +123,7 @@ void ESolver_GetS::runner(UnitCell& ucell, const int istep)
         else
         {
             this->p_hamilt = new hamilt::HamiltLCAO<std::complex<double>, double>(ucell,
-                                                                                  GlobalC::GridD,
+                                                                                  gd,
                                                                                   &this->pv,
                                                                                   this->kv,
                                                                                   *(two_center_bundle_.overlap_orb),
@@ -132,13 +134,13 @@ void ESolver_GetS::runner(UnitCell& ucell, const int istep)
 
     const std::string fn = PARAM.globalv.global_out_dir + "SR.csr";
     std::cout << " The file is saved in " << fn << std::endl;
-    ModuleIO::output_SR(pv, GlobalC::GridD, this->p_hamilt, fn);
+    ModuleIO::output_SR(pv, gd, this->p_hamilt, fn);
 
     if (PARAM.inp.out_mat_r)
     {
         cal_r_overlap_R r_matrix;
         r_matrix.init(ucell,pv, orb_);
-        r_matrix.out_rR(ucell,istep);
+        r_matrix.out_rR(ucell, gd, istep);
     }
 
     ModuleBase::timer::tick("ESolver_GetS", "runner");
diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp
index a79ab77493..e644d19e0a 100644
--- a/source/module_esolver/esolver_ks.cpp
+++ b/source/module_esolver/esolver_ks.cpp
@@ -427,9 +427,7 @@ void ESolver_KS<T, Device>::runner(UnitCell& ucell, const int istep)
     ModuleBase::timer::tick(this->classname, "runner");
 
     // 2) before_scf (electronic iteration loops)
-    ModuleBase::timer::tick(this->classname, "before_scf");
     this->before_scf(ucell, istep);
-    ModuleBase::timer::tick(this->classname, "before_scf");
 
     // 3) write charge density
     if (PARAM.inp.dm_to_rho)
@@ -468,9 +466,7 @@ void ESolver_KS<T, Device>::runner(UnitCell& ucell, const int istep)
     } // end scf iterations
 
     // 9) after scf
-    ModuleBase::timer::tick(this->classname, "after_scf");
     this->after_scf(ucell, istep);
-    ModuleBase::timer::tick(this->classname, "after_scf");
 
     ModuleBase::timer::tick(this->classname, "runner");
     return;
@@ -740,6 +736,8 @@ void ESolver_KS<T, Device>::iter_finish(UnitCell& ucell, const int istep, int& i
 template <typename T, typename Device>
 void ESolver_KS<T, Device>::after_scf(UnitCell& ucell, const int istep)
 {
+    ModuleBase::TITLE("ESolver_KS", "after_scf");
+
     // 1) call after_scf() of ESolver_FP
     ESolver_FP::after_scf(ucell, istep);
 
diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index 3d089e568e..44e8105dc9 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -33,6 +33,7 @@
 //--------------temporary----------------------------
 #include "module_base/global_function.h"
 #include "module_cell/module_neighbor/sltk_grid_driver.h"
+#include "module_elecstate/cal_ux.h"
 #include "module_elecstate/module_charge/symmetry_rho.h"
 #include "module_elecstate/occupy.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" // need divide_HS_in_frag
@@ -40,7 +41,6 @@
 #include "module_hamilt_lcao/module_dftu/dftu.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
 #include "module_io/print_info.h"
-#include "module_elecstate/cal_ux.h"
 
 #include <memory>
 #ifdef __EXX
@@ -65,6 +65,7 @@
 
 // test RDMFT
 #include "module_rdmft/rdmft.h"
+
 #include <iostream>
 
 namespace ModuleESolver
@@ -164,7 +165,7 @@ void ESolver_KS_LCAO<TK, TR>::before_all_runners(UnitCell& ucell, const Input_pa
     // 5) initialize Hamilt in LCAO
     // * allocate H and S matrices according to computational resources
     // * set the 'trace' between local H/S and global H/S
-    LCAO_domain::divide_HS_in_frag(PARAM.globalv.gamma_only_local, ucell , pv, this->kv.get_nks(), orb_);
+    LCAO_domain::divide_HS_in_frag(PARAM.globalv.gamma_only_local, ucell, pv, this->kv.get_nks(), orb_);
 
 #ifdef __EXX
     // 6) initialize exx
@@ -178,12 +179,12 @@ void ESolver_KS_LCAO<TK, TR>::before_all_runners(UnitCell& ucell, const Input_pa
             // initialize 2-center radial tables for EXX-LRI
             if (GlobalC::exx_info.info_ri.real_number)
             {
-                this->exx_lri_double->init(MPI_COMM_WORLD, ucell,this->kv, orb_);
+                this->exx_lri_double->init(MPI_COMM_WORLD, ucell, this->kv, orb_);
                 this->exd->exx_before_all_runners(this->kv, ucell, this->pv);
             }
             else
             {
-                this->exx_lri_complex->init(MPI_COMM_WORLD, ucell,this->kv, orb_);
+                this->exx_lri_complex->init(MPI_COMM_WORLD, ucell, this->kv, orb_);
                 this->exc->exx_before_all_runners(this->kv, ucell, this->pv);
             }
         }
@@ -198,7 +199,7 @@ void ESolver_KS_LCAO<TK, TR>::before_all_runners(UnitCell& ucell, const Input_pa
     }
 
     // 8) initialize ppcell
-    this->ppcell.init_vloc(ucell,this->pw_rho);
+    this->ppcell.init_vloc(ucell, this->pw_rho);
     ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "LOCAL POTENTIAL");
 
     // 9) inititlize the charge density
@@ -257,10 +258,19 @@ void ESolver_KS_LCAO<TK, TR>::before_all_runners(UnitCell& ucell, const Input_pa
     }
 
     // 14) initialize rdmft, added by jghan
-    if( PARAM.inp.rdmft == true )
+    if (PARAM.inp.rdmft == true)
     {
-        rdmft_solver.init( this->GG, this->GK, this->pv, ucell, this->kv, *(this->pelec),
-                                this->orb_, two_center_bundle_, PARAM.inp.dft_functional, PARAM.inp.rdmft_power_alpha);
+        rdmft_solver.init(this->GG,
+                          this->GK,
+                          this->pv,
+                          ucell,
+                          this->gd,
+                          this->kv,
+                          *(this->pelec),
+                          this->orb_,
+                          two_center_bundle_,
+                          PARAM.inp.dft_functional,
+                          PARAM.inp.rdmft_power_alpha);
     }
 
     ModuleBase::timer::tick("ESolver_KS_LCAO", "before_all_runners");
@@ -296,6 +306,7 @@ void ESolver_KS_LCAO<TK, TR>::cal_force(UnitCell& ucell, ModuleBase::matrix& for
                        PARAM.inp.test_force,
                        PARAM.inp.test_stress,
                        ucell,
+                       this->gd,
                        this->pv,
                        this->pelec,
                        this->psi,
@@ -458,7 +469,7 @@ void ESolver_KS_LCAO<TK, TR>::after_all_runners(UnitCell& ucell)
                                     this->kv,
                                     orb_.cutoffs(),
                                     this->pelec->wg,
-                                    GlobalC::GridD
+                                    this->gd
 #ifdef __EXX
                                     ,
                                     this->exx_lri_double ? &this->exx_lri_double->Hexxs : nullptr,
@@ -484,7 +495,7 @@ void ESolver_KS_LCAO<TK, TR>::after_all_runners(UnitCell& ucell)
                                             this->GK,
                                             this->kv,
                                             this->pelec->wg,
-                                            GlobalC::GridD,
+                                            this->gd,
                                             orb_.cutoffs(),
                                             this->two_center_bundle_
 #ifdef __EXX
@@ -647,7 +658,7 @@ void ESolver_KS_LCAO<TK, TR>::iter_init(UnitCell& ucell, const int istep, const
             GlobalC::dftu.set_dmr(dynamic_cast<elecstate::ElecStateLCAO<TK>*>(this->pelec)->get_DM());
         }
         // Calculate U and J if Yukawa potential is used
-        GlobalC::dftu.cal_slater_UJ(ucell,this->pelec->charge->rho, this->pw_rho->nrxx);
+        GlobalC::dftu.cal_slater_UJ(ucell, this->pelec->charge->rho, this->pw_rho->nrxx);
     }
 
 #ifdef __DEEPKS
@@ -706,21 +717,21 @@ void ESolver_KS_LCAO<TK, TR>::hamilt2density_single(UnitCell& ucell, int istep,
     if (PARAM.inp.sc_mag_switch)
     {
         spinconstrain::SpinConstrain<TK>& sc = spinconstrain::SpinConstrain<TK>::getScInstance();
-        if(!sc.mag_converged() && this->drho>0 && this->drho < PARAM.inp.sc_scf_thr)
+        if (!sc.mag_converged() && this->drho > 0 && this->drho < PARAM.inp.sc_scf_thr)
         {
             // optimize lambda to get target magnetic moments, but the lambda is not near target
-            sc.run_lambda_loop(iter-1);
+            sc.run_lambda_loop(iter - 1);
             sc.set_mag_converged(true);
             skip_solve = true;
         }
-        else if(sc.mag_converged())
+        else if (sc.mag_converged())
         {
             // optimize lambda to get target magnetic moments, but the lambda is not near target
-            sc.run_lambda_loop(iter-1);
+            sc.run_lambda_loop(iter - 1);
             skip_solve = true;
         }
     }
-    if(!skip_solve)
+    if (!skip_solve)
     {
         hsolver::HSolverLCAO<TK> hsolver_lcao_obj(&(this->pv), PARAM.inp.ks_solver);
         hsolver_lcao_obj.solve(this->p_hamilt, this->psi[0], this->pelec, skip_charge);
@@ -772,7 +783,6 @@ void ESolver_KS_LCAO<TK, TR>::update_pot(UnitCell& ucell, const int istep, const
     {
         this->pelec->cal_converged();
     }
-
 }
 
 //------------------------------------------------------------------------------
@@ -800,9 +810,14 @@ void ESolver_KS_LCAO<TK, TR>::iter_finish(UnitCell& ucell, const int istep, int&
             {
                 const std::vector<std::vector<TK>>& tmp_dm
                     = dynamic_cast<elecstate::ElecStateLCAO<TK>*>(this->pelec)->get_DM()->get_DMK_vector();
-                ModuleDFTU::dftu_cal_occup_m(iter, ucell,tmp_dm, this->kv, this->p_chgmix->get_mixing_beta(), this->p_hamilt);
+                ModuleDFTU::dftu_cal_occup_m(iter,
+                                             ucell,
+                                             tmp_dm,
+                                             this->kv,
+                                             this->p_chgmix->get_mixing_beta(),
+                                             this->p_hamilt);
             }
-            GlobalC::dftu.cal_energy_correction(ucell,istep);
+            GlobalC::dftu.cal_energy_correction(ucell, istep);
         }
         GlobalC::dftu.output(ucell);
     }
@@ -903,12 +918,14 @@ template <typename TK, typename TR>
 void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
 {
     ModuleBase::TITLE("ESolver_KS_LCAO", "after_scf");
+    ModuleBase::timer::tick("ESolver_KS_LCAO", "after_scf");
+
     // 1) calculate the kinetic energy density tau, sunliang 2024-09-18
     if (PARAM.inp.out_elf[0] > 0)
     {
         this->pelec->cal_tau(*(this->psi));
     }
-    
+
     //! 2) call after_scf() of ESolver_KS
     ESolver_KS<TK>::after_scf(ucell, istep);
 
@@ -978,27 +995,27 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
             if (PARAM.inp.out_mat_hs[0])
             {
                 ModuleIO::save_mat(istep,
-                                    h_mat.p,
-                                    PARAM.globalv.nlocal,
-                                    bit,
-                                    PARAM.inp.out_mat_hs[1],
-                                    1,
-                                    PARAM.inp.out_app_flag,
-                                    "H",
-                                    "data-" + std::to_string(ik),
-                                    this->pv,
-                                    GlobalV::DRANK);
+                                   h_mat.p,
+                                   PARAM.globalv.nlocal,
+                                   bit,
+                                   PARAM.inp.out_mat_hs[1],
+                                   1,
+                                   PARAM.inp.out_app_flag,
+                                   "H",
+                                   "data-" + std::to_string(ik),
+                                   this->pv,
+                                   GlobalV::DRANK);
                 ModuleIO::save_mat(istep,
-                                    s_mat.p,
-                                    PARAM.globalv.nlocal,
-                                    bit,
-                                    PARAM.inp.out_mat_hs[1],
-                                    1,
-                                    PARAM.inp.out_app_flag,
-                                    "S",
-                                    "data-" + std::to_string(ik),
-                                    this->pv,
-                                    GlobalV::DRANK);
+                                   s_mat.p,
+                                   PARAM.globalv.nlocal,
+                                   bit,
+                                   PARAM.inp.out_mat_hs[1],
+                                   1,
+                                   PARAM.inp.out_app_flag,
+                                   "S",
+                                   "data-" + std::to_string(ik),
+                                   this->pv,
+                                   GlobalV::DRANK);
             }
         }
     }
@@ -1019,25 +1036,24 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
 #ifdef __DEEPKS
     if (this->psi != nullptr && (istep % PARAM.inp.out_interval == 0))
     {
-        hamilt::HamiltLCAO<TK, TR>* p_ham_deepks
-            = dynamic_cast<hamilt::HamiltLCAO<TK, TR>*>(this->p_hamilt);
+        hamilt::HamiltLCAO<TK, TR>* p_ham_deepks = dynamic_cast<hamilt::HamiltLCAO<TK, TR>*>(this->p_hamilt);
         std::shared_ptr<LCAO_Deepks> ld_shared_ptr(&GlobalC::ld, [](LCAO_Deepks*) {});
         LCAO_Deepks_Interface<TK, TR> LDI(ld_shared_ptr);
 
         ModuleBase::timer::tick("ESolver_KS_LCAO", "out_deepks_labels");
         LDI.out_deepks_labels(this->pelec->f_en.etot,
-                            this->pelec->klist->get_nks(),
-                            ucell.nat,
-                            PARAM.globalv.nlocal,
-                            this->pelec->ekb,
-                            this->pelec->klist->kvec_d,
-                            ucell,
-                            orb_,
-                            GlobalC::GridD,
-                            &(this->pv),
-                            *(this->psi),
-                            dynamic_cast<const elecstate::ElecStateLCAO<TK>*>(this->pelec)->get_DM(),
-                            p_ham_deepks);
+                              this->pelec->klist->get_nks(),
+                              ucell.nat,
+                              PARAM.globalv.nlocal,
+                              this->pelec->ekb,
+                              this->pelec->klist->kvec_d,
+                              ucell,
+                              orb_,
+                              this->gd,
+                              &(this->pv),
+                              *(this->psi),
+                              dynamic_cast<const elecstate::ElecStateLCAO<TK>*>(this->pelec)->get_DM(),
+                              p_ham_deepks);
 
         ModuleBase::timer::tick("ESolver_KS_LCAO", "out_deepks_labels");
     }
@@ -1045,29 +1061,28 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
 
     //! 9) Perform RDMFT calculations
     /******** test RDMFT *********/
-    if ( PARAM.inp.rdmft == true ) // rdmft, added by jghan, 2024-10-17
+    if (PARAM.inp.rdmft == true) // rdmft, added by jghan, 2024-10-17
     {
         ModuleBase::matrix occ_number_ks(this->pelec->wg);
-        for(int ik=0; ik < occ_number_ks.nr; ++ik) 
-        { 
-            for(int inb=0; inb < occ_number_ks.nc; ++inb)
+        for (int ik = 0; ik < occ_number_ks.nr; ++ik)
+        {
+            for (int inb = 0; inb < occ_number_ks.nc; ++inb)
             {
                 occ_number_ks(ik, inb) /= this->kv.wk[ik];
             }
         }
-        this->rdmft_solver.update_elec(ucell,occ_number_ks, *(this->psi));
+        this->rdmft_solver.update_elec(ucell, occ_number_ks, *(this->psi));
 
-        //! initialize the gradients of Etotal with respect to occupation numbers and wfc, 
-        //! and set all elements to 0. 
+        //! initialize the gradients of Etotal with respect to occupation numbers and wfc,
+        //! and set all elements to 0.
         ModuleBase::matrix dE_dOccNum(this->pelec->wg.nr, this->pelec->wg.nc, true);
-        psi::Psi<TK> dE_dWfc(this->psi->get_nk(), this->psi->get_nbands(), this->psi->get_nbasis()); 
+        psi::Psi<TK> dE_dWfc(this->psi->get_nk(), this->psi->get_nbands(), this->psi->get_nbasis());
         dE_dWfc.zero_out();
 
         double Etotal_RDMFT = this->rdmft_solver.run(dE_dOccNum, dE_dWfc);
     }
     /******** test RDMFT *********/
 
-
 #ifdef __EXX
     // 10) Write RPA information.
     if (PARAM.inp.rpa)
@@ -1081,7 +1096,7 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
                                        this->kv,
                                        orb_);
         rpa_lri_double.init(MPI_COMM_WORLD, this->kv, orb_.cutoffs());
-        rpa_lri_double.out_for_RPA(ucell,this->pv, *(this->psi), this->pelec);
+        rpa_lri_double.out_for_RPA(ucell, this->pv, *(this->psi), this->pelec);
     }
 #endif
 
@@ -1134,19 +1149,28 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
                                     two_center_bundle_,
                                     orb_,
                                     ucell,
-                                    GlobalC::GridD,
+                                    this->gd,
                                     this->kv,
                                     this->p_hamilt);
-        
+
         //! Perform Mulliken charge analysis
         if (PARAM.inp.out_mul)
         {
-            ModuleIO::cal_mag(&(this->pv), this->p_hamilt, this->kv, this->pelec, this->two_center_bundle_, this->orb_, ucell, istep, true);
+            ModuleIO::cal_mag(&(this->pv),
+                              this->p_hamilt,
+                              this->kv,
+                              this->pelec,
+                              this->two_center_bundle_,
+                              this->orb_,
+                              ucell,
+                              this->gd,
+                              istep,
+                              true);
         }
     }
 
     //! 14) Print out atomic magnetization only when 'spin_constraint' is on.
-    if (PARAM.inp.sc_mag_switch) 
+    if (PARAM.inp.sc_mag_switch)
     {
         spinconstrain::SpinConstrain<TK>& sc = spinconstrain::SpinConstrain<TK>::getScInstance();
         sc.cal_mi_lcao(istep);
@@ -1154,7 +1178,7 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
         sc.print_Mag_Force(GlobalV::ofs_running);
     }
 
-    //! 15) Clean up RA. 
+    //! 15) Clean up RA.
     //! this should be last function and put it in the end, mohan request 2024-11-28
     if (!PARAM.inp.cal_force && !PARAM.inp.cal_stress)
     {
@@ -1187,7 +1211,7 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
                                                                     &hR,
                                                                     &ucell,
                                                                     orb_.cutoffs(),
-                                                                    &GlobalC::GridD,
+                                                                    &this->gd,
                                                                     two_center_bundle_.kinetic_orb.get());
 
         const int nspin_k = (PARAM.inp.nspin == 2 ? 2 : 1);
@@ -1226,12 +1250,12 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
                                              PARAM.inp.wannier_spin);
             myWannier.set_tpiba_omega(ucell.tpiba, ucell.omega);
             myWannier.calculate(ucell,
-                                this->pelec->ekb, 
-                                this->pw_wfc, 
-                                this->pw_big, 
-                                this->sf, 
-                                this->kv, 
-                                this->psi, 
+                                this->pelec->ekb,
+                                this->pw_wfc,
+                                this->pw_big,
+                                this->sf,
+                                this->kv,
+                                this->psi,
                                 &(this->pv));
         }
         else if (PARAM.inp.wannier_method == 2)
@@ -1245,27 +1269,25 @@ void ESolver_KS_LCAO<TK, TR>::after_scf(UnitCell& ucell, const int istep)
                                        PARAM.inp.wannier_spin,
                                        orb_);
 
-            myWannier.calculate(ucell,this->pelec->ekb, this->kv, *(this->psi), &(this->pv));
+            myWannier.calculate(ucell, this->gd, this->pelec->ekb, this->kv, *(this->psi), &(this->pv));
         }
         std::cout << FmtCore::format(" >> Finish %s.\n * * * * * *\n", "Wave function to Wannier90");
     }
 
     //! 19) berry phase calculations, added by jingan
-    if (PARAM.inp.calculation == "nscf" && 
-        berryphase::berry_phase_flag && 
-        ModuleSymmetry::Symmetry::symm_flag != 1)
+    if (PARAM.inp.calculation == "nscf" && berryphase::berry_phase_flag && ModuleSymmetry::Symmetry::symm_flag != 1)
     {
         std::cout << FmtCore::format("\n * * * * * *\n << Start %s.\n", "Berry phase calculation");
         berryphase bp(&(this->pv));
-        bp.lcao_init(ucell,
-                     this->kv,
-                     this->GridT,
-                     orb_); // additional step before calling
-                            // macroscopic_polarization (why capitalize
-                            // the function name?)
-        bp.Macroscopic_polarization(ucell,this->pw_wfc->npwk_max, this->psi, this->pw_rho, this->pw_wfc, this->kv);
+        bp.lcao_init(ucell, this->gd, this->kv, this->GridT, orb_);
+        // additional step before calling
+        // macroscopic_polarization (why capitalize
+        // the function name?)
+        bp.Macroscopic_polarization(ucell, this->pw_wfc->npwk_max, this->psi, this->pw_rho, this->pw_wfc, this->kv);
         std::cout << FmtCore::format(" >> Finish %s.\n * * * * * *\n", "Berry phase calculation");
     }
+
+    ModuleBase::timer::tick("ESolver_KS_LCAO", "after_scf");
 }
 
 template class ESolver_KS_LCAO<double, double>;
diff --git a/source/module_esolver/esolver_ks_lcao.h b/source/module_esolver/esolver_ks_lcao.h
index 9c7e8be612..1730f04a36 100644
--- a/source/module_esolver/esolver_ks_lcao.h
+++ b/source/module_esolver/esolver_ks_lcao.h
@@ -58,6 +58,8 @@ class ESolver_KS_LCAO : public ESolver_KS<TK> {
     // we will get rid of this class soon, don't use it, mohan 2024-03-28
     Record_adj RA;
 
+    Grid_Driver gd;
+
     // 2d block-cyclic distribution info
     Parallel_Orbitals pv;
 
diff --git a/source/module_esolver/esolver_ks_lcao_tddft.cpp b/source/module_esolver/esolver_ks_lcao_tddft.cpp
index 880daa8512..fc1ba66bc6 100644
--- a/source/module_esolver/esolver_ks_lcao_tddft.cpp
+++ b/source/module_esolver/esolver_ks_lcao_tddft.cpp
@@ -281,6 +281,9 @@ void ESolver_KS_LCAO_TDDFT::update_pot(UnitCell& ucell, const int istep, const i
 
 void ESolver_KS_LCAO_TDDFT::after_scf(UnitCell& ucell, const int istep)
 {
+    ModuleBase::TITLE("ESolver_KS_LCAO_TDDFT", "after_scf");
+    ModuleBase::timer::tick("ESolver_KS_LCAO_TDDFT", "after_scf");
+
     for (int is = 0; is < PARAM.inp.nspin; is++)
     {
         if (module_tddft::Evolve_elec::out_dipole == 1)
@@ -296,6 +299,7 @@ void ESolver_KS_LCAO_TDDFT::after_scf(UnitCell& ucell, const int istep)
             = dynamic_cast<elecstate::ElecStateLCAO<std::complex<double>>*>(this->pelec)->get_DM();
 
         ModuleIO::write_current(ucell,
+                                this->gd,
                                 istep,
                                 this->psi,
                                 pelec,
@@ -306,6 +310,8 @@ void ESolver_KS_LCAO_TDDFT::after_scf(UnitCell& ucell, const int istep)
                                 this->RA);
     }
     ESolver_KS_LCAO<std::complex<double>, double>::after_scf(ucell, istep);
+
+    ModuleBase::timer::tick("ESolver_KS_LCAO_TDDFT", "after_scf");
 }
 
 void ESolver_KS_LCAO_TDDFT::weight_dm_rho()
diff --git a/source/module_esolver/esolver_ks_pw.cpp b/source/module_esolver/esolver_ks_pw.cpp
index d57c4f5ffc..385cf27621 100644
--- a/source/module_esolver/esolver_ks_pw.cpp
+++ b/source/module_esolver/esolver_ks_pw.cpp
@@ -247,6 +247,7 @@ template <typename T, typename Device>
 void ESolver_KS_PW<T, Device>::before_scf(UnitCell& ucell, const int istep)
 {
     ModuleBase::TITLE("ESolver_KS_PW", "before_scf");
+    ModuleBase::timer::tick("ESolver_KS_PW", "before_scf");
 
     //! 1) call before_scf() of ESolver_KS
     ESolver_KS<T, Device>::before_scf(ucell, istep);
@@ -427,6 +428,8 @@ void ESolver_KS_PW<T, Device>::before_scf(UnitCell& ucell, const int istep)
             this->already_initpsi = true;
         }
     }
+
+    ModuleBase::timer::tick("ESolver_KS_PW", "before_scf");
 }
 
 template <typename T, typename Device>
@@ -647,6 +650,9 @@ void ESolver_KS_PW<T, Device>::iter_finish(UnitCell& ucell, const int istep, int
 template <typename T, typename Device>
 void ESolver_KS_PW<T, Device>::after_scf(UnitCell& ucell, const int istep)
 {
+    ModuleBase::TITLE("ESolver_KS_PW", "after_scf");
+    ModuleBase::timer::tick("ESolver_KS_PW", "after_scf");
+
     // 1) calculate the kinetic energy density tau, sunliang 2024-09-18
     if (PARAM.inp.out_elf[0] > 0)
     {
@@ -741,6 +747,8 @@ void ESolver_KS_PW<T, Device>::after_scf(UnitCell& ucell, const int istep)
         auto* onsite_p = projectors::OnsiteProjector<double, Device>::get_instance();
         onsite_p->cal_occupations(reinterpret_cast<psi::Psi<std::complex<double>, Device>*>(this->kspw_psi), this->pelec->wg);
     }
+
+    ModuleBase::timer::tick("ESolver_KS_PW", "after_scf");
 }
 
 template <typename T, typename Device>
diff --git a/source/module_esolver/esolver_lj.cpp b/source/module_esolver/esolver_lj.cpp
index 379aaced17..ccf8d33c30 100644
--- a/source/module_esolver/esolver_lj.cpp
+++ b/source/module_esolver/esolver_lj.cpp
@@ -88,15 +88,6 @@ void ESolver_LJ::runner(UnitCell& ucell, const int istep)
             lj_virial(i, j) /= (2.0 * ucell.omega);
         }
     }
-#ifdef __MPI
-        atom_arrange::delete_vector(
-            GlobalV::ofs_running,
-            PARAM.inp.search_pbc,
-            grid_neigh,
-            ucell, 
-            search_radius,
-            PARAM.inp.test_atom_input);
-#endif
     }
 
     double ESolver_LJ::cal_energy()
diff --git a/source/module_esolver/esolver_of.cpp b/source/module_esolver/esolver_of.cpp
index 32d4a90b2e..bdb24829e4 100644
--- a/source/module_esolver/esolver_of.cpp
+++ b/source/module_esolver/esolver_of.cpp
@@ -200,6 +200,9 @@ void ESolver_OF::runner(UnitCell& ucell, const int istep)
  */
 void ESolver_OF::before_opt(const int istep, UnitCell& ucell)
 {
+    ModuleBase::TITLE("ESolver_OF", "before_opt");
+    ModuleBase::timer::tick("ESolver_OF", "before_opt");
+
     //! 1) call before_scf() of ESolver_FP
     ESolver_FP::before_scf(ucell, istep);
 
@@ -299,6 +302,8 @@ void ESolver_OF::before_opt(const int istep, UnitCell& ucell)
     {
         this->theta_[0] = 0.2;
     }
+
+    ModuleBase::timer::tick("ESolver_OF", "before_opt");
 }
 
 /**
@@ -483,6 +488,9 @@ bool ESolver_OF::check_exit()
  */
 void ESolver_OF::after_opt(const int istep, UnitCell& ucell)
 {
+    ModuleBase::TITLE("ESolver_OF", "after_opt");
+    ModuleBase::timer::tick("ESolver_OF", "after_opt");
+
     // 1) calculate the kinetic energy density
     if (PARAM.inp.out_elf[0] > 0)
     {
@@ -491,6 +499,8 @@ void ESolver_OF::after_opt(const int istep, UnitCell& ucell)
 
     // 2) call after_scf() of ESolver_FP
     ESolver_FP::after_scf(ucell, istep);
+
+    ModuleBase::timer::tick("ESolver_OF", "after_opt");
 }
 
 /**
diff --git a/source/module_esolver/esolver_sdft_pw.cpp b/source/module_esolver/esolver_sdft_pw.cpp
index fc18e8e302..abeda884b1 100644
--- a/source/module_esolver/esolver_sdft_pw.cpp
+++ b/source/module_esolver/esolver_sdft_pw.cpp
@@ -94,6 +94,9 @@ void ESolver_SDFT_PW<T, Device>::before_all_runners(UnitCell& ucell, const Input
 template <typename T, typename Device>
 void ESolver_SDFT_PW<T, Device>::before_scf(UnitCell& ucell, const int istep)
 {
+    ModuleBase::TITLE("ESolver_SDFT_PW", "before_scf");
+    ModuleBase::timer::tick("ESolver_SDFT_PW", "before_scf");
+
     ESolver_KS_PW<T, Device>::before_scf(ucell, istep);
     delete reinterpret_cast<hamilt::HamiltPW<double>*>(this->p_hamilt);
     this->p_hamilt = new hamilt::HamiltSdftPW<T, Device>(this->pelec->pot,
@@ -110,6 +113,8 @@ void ESolver_SDFT_PW<T, Device>::before_scf(UnitCell& ucell, const int istep)
     {
         this->stowf.update_sto_orbitals(PARAM.inp.seed_sto);
     }
+
+    ModuleBase::timer::tick("ESolver_SDFT_PW", "before_scf");
 }
 
 template <typename T, typename Device>
@@ -122,8 +127,13 @@ void ESolver_SDFT_PW<T, Device>::iter_finish(UnitCell& ucell, const int istep, i
 template <typename T, typename Device>
 void ESolver_SDFT_PW<T, Device>::after_scf(UnitCell& ucell, const int istep)
 {
+    ModuleBase::TITLE("ESolver_SDFT_PW", "after_scf");
+    ModuleBase::timer::tick("ESolver_SDFT_PW", "after_scf");
+
     // 1) call after_scf() of ESolver_KS_PW
     ESolver_KS_PW<T, Device>::after_scf(ucell, istep);
+
+    ModuleBase::timer::tick("ESolver_SDFT_PW", "after_scf");
 }
 
 template <typename T, typename Device>
diff --git a/source/module_esolver/lcao_before_scf.cpp b/source/module_esolver/lcao_before_scf.cpp
index 2066b5069b..9eef0aafce 100644
--- a/source/module_esolver/lcao_before_scf.cpp
+++ b/source/module_esolver/lcao_before_scf.cpp
@@ -43,6 +43,7 @@ template <typename TK, typename TR>
 void ESolver_KS_LCAO<TK, TR>::before_scf(UnitCell& ucell, const int istep)
 {
     ModuleBase::TITLE("ESolver_KS_LCAO", "before_scf");
+    ModuleBase::timer::tick("ESolver_KS_LCAO", "before_scf");
 
     //! 1) call before_scf() of ESolver_KS
     ESolver_KS<TK>::before_scf(ucell, istep);
@@ -80,7 +81,7 @@ void ESolver_KS_LCAO<TK, TR>::before_scf(UnitCell& ucell, const int istep)
 
     atom_arrange::search(PARAM.inp.search_pbc,
                          GlobalV::ofs_running,
-                         GlobalC::GridD,
+                         this->gd,
                          ucell,
                          search_radius,
                          PARAM.inp.test_atom_input);
@@ -110,7 +111,7 @@ void ESolver_KS_LCAO<TK, TR>::before_scf(UnitCell& ucell, const int istep)
                              this->pw_rho->nplane,
                              this->pw_rho->startz_current,
                              ucell,
-                             GlobalC::GridD,
+                             this->gd,
                              dr_uniform,
                              rcuts,
                              psi_u,
@@ -127,7 +128,7 @@ void ESolver_KS_LCAO<TK, TR>::before_scf(UnitCell& ucell, const int istep)
     // (2)For each atom, calculate the adjacent atoms in different cells
     // and allocate the space for H(R) and S(R).
     // If k point is used here, allocate HlocR after atom_arrange.
-    this->RA.for_2d(ucell, GlobalC::GridD, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs());
+    this->RA.for_2d(ucell, this->gd, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs());
 
     // 2. density matrix extrapolation
 
@@ -189,7 +190,7 @@ void ESolver_KS_LCAO<TK, TR>::before_scf(UnitCell& ucell, const int istep)
             PARAM.globalv.gamma_only_local ? &(this->GG) : nullptr,
             PARAM.globalv.gamma_only_local ? nullptr : &(this->GK),
             ucell,
-            GlobalC::GridD,
+            this->gd,
             &this->pv,
             this->pelec->pot,
             this->kv,
@@ -213,15 +214,11 @@ void ESolver_KS_LCAO<TK, TR>::before_scf(UnitCell& ucell, const int istep)
     {
         const Parallel_Orbitals* pv = &this->pv;
         // build and save <psi(0)|alpha(R)> at beginning
-        GlobalC::ld.build_psialpha(PARAM.inp.cal_force,
-                                   ucell,
-                                   orb_,
-                                   GlobalC::GridD,
-                                   *(two_center_bundle_.overlap_orb_alpha));
+        GlobalC::ld.build_psialpha(PARAM.inp.cal_force, ucell, orb_, this->gd, *(two_center_bundle_.overlap_orb_alpha));
 
         if (PARAM.inp.deepks_out_unittest)
         {
-            GlobalC::ld.check_psialpha(PARAM.inp.cal_force, ucell, orb_, GlobalC::GridD);
+            GlobalC::ld.check_psialpha(PARAM.inp.cal_force, ucell, orb_, this->gd);
         }
     }
 #endif
@@ -347,6 +344,7 @@ void ESolver_KS_LCAO<TK, TR>::before_scf(UnitCell& ucell, const int istep)
                                           1);
         }
 
+        ModuleBase::timer::tick("ESolver_KS_LCAO", "before_scf");
         return;
     }
 
@@ -377,6 +375,7 @@ void ESolver_KS_LCAO<TK, TR>::before_scf(UnitCell& ucell, const int istep)
                                 this->sf.strucFac); // add by jghan, 2024-03-16/2024-10-08
     }
 
+    ModuleBase::timer::tick("ESolver_KS_LCAO", "before_scf");
     return;
 }
 
diff --git a/source/module_esolver/lcao_others.cpp b/source/module_esolver/lcao_others.cpp
index fa82e38ff6..55ee33c593 100644
--- a/source/module_esolver/lcao_others.cpp
+++ b/source/module_esolver/lcao_others.cpp
@@ -65,7 +65,7 @@ void ESolver_KS_LCAO<TK, TR>::others(UnitCell& ucell, const int istep)
         double search_radius = PARAM.inp.search_radius;
         atom_arrange::search(PARAM.inp.search_pbc,
                              GlobalV::ofs_running,
-                             GlobalC::GridD,
+                             this->gd,
                              ucell,
                              search_radius,
                              PARAM.inp.test_atom_input,
@@ -84,7 +84,7 @@ void ESolver_KS_LCAO<TK, TR>::others(UnitCell& ucell, const int istep)
 
     atom_arrange::search(PARAM.inp.search_pbc,
                          GlobalV::ofs_running,
-                         GlobalC::GridD,
+                         this->gd,
                          ucell,
                          search_radius,
                          PARAM.inp.test_atom_input);
@@ -114,7 +114,7 @@ void ESolver_KS_LCAO<TK, TR>::others(UnitCell& ucell, const int istep)
                              this->pw_rho->nplane,
                              this->pw_rho->startz_current,
                              ucell,
-                             GlobalC::GridD,
+                             this->gd,
                              dr_uniform,
                              rcuts,
                              psi_u,
@@ -131,7 +131,7 @@ void ESolver_KS_LCAO<TK, TR>::others(UnitCell& ucell, const int istep)
     // (2)For each atom, calculate the adjacent atoms in different cells
     // and allocate the space for H(R) and S(R).
     // If k point is used here, allocate HlocR after atom_arrange.
-    this->RA.for_2d(ucell, GlobalC::GridD, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs());
+    this->RA.for_2d(ucell, this->gd, this->pv, PARAM.globalv.gamma_only_local, orb_.cutoffs());
 
     // 2. density matrix extrapolation
 
@@ -193,7 +193,7 @@ void ESolver_KS_LCAO<TK, TR>::others(UnitCell& ucell, const int istep)
             PARAM.globalv.gamma_only_local ? &(this->GG) : nullptr,
             PARAM.globalv.gamma_only_local ? nullptr : &(this->GK),
             ucell,
-            GlobalC::GridD,
+            this->gd,
             &this->pv,
             this->pelec->pot,
             this->kv,
@@ -217,15 +217,11 @@ void ESolver_KS_LCAO<TK, TR>::others(UnitCell& ucell, const int istep)
     {
         const Parallel_Orbitals* pv = &this->pv;
         // build and save <psi(0)|alpha(R)> at beginning
-        GlobalC::ld.build_psialpha(PARAM.inp.cal_force,
-                                   ucell,
-                                   orb_,
-                                   GlobalC::GridD,
-                                   *(two_center_bundle_.overlap_orb_alpha));
+        GlobalC::ld.build_psialpha(PARAM.inp.cal_force, ucell, orb_, this->gd, *(two_center_bundle_.overlap_orb_alpha));
 
         if (PARAM.inp.deepks_out_unittest)
         {
-            GlobalC::ld.check_psialpha(PARAM.inp.cal_force, ucell, orb_, GlobalC::GridD);
+            GlobalC::ld.check_psialpha(PARAM.inp.cal_force, ucell, orb_, this->gd);
         }
     }
 #endif
@@ -283,7 +279,7 @@ void ESolver_KS_LCAO<TK, TR>::others(UnitCell& ucell, const int istep)
                       PARAM.globalv.global_out_dir,
                       GlobalV::ofs_warning,
                       &ucell,
-                      &GlobalC::GridD,
+                      &this->gd,
                       this->kv);
         }
         else
@@ -312,7 +308,7 @@ void ESolver_KS_LCAO<TK, TR>::others(UnitCell& ucell, const int istep)
                       PARAM.globalv.global_out_dir,
                       GlobalV::ofs_warning,
                       &ucell,
-                      &GlobalC::GridD,
+                      &this->gd,
                       this->kv,
                       PARAM.inp.if_separate_k,
                       &GlobalC::Pgrid,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h
index 171c94a5d8..3f6fc8f01d 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h
@@ -51,6 +51,7 @@ class Force_LCAO
                 const bool isstress,
                 ForceStressArrays& fsr, // mohan add 2024-06-16
                 const UnitCell& ucell,
+                Grid_Driver& gd,
                 const psi::Psi<T>* psi,
                 const elecstate::ElecState* pelec,
                 ModuleBase::matrix& foverlap,
@@ -73,6 +74,7 @@ class Force_LCAO
 
     // get the ds, dt, dvnl.
     void allocate(const UnitCell& ucell,
+                  Grid_Driver& gd,
                   const Parallel_Orbitals& pv,
                   ForceStressArrays& fsr, // mohan add 2024-06-15
                   const TwoCenterBundle& two_center_bundle,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
index 4a262da0e4..c740a96550 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
@@ -1,26 +1,26 @@
 #include "FORCE_STRESS.h"
 
-#include "module_parameter/parameter.h"
 #include "module_hamilt_lcao/module_dftu/dftu.h" //Quxin add for DFT+U on 20201029
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
 #include "module_io/output_log.h"
+#include "module_parameter/parameter.h"
 // new
 #include "module_base/timer.h"
-#include "module_parameter/parameter.h"
 #include "module_cell/module_neighbor/sltk_grid_driver.h"
 #include "module_elecstate/potentials/efield.h"           // liuyu add 2022-05-18
 #include "module_elecstate/potentials/gatefield.h"        // liuyu add 2022-09-13
 #include "module_hamilt_general/module_surchem/surchem.h" //sunml add 2022-08-10
 #include "module_hamilt_general/module_vdw/vdw.h"
+#include "module_parameter/parameter.h"
 #ifdef __DEEPKS
 #include "module_elecstate/elecstate_lcao.h"
-#include "module_hamilt_lcao/module_deepks/LCAO_deepks.h" //caoyu add for deepks 2021-06-03
-#include "module_hamilt_lcao/module_deepks/LCAO_deepks_io.h" // mohan add 2024-07-22 
+#include "module_hamilt_lcao/module_deepks/LCAO_deepks.h"    //caoyu add for deepks 2021-06-03
+#include "module_hamilt_lcao/module_deepks/LCAO_deepks_io.h" // mohan add 2024-07-22
 #endif
+#include "module_elecstate/elecstate_lcao.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_lcao.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h"
-#include "module_elecstate/elecstate_lcao.h"
 
 template <typename T>
 Force_Stress_LCAO<T>::Force_Stress_LCAO(Record_adj& ra, const int nat_in) : RA(&ra), f_pw(nat_in), nat(nat_in)
@@ -36,6 +36,7 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
                                           const bool istestf,
                                           const bool istests,
                                           const UnitCell& ucell,
+                                          Grid_Driver& gd,
                                           Parallel_Orbitals& pv,
                                           const elecstate::ElecState* pelec,
                                           const psi::Psi<T>* psi,
@@ -158,6 +159,7 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
                         isforce,
                         isstress,
                         ucell,
+                        gd,
                         fsr,
                         pelec,
                         psi,
@@ -179,41 +181,38 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
                         pv,
                         kv);
     // calculate force and stress for Nonlocal part
-    if(PARAM.inp.nspin == 1 || PARAM.inp.nspin == 2)
+    if (PARAM.inp.nspin == 1 || PARAM.inp.nspin == 2)
     {
-        hamilt::NonlocalNew<hamilt::OperatorLCAO<T, double>> tmp_nonlocal(
-                    nullptr,
-                    kv.kvec_d,
-                    nullptr,
-                    &ucell,
-                    orb.cutoffs(),
-                    &GlobalC::GridD,
-                    two_center_bundle.overlap_orb_beta.get()
-            );
+        hamilt::NonlocalNew<hamilt::OperatorLCAO<T, double>> tmp_nonlocal(nullptr,
+                                                                          kv.kvec_d,
+                                                                          nullptr,
+                                                                          &ucell,
+                                                                          orb.cutoffs(),
+                                                                          &gd,
+                                                                          two_center_bundle.overlap_orb_beta.get());
 
         const auto* dm_p = dynamic_cast<const elecstate::ElecStateLCAO<T>*>(pelec)->get_DM();
-        if(PARAM.inp.nspin == 2)
+        if (PARAM.inp.nspin == 2)
         {
             const_cast<elecstate::DensityMatrix<T, double>*>(dm_p)->switch_dmr(1);
         }
         const hamilt::HContainer<double>* dmr = dm_p->get_DMR_pointer(1);
         tmp_nonlocal.cal_force_stress(isforce, isstress, dmr, fvnl_dbeta, svnl_dbeta);
-        if(PARAM.inp.nspin == 2)
+        if (PARAM.inp.nspin == 2)
         {
             const_cast<elecstate::DensityMatrix<T, double>*>(dm_p)->switch_dmr(0);
         }
     }
-    else if(PARAM.inp.nspin == 4)
+    else if (PARAM.inp.nspin == 4)
     {
         hamilt::NonlocalNew<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>> tmp_nonlocal(
-                    nullptr,
-                    kv.kvec_d,
-                    nullptr,
-                    &ucell,
-                    orb.cutoffs(),
-                    &GlobalC::GridD,
-                    two_center_bundle.overlap_orb_beta.get()
-            );
+            nullptr,
+            kv.kvec_d,
+            nullptr,
+            &ucell,
+            orb.cutoffs(),
+            &gd,
+            two_center_bundle.overlap_orb_beta.get());
 
         // calculate temporary complex DMR for nonlocal force&stress
         // In fact, only SOC part need the imaginary part of DMR for correct force&stress
@@ -225,7 +224,6 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
         dm_p->cal_DMR_full(&tmp_dmr);
         tmp_nonlocal.cal_force_stress(isforce, isstress, &tmp_dmr, fvnl_dbeta, svnl_dbeta);
     }
-    
 
     //! forces and stress from vdw
     //  Peize Lin add 2014-04-04, update 2021-03-09
@@ -300,13 +298,7 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
         }
         if (PARAM.inp.dft_plus_u == 2)
         {
-            GlobalC::dftu.force_stress(ucell,
-                                       pelec,
-                                       pv,
-                                       fsr, // mohan 2024-06-16
-                                       force_dftu,
-                                       stress_dftu,
-                                       kv);
+            GlobalC::dftu.force_stress(ucell, gd, pelec, pv, fsr, force_dftu, stress_dftu, kv);
         }
         else
         {
@@ -314,7 +306,7 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
                                                                    kv.kvec_d,
                                                                    nullptr, // HR are not used for force&stress
                                                                    ucell,
-                                                                   &GlobalC::GridD,
+                                                                   &gd,
                                                                    two_center_bundle.overlap_orb_onsite.get(),
                                                                    orb.cutoffs(),
                                                                    &GlobalC::dftu);
@@ -326,7 +318,7 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
     // atomic force and stress for DeltaSpin
     ModuleBase::matrix force_dspin;
     ModuleBase::matrix stress_dspin;
-    if(PARAM.inp.sc_mag_switch)
+    if (PARAM.inp.sc_mag_switch)
     {
         if (isforce)
         {
@@ -337,24 +329,22 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
             stress_dspin.create(3, 3);
         }
 
-        hamilt::DeltaSpin<hamilt::OperatorLCAO<T, double>> tmp_dspin(
-                    nullptr,
-                    kv.kvec_d,
-                    nullptr,
-                    ucell,
-                    &GlobalC::GridD,
-                    two_center_bundle.overlap_orb_onsite.get(),
-                    orb.cutoffs()
-            );
+        hamilt::DeltaSpin<hamilt::OperatorLCAO<T, double>> tmp_dspin(nullptr,
+                                                                     kv.kvec_d,
+                                                                     nullptr,
+                                                                     ucell,
+                                                                     &gd,
+                                                                     two_center_bundle.overlap_orb_onsite.get(),
+                                                                     orb.cutoffs());
 
         const auto* dm_p = dynamic_cast<const elecstate::ElecStateLCAO<std::complex<double>>*>(pelec)->get_DM();
-        if(PARAM.inp.nspin == 2)
+        if (PARAM.inp.nspin == 2)
         {
             const_cast<elecstate::DensityMatrix<std::complex<double>, double>*>(dm_p)->switch_dmr(2);
         }
         const hamilt::HContainer<double>* dmr = dm_p->get_DMR_pointer(1);
         tmp_dspin.cal_force_stress(isforce, isstress, dmr, force_dspin, stress_dspin);
-        if(PARAM.inp.nspin == 2)
+        if (PARAM.inp.nspin == 2)
         {
             const_cast<elecstate::DensityMatrix<std::complex<double>, double>*>(dm_p)->switch_dmr(0);
         }
@@ -388,12 +378,12 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
         {
             if (GlobalC::exx_info.info_ri.real_number)
             {
-                exx_lri_double.cal_exx_stress(ucell.omega,ucell.lat0);
+                exx_lri_double.cal_exx_stress(ucell.omega, ucell.lat0);
                 stress_exx = GlobalC::exx_info.info_global.hybrid_alpha * exx_lri_double.stress_exx;
             }
             else
             {
-                exx_lri_complex.cal_exx_stress(ucell.omega,ucell.lat0);
+                exx_lri_complex.cal_exx_stress(ucell.omega, ucell.lat0);
                 stress_exx = GlobalC::exx_info.info_global.hybrid_alpha * exx_lri_complex.stress_exx;
             }
         }
@@ -495,7 +485,7 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
         // pengfei 2016-12-20
         if (ModuleSymmetry::Symmetry::symm_flag == 1)
         {
-            this->forceSymmetry(ucell,fcs, symm);
+            this->forceSymmetry(ucell, fcs, symm);
         }
 
 #ifdef __DEEPKS
@@ -503,17 +493,15 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
         if (PARAM.inp.deepks_out_labels) // not parallelized yet
         {
             const std::string file_ftot = PARAM.globalv.global_out_dir + "deepks_ftot.npy";
-            LCAO_deepks_io::save_npy_f(fcs, 
-                                       file_ftot, 
-                                       ucell.nat, 
+            LCAO_deepks_io::save_npy_f(fcs, file_ftot, ucell.nat,
                                        GlobalV::MY_RANK); // Ty/Bohr, F_tot
 
             if (PARAM.inp.deepks_scf)
             {
                 const std::string file_fbase = PARAM.globalv.global_out_dir + "deepks_fbase.npy";
-                LCAO_deepks_io::save_npy_f(fcs - GlobalC::ld.F_delta, 
-                                           file_fbase, 
-                                           ucell.nat, 
+                LCAO_deepks_io::save_npy_f(fcs - GlobalC::ld.F_delta,
+                                           file_fbase,
+                                           ucell.nat,
                                            GlobalV::MY_RANK); // Ry/Bohr, F_base
 
                 if (!PARAM.inp.deepks_equiv) // training with force label not supported by equivariant version now
@@ -522,7 +510,7 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
                     {
                         const std::vector<std::vector<double>>& dm_gamma
                             = dynamic_cast<const elecstate::ElecStateLCAO<double>*>(pelec)->get_DM()->get_DMK_vector();
-                        GlobalC::ld.cal_gdmx(dm_gamma, ucell, orb, GlobalC::GridD, kv.get_nks(), kv.kvec_d, isstress);
+                        GlobalC::ld.cal_gdmx(dm_gamma, ucell, orb, gd, kv.get_nks(), kv.kvec_d, isstress);
                     }
                     else
                     {
@@ -531,13 +519,7 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
                                   ->get_DM()
                                   ->get_DMK_vector();
 
-                        GlobalC::ld.cal_gdmx(dm_k,
-                                             ucell,
-                                             orb,
-                                             GlobalC::GridD,
-                                             kv.get_nks(),
-                                             kv.kvec_d,
-                                             isstress);
+                        GlobalC::ld.cal_gdmx(dm_k, ucell, orb, gd, kv.get_nks(), kv.kvec_d, isstress);
                     }
                     if (PARAM.inp.deepks_out_unittest)
                     {
@@ -551,19 +533,17 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
                     }
 
                     LCAO_deepks_io::save_npy_gvx(ucell.nat,
-                      GlobalC::ld.des_per_atom,
-                      GlobalC::ld.gvx_tensor,
-                      PARAM.globalv.global_out_dir,
-                      GlobalV::MY_RANK);
+                                                 GlobalC::ld.des_per_atom,
+                                                 GlobalC::ld.gvx_tensor,
+                                                 PARAM.globalv.global_out_dir,
+                                                 GlobalV::MY_RANK);
                 }
             }
             else
             {
                 const std::string file_fbase = PARAM.globalv.global_out_dir + "deepks_fbase.npy";
-                LCAO_deepks_io::save_npy_f(fcs, 
-                  file_fbase, 
-                  ucell.nat,
-                  GlobalV::MY_RANK); // no scf, F_base=F_tot
+                LCAO_deepks_io::save_npy_f(fcs, file_fbase, ucell.nat,
+                                           GlobalV::MY_RANK); // no scf, F_base=F_tot
             }
         }
 #endif
@@ -732,9 +712,9 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
         {
             const std::string file_s = PARAM.globalv.global_out_dir + "deepks_sbase.npy";
             LCAO_deepks_io::save_npy_s(scs,
-                                      file_s,
-                                      ucell.omega,
-                                      GlobalV::MY_RANK); // change to energy unit Ry when printing, S_base;
+                                       file_s,
+                                       ucell.omega,
+                                       GlobalV::MY_RANK); // change to energy unit Ry when printing, S_base;
         }
         if (PARAM.inp.deepks_scf)
         {
@@ -752,12 +732,11 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
         }
         if (PARAM.inp.deepks_out_labels) // not parallelized yet
         {
-			const std::string file_s = PARAM.globalv.global_out_dir + "deepks_stot.npy";
-			LCAO_deepks_io::save_npy_s(
-					scs,
-					file_s,
-					ucell.omega,
-					GlobalV::MY_RANK); // change to energy unit Ry when printing, S_tot, w/ model
+            const std::string file_s = PARAM.globalv.global_out_dir + "deepks_stot.npy";
+            LCAO_deepks_io::save_npy_s(scs,
+                                       file_s,
+                                       ucell.omega,
+                                       GlobalV::MY_RANK); // change to energy unit Ry when printing, S_tot, w/ model
 
             // wenfei add 2021/11/2
             if (PARAM.inp.deepks_scf)
@@ -767,12 +746,11 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
                 {
                     GlobalC::ld.cal_gvepsl(ucell.nat);
 
-                    LCAO_deepks_io::save_npy_gvepsl(
-                      ucell.nat, 
-                      GlobalC::ld.des_per_atom,
-                      GlobalC::ld.gvepsl_tensor,
-                      PARAM.globalv.global_out_dir,
-                      GlobalV::MY_RANK); //  unitless, grad_vepsl
+                    LCAO_deepks_io::save_npy_gvepsl(ucell.nat,
+                                                    GlobalC::ld.des_per_atom,
+                                                    GlobalC::ld.gvepsl_tensor,
+                                                    PARAM.globalv.global_out_dir,
+                                                    GlobalV::MY_RANK); //  unitless, grad_vepsl
                 }
             }
         }
@@ -869,11 +847,11 @@ void Force_Stress_LCAO<T>::calForcePwPart(const UnitCell& ucell,
     // local pseudopotential force:
     // use charge density; plane wave; local pseudopotential;
     //--------------------------------------------------------
-    f_pw.cal_force_loc(ucell,fvl_dvl, rhopw, nlpp.vloc, chr);
+    f_pw.cal_force_loc(ucell, fvl_dvl, rhopw, nlpp.vloc, chr);
     //--------------------------------------------------------
     // ewald force: use plane wave only.
     //--------------------------------------------------------
-    f_pw.cal_force_ew(ucell,fewalds, rhopw, &sf); // remain problem
+    f_pw.cal_force_ew(ucell, fewalds, rhopw, &sf); // remain problem
 
     //--------------------------------------------------------
     // force due to core correlation.
@@ -892,6 +870,7 @@ void Force_Stress_LCAO<double>::integral_part(const bool isGammaOnly,
                                               const bool isforce,
                                               const bool isstress,
                                               const UnitCell& ucell,
+                                              Grid_Driver& gd,
                                               ForceStressArrays& fsr, // mohan add 2024-06-15
                                               const elecstate::ElecState* pelec,
                                               const psi::Psi<double>* psi,
@@ -918,6 +897,7 @@ void Force_Stress_LCAO<double>::integral_part(const bool isGammaOnly,
                isstress,
                fsr, // mohan add 2024-06-15
                ucell,
+               gd,
                psi,
                pelec,
                foverlap,
@@ -943,6 +923,7 @@ void Force_Stress_LCAO<std::complex<double>>::integral_part(const bool isGammaOn
                                                             const bool isforce,
                                                             const bool isstress,
                                                             const UnitCell& ucell,
+                                                            Grid_Driver& gd,
                                                             ForceStressArrays& fsr, // mohan add 2024-06-15
                                                             const elecstate::ElecState* pelec,
                                                             const psi::Psi<std::complex<double>>* psi,
@@ -968,6 +949,7 @@ void Force_Stress_LCAO<std::complex<double>>::integral_part(const bool isGammaOn
                isstress,
                fsr, // mohan add 2024-06-16
                ucell,
+               gd,
                psi,
                pelec,
                foverlap,
@@ -1009,17 +991,17 @@ void Force_Stress_LCAO<T>::calStressPwPart(const UnitCell& ucell,
     // local pseudopotential stress:
     // use charge density; plane wave; local pseudopotential;
     //--------------------------------------------------------
-    sc_pw.stress_loc(ucell,sigmadvl, rhopw, nlpp.vloc, &sf, 0, chr);
+    sc_pw.stress_loc(ucell, sigmadvl, rhopw, nlpp.vloc, &sf, 0, chr);
 
     //--------------------------------------------------------
     // hartree term
     //--------------------------------------------------------
-    sc_pw.stress_har(ucell,sigmahar, rhopw, 0, chr);
+    sc_pw.stress_har(ucell, sigmahar, rhopw, 0, chr);
 
     //--------------------------------------------------------
     // ewald stress: use plane wave only.
     //--------------------------------------------------------
-    sc_pw.stress_ewa(ucell,sigmaewa, rhopw, 0); // remain problem
+    sc_pw.stress_ewa(ucell, sigmaewa, rhopw, 0); // remain problem
 
     //--------------------------------------------------------
     // stress due to core correlation.
@@ -1034,7 +1016,7 @@ void Force_Stress_LCAO<T>::calStressPwPart(const UnitCell& ucell,
         sigmaxc(i, i) = -etxc / ucell.omega;
     }
     // Exchange-correlation for PBE
-    sc_pw.stress_gga(ucell,sigmaxc, rhopw, chr);
+    sc_pw.stress_gga(ucell, sigmaxc, rhopw, chr);
 
     return;
 }
@@ -1042,9 +1024,7 @@ void Force_Stress_LCAO<T>::calStressPwPart(const UnitCell& ucell,
 #include "module_base/mathzone.h"
 // do symmetry for total force
 template <typename T>
-void Force_Stress_LCAO<T>::forceSymmetry(const UnitCell& ucell,
-                                         ModuleBase::matrix& fcs, 
-                                         ModuleSymmetry::Symmetry* symm)
+void Force_Stress_LCAO<T>::forceSymmetry(const UnitCell& ucell, ModuleBase::matrix& fcs, ModuleSymmetry::Symmetry* symm)
 {
     double d1, d2, d3;
     for (int iat = 0; iat < ucell.nat; iat++)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h
index 927d08790e..5593fd0afb 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h
@@ -34,6 +34,7 @@ class Force_Stress_LCAO
                         const bool istestf,
                         const bool istests,
                         const UnitCell& ucell,
+                        Grid_Driver& gd,
                         Parallel_Orbitals& pv,
                         const elecstate::ElecState* pelec,
                         const psi::Psi<T>* psi,
@@ -81,6 +82,7 @@ class Force_Stress_LCAO
                        const bool isforce,
                        const bool isstress,
                        const UnitCell& ucell,
+                       Grid_Driver& gd,
                        ForceStressArrays& fsr, // mohan add 2024-06-15
                        const elecstate::ElecState* pelec,
                        const psi::Psi<T>* psi,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
index 38e5be5051..6542f2a0f6 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
@@ -17,6 +17,7 @@
 
 template <>
 void Force_LCAO<double>::allocate(const UnitCell& ucell,
+                                  Grid_Driver& gd,
                                   const Parallel_Orbitals& pv,
                                   ForceStressArrays& fsr, // mohan add 2024-06-15
                                   const TwoCenterBundle& two_center_bundle,
@@ -80,7 +81,7 @@ void Force_LCAO<double>::allocate(const UnitCell& ucell,
                               orb,
                               pv,
                               two_center_bundle,
-                              &GlobalC::GridD,
+                              &gd,
                               nullptr);
 
     // calculate dT in LCAP
@@ -104,7 +105,7 @@ void Force_LCAO<double>::allocate(const UnitCell& ucell,
                               orb,
                               pv,
                               two_center_bundle,
-                              &GlobalC::GridD,
+                              &gd,
                               nullptr);
 
     // calculate asynchronous S matrix to output for Hefei-NAMD
@@ -175,6 +176,7 @@ void Force_LCAO<double>::ftable(const bool isforce,
                                 const bool isstress,
                                 ForceStressArrays& fsr, // mohan add 2024-06-16
                                 const UnitCell& ucell,
+                                Grid_Driver& gd,
                                 const psi::Psi<double>* psi,
                                 const elecstate::ElecState* pelec,
                                 ModuleBase::matrix& foverlap,
@@ -206,7 +208,7 @@ void Force_LCAO<double>::ftable(const bool isforce,
 
     // allocate DSloc_x, DSloc_y, DSloc_z
     // allocate DHloc_fixed_x, DHloc_fixed_y, DHloc_fixed_z
-    this->allocate(ucell,pv, fsr, two_center_bundle, orb);
+    this->allocate(ucell, gd, pv, fsr, two_center_bundle, orb);
 
     const double* dSx[3] = { fsr.DSloc_x, fsr.DSloc_y, fsr.DSloc_z };
     const double* dSxy[6] = { fsr.DSloc_11, fsr.DSloc_12, fsr.DSloc_13, fsr.DSloc_22, fsr.DSloc_23, fsr.DSloc_33 };
@@ -229,25 +231,24 @@ void Force_LCAO<double>::ftable(const bool isforce,
         const std::vector<std::vector<double>>& dm_gamma = dm->get_DMK_vector();
 
         // when deepks_scf is on, the init pdm should be same as the out pdm, so we should not recalculate the pdm
-        //GlobalC::ld.cal_projected_DM(dm, ucell, orb, GlobalC::GridD);
+        // GlobalC::ld.cal_projected_DM(dm, ucell, orb, gd);
 
         GlobalC::ld.cal_descriptor(ucell.nat);
 
         GlobalC::ld.cal_gedm(ucell.nat);
 
-		DeePKS_domain::cal_f_delta_gamma(
-				dm_gamma, 
-				ucell, 
-				orb, 
-				GlobalC::GridD, 
-                *this->ParaV,
-                GlobalC::ld.lmaxd,
-                GlobalC::ld.nlm_save,
-                GlobalC::ld.gedm,
-                GlobalC::ld.inl_index,
-                GlobalC::ld.F_delta,
-				isstress, 
-				svnl_dalpha);
+        DeePKS_domain::cal_f_delta_gamma(dm_gamma,
+                                         ucell,
+                                         orb,
+                                         gd,
+                                         *this->ParaV,
+                                         GlobalC::ld.lmaxd,
+                                         GlobalC::ld.nlm_save,
+                                         GlobalC::ld.gedm,
+                                         GlobalC::ld.inl_index,
+                                         GlobalC::ld.F_delta,
+                                         isstress,
+                                         svnl_dalpha);
 
 #ifdef __MPI
         Parallel_Reduce::reduce_all(GlobalC::ld.F_delta.c, GlobalC::ld.F_delta.nr * GlobalC::ld.F_delta.nc);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
index f00b089a75..6b88139fc4 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
@@ -27,6 +27,7 @@
 
 template <>
 void Force_LCAO<std::complex<double>>::allocate(const UnitCell& ucell,
+                                                Grid_Driver& gd,
                                                 const Parallel_Orbitals& pv,
                                                 ForceStressArrays& fsr, // mohan add 2024-06-15
                                                 const TwoCenterBundle& two_center_bundle,
@@ -98,7 +99,7 @@ void Force_LCAO<std::complex<double>>::allocate(const UnitCell& ucell,
                               orb,
                               pv,
                               two_center_bundle,
-                              &GlobalC::GridD,
+                              &gd,
                               nullptr); // delete lm.SlocR
 
     //-----------------------------------------
@@ -129,7 +130,7 @@ void Force_LCAO<std::complex<double>>::allocate(const UnitCell& ucell,
                               orb,
                               pv,
                               two_center_bundle,
-                              &GlobalC::GridD,
+                              &gd,
                               nullptr); // delete lm.Hloc_fixedR
 
     // calculate asynchronous S matrix to output for Hefei-NAMD
@@ -148,7 +149,7 @@ void Force_LCAO<std::complex<double>>::allocate(const UnitCell& ucell,
                                   orb,
                                   pv,
                                   two_center_bundle,
-                                  &(GlobalC::GridD),
+                                  &(gd),
                                   nullptr, // delete lm.SlocR
                                   PARAM.inp.cal_syns,
                                   PARAM.inp.dmax);
@@ -271,6 +272,7 @@ void Force_LCAO<std::complex<double>>::ftable(const bool isforce,
                                               const bool isstress,
                                               ForceStressArrays& fsr, // mohan add 2024-06-15
                                               const UnitCell& ucell,
+                                              Grid_Driver& gd,
                                               const psi::Psi<std::complex<double>>* psi,
                                               const elecstate::ElecState* pelec,
                                               ModuleBase::matrix& foverlap,
@@ -298,6 +300,7 @@ void Force_LCAO<std::complex<double>>::ftable(const bool isforce,
         = dynamic_cast<const elecstate::ElecStateLCAO<std::complex<double>>*>(pelec)->get_DM();
 
     this->allocate(ucell,
+                   gd,
                    pv,
                    fsr, // mohan add 2024-06-16
                    two_center_bundle,
@@ -327,27 +330,26 @@ void Force_LCAO<std::complex<double>>::ftable(const bool isforce,
         const std::vector<std::vector<std::complex<double>>>& dm_k = dm->get_DMK_vector();
 
         // when deepks_scf is on, the init pdm should be same as the out pdm, so we should not recalculate the pdm
-        //GlobalC::ld.cal_projected_DM_k(dm, ucell, orb, GlobalC::GridD);
+        // GlobalC::ld.cal_projected_DM_k(dm, ucell, orb, gd);
 
         GlobalC::ld.cal_descriptor(ucell.nat);
 
         GlobalC::ld.cal_gedm(ucell.nat);
 
-	    DeePKS_domain::cal_f_delta_k(
-				dm_k, 
-				ucell, 
-				orb, 
-				GlobalC::GridD, 
-                pv,
-                GlobalC::ld.lmaxd,
-				kv->get_nks(), 
-				kv->kvec_d, 
-                GlobalC::ld.nlm_save_k,
-                GlobalC::ld.gedm,
-                GlobalC::ld.inl_index,
-                GlobalC::ld.F_delta,
-				isstress, 
-				svnl_dalpha);
+        DeePKS_domain::cal_f_delta_k(dm_k,
+                                     ucell,
+                                     orb,
+                                     gd,
+                                     pv,
+                                     GlobalC::ld.lmaxd,
+                                     kv->get_nks(),
+                                     kv->kvec_d,
+                                     GlobalC::ld.nlm_save_k,
+                                     GlobalC::ld.gedm,
+                                     GlobalC::ld.inl_index,
+                                     GlobalC::ld.F_delta,
+                                     isstress,
+                                     svnl_dalpha);
 
 #ifdef __MPI
         Parallel_Reduce::reduce_all(GlobalC::ld.F_delta.c, GlobalC::ld.F_delta.nr * GlobalC::ld.F_delta.nc);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
index 572c26e4d6..b08da06af8 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
@@ -192,7 +192,7 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
                                                                     this->hR, // no explicit call yet
                                                                     &ucell,
                                                                     orb.cutoffs(),
-                                                                    &GlobalC::GridD,
+                                                                    &grid_d,
                                                                     PARAM.inp.nspin);
                 this->getOperator()->add(veff);
             }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
index 47d2f57c9f..476fa43ee7 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
@@ -1,8 +1,8 @@
 #include "deepks_lcao.h"
 
-#include "module_parameter/parameter.h"
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
+#include "module_parameter/parameter.h"
 #ifdef __DEEPKS
 #include "module_hamilt_lcao/module_deepks/LCAO_deepks.h"
 #endif
@@ -25,11 +25,11 @@ DeePKS<OperatorLCAO<TK, TR>>::DeePKS(HS_Matrix_K<TK>* hsk_in,
                                      const LCAO_Orbitals* ptr_orb,
                                      const int& nks_in,
                                      elecstate::DensityMatrix<TK, double>* DM_in)
-    : OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in),
-      DM(DM_in), ucell(ucell_in), 
-      intor_orb_alpha_(intor_orb_alpha), ptr_orb_(ptr_orb), nks(nks_in)
+    : OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), DM(DM_in), ucell(ucell_in), intor_orb_alpha_(intor_orb_alpha),
+      ptr_orb_(ptr_orb), nks(nks_in)
 {
     this->cal_type = calculation_type::lcao_deepks;
+    this->gd = GridD_in;
 #ifdef __DEEPKS
     this->initialize_HR(GridD_in);
 #endif
@@ -52,7 +52,7 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* Gr
     ModuleBase::TITLE("DeePKS", "initialize_HR");
     ModuleBase::timer::tick("DeePKS", "initialize_HR");
 
-    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     // this->H_V_delta = new HContainer<TR>(paraV);
@@ -158,7 +158,7 @@ void DeePKS<OperatorLCAO<double, double>>::contributeHR()
     {
         ModuleBase::timer::tick("DeePKS", "contributeHR");
         const Parallel_Orbitals* pv = this->hsk->get_pv();
-        GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, GlobalC::GridD);
+        GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, *(this->gd));
         GlobalC::ld.cal_descriptor(this->ucell->nat);
         GlobalC::ld.cal_gedm(this->ucell->nat);
         // recalculate the H_V_delta
@@ -186,7 +186,7 @@ void DeePKS<OperatorLCAO<std::complex<double>, double>>::contributeHR()
     {
         ModuleBase::timer::tick("DeePKS", "contributeHR");
 
-        GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, GlobalC::GridD);
+        GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, *this->gd);
         GlobalC::ld.cal_descriptor(this->ucell->nat);
         // calculate dE/dD
         GlobalC::ld.cal_gedm(this->ucell->nat);
@@ -219,7 +219,7 @@ void DeePKS<OperatorLCAO<std::complex<double>, std::complex<double>>>::contribut
     {
         ModuleBase::timer::tick("DeePKS", "contributeHR");
 
-        GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, GlobalC::GridD);
+        GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, *this->gd);
         GlobalC::ld.cal_descriptor(this->ucell->nat);
         // calculate dE/dD
         GlobalC::ld.cal_gedm(this->ucell->nat);
@@ -290,9 +290,10 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::pre_calculate_nlm(
             ModuleBase::Vector3<double> dtau = tau0 - tau1;
             intor_orb_alpha_->snap(T1, L1, N1, M1, 0, dtau * ucell->lat0, false /*calc_deri*/, nlm);
             nlm_in[ad].insert({all_indexes[iw1l], nlm[0]});
-            if (npol == 2) {
+            if (npol == 2)
+            {
                 nlm_in[ad].insert({all_indexes[iw1l + 1], nlm[0]});
-}
+            }
         }
     }
 }
@@ -386,9 +387,10 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::calculate_HR()
             ModuleBase::Vector3<int>& R_index1 = adjs.box[ad1];
             auto row_indexes = paraV->get_indexes_row(iat1);
             const int row_size = row_indexes.size();
-            if (row_size == 0) {
+            if (row_size == 0)
+            {
                 continue;
-}
+            }
 
             std::vector<double> s_1t(trace_alpha_size * row_size);
             for (int irow = 0; irow < row_size; irow++)
@@ -412,9 +414,10 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::calculate_HR()
                 hamilt::BaseMatrix<TR>* tmp
                     = this->H_V_delta->find_matrix(iat1, iat2, R_vector[0], R_vector[1], R_vector[2]);
                 // if not found , skip this pair of atoms
-                if (tmp == nullptr) {
+                if (tmp == nullptr)
+                {
                     continue;
-}
+                }
                 auto col_indexes = paraV->get_indexes_col(iat2);
                 const int col_size = col_indexes.size();
                 std::vector<double> hr_current(row_size * col_size, 0);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
index f95b18a607..32f7d1cddc 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
@@ -59,6 +59,8 @@ class DeePKS<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 
     const UnitCell* ucell = nullptr;
 
+    Grid_Driver* gd = nullptr;
+
     HContainer<TR>* H_V_delta = nullptr;
 
     // the following variable is introduced temporarily during LCAO refactoring
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp
index 38f09cf72c..9f5bb551eb 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp
@@ -174,7 +174,8 @@ Grid::Grid(const int& test_grid_in) : test_grid(test_grid_in) {}
 Grid::~Grid() {}
 Grid_Driver::Grid_Driver(const int& test_d_in,
                          const int& test_grid_in)
-    : Grid(test_grid_in), test_deconstructor(test_d_in) {}
+    : Grid(test_grid_in), test_deconstructor(test_d_in) {
+}
 Grid_Driver::~Grid_Driver() {}
 
 // filter_adjs delete not adjacent atoms in adjs
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
index 9710ec4b26..31be5f4582 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
@@ -34,16 +34,16 @@ void sparse_format::cal_dH(const UnitCell& ucell,
     const bool cal_deri = true;
     const bool cal_stress = false;
     LCAO_domain::build_ST_new(fsr_dh,
-                                'T',
-                                cal_deri,
-                                cal_stress,
-                                ucell,
-                                orb,
-                                pv,
-                                two_center_bundle,
-                                &GlobalC::GridD,
-                                nullptr,
-                                false); // delete unused parameter lm.Hloc_fixedR
+                              'T',
+                              cal_deri,
+                              cal_stress,
+                              ucell,
+                              orb,
+                              pv,
+                              two_center_bundle,
+                              &grid,
+                              nullptr,
+                              false); // delete unused parameter lm.Hloc_fixedR
 
     LCAO_domain::build_Nonlocal_mu_new(pv,
                                        fsr_dh,
@@ -52,7 +52,7 @@ void sparse_format::cal_dH(const UnitCell& ucell,
                                        ucell,
                                        orb,
                                        *(two_center_bundle.overlap_orb_beta),
-                                       &GlobalC::GridD);
+                                       &grid);
 
     sparse_format::cal_dSTN_R(ucell,pv, HS_Arrays, fsr_dh, grid, orb.cutoffs(), current_spin, sparse_thr);
 
@@ -60,8 +60,7 @@ void sparse_format::cal_dH(const UnitCell& ucell,
     delete[] fsr_dh.DHloc_fixedR_y;
     delete[] fsr_dh.DHloc_fixedR_z;
 
-    gint_k
-        .cal_dvlocal_R_sparseMatrix(current_spin, sparse_thr, HS_Arrays, &pv, ucell, GlobalC::GridD);
+    gint_k.cal_dvlocal_R_sparseMatrix(current_spin, sparse_thr, HS_Arrays, &pv, ucell, grid);
 
     return;
 }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp
index e4fbba3624..1435c91e3e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp
@@ -79,7 +79,7 @@ void sparse_format::cal_TR(const UnitCell& ucell,
                               orb,
                               pv,
                               two_center_bundle,
-                              &(GlobalC::GridD),
+                              &(grid),
                               HS_Arrays.Hloc_fixedR.data());
 
     sparse_format::set_R_range(HS_Arrays.all_R_coor, grid);
diff --git a/source/module_hamilt_lcao/module_dftu/dftu.h b/source/module_hamilt_lcao/module_dftu/dftu.h
index 68aae44516..7e8909c96f 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu.h
+++ b/source/module_hamilt_lcao/module_dftu/dftu.h
@@ -180,16 +180,15 @@ class DFTU
     // dim = 1-3 : dS, for force
     // dim = 4-6 : dS * dR, for stress
 
-    void folding_matrix_k(
-        const UnitCell &ucell,
-        ForceStressArrays &fsr,
-        const Parallel_Orbitals &pv,
-        const int ik, 
-        const int dim1, 
-        const int dim2, 
-        std::complex<double>* mat_k, 
-        const std::vector<ModuleBase::Vector3<double>> &kvec_d);
-
+    void folding_matrix_k(const UnitCell& ucell,
+                          Grid_Driver& gd,
+                          ForceStressArrays& fsr,
+                          const Parallel_Orbitals& pv,
+                          const int ik,
+                          const int dim1,
+                          const int dim2,
+                          std::complex<double>* mat_k,
+                          const std::vector<ModuleBase::Vector3<double>>& kvec_d);
 
     /**
      * @brief new function of folding_S_matrix
@@ -202,9 +201,9 @@ class DFTU
     // In dftu_force.cpp
     // For calculating force and stress fomr DFT+U
     //=============================================================
-  public:
-
+ public:
    void force_stress(const UnitCell& ucell,
+                     Grid_Driver& gd,
                      const elecstate::ElecState* pelec,
                      const Parallel_Orbitals& pv,
                      ForceStressArrays& fsr,
@@ -212,43 +211,42 @@ class DFTU
                      ModuleBase::matrix& stress_dftu,
                      const K_Vectors& kv);
 
-  private:
-
-   void cal_force_k(const UnitCell &ucell,
-                    ForceStressArrays &fsr,
-                    const Parallel_Orbitals &pv,
+ private:
+   void cal_force_k(const UnitCell& ucell,
+                    Grid_Driver& gd,
+                    ForceStressArrays& fsr,
+                    const Parallel_Orbitals& pv,
                     const int ik,
                     const std::complex<double>* rho_VU,
                     ModuleBase::matrix& force_dftu,
                     const std::vector<ModuleBase::Vector3<double>>& kvec_d);
 
-    void cal_stress_k(
-      const UnitCell &ucell,
-			ForceStressArrays &fsr,
-			const Parallel_Orbitals &pv,
-			const int ik,
-			const std::complex<double>* rho_VU,
-			ModuleBase::matrix& stress_dftu,
-			const std::vector<ModuleBase::Vector3<double>>& kvec_d);
-
-	void cal_force_gamma(const UnitCell &ucell,
-                       const double* rho_VU, 
-                       const Parallel_Orbitals &pv,
-                       double* dsloc_x,
-                       double* dsloc_y,
-                       double* dsloc_z,
-                       ModuleBase::matrix& force_dftu);
-
-	void cal_stress_gamma(
-			const UnitCell &ucell,
-			const Parallel_Orbitals &pv,
-			Grid_Driver* gd,
-			double* dsloc_x,
-			double* dsloc_y,
-			double* dsloc_z,
-			double* dh_r,
-			const double* rho_VU, 
-			ModuleBase::matrix& stress_dftu);
+   void cal_stress_k(const UnitCell& ucell,
+                     Grid_Driver& gd,
+                     ForceStressArrays& fsr,
+                     const Parallel_Orbitals& pv,
+                     const int ik,
+                     const std::complex<double>* rho_VU,
+                     ModuleBase::matrix& stress_dftu,
+                     const std::vector<ModuleBase::Vector3<double>>& kvec_d);
+
+   void cal_force_gamma(const UnitCell& ucell,
+                        const double* rho_VU,
+                        const Parallel_Orbitals& pv,
+                        double* dsloc_x,
+                        double* dsloc_y,
+                        double* dsloc_z,
+                        ModuleBase::matrix& force_dftu);
+
+   void cal_stress_gamma(const UnitCell& ucell,
+                         const Parallel_Orbitals& pv,
+                         Grid_Driver* gd,
+                         double* dsloc_x,
+                         double* dsloc_y,
+                         double* dsloc_z,
+                         double* dh_r,
+                         const double* rho_VU,
+                         ModuleBase::matrix& stress_dftu);
 #endif
 
     //=============================================================
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp b/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
index aac95478d7..12904bbd72 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
@@ -126,25 +126,33 @@ void DFTU::fold_dSR_gamma(
     return;
 }
 
-void DFTU::folding_matrix_k(
-        const UnitCell &ucell,
-        ForceStressArrays &fsr,
-        const Parallel_Orbitals &pv,
-		const int ik, 
-		const int dim1, 
-		const int dim2, 
-		std::complex<double>* mat_k, 
-		const std::vector<ModuleBase::Vector3<double>> &kvec_d)
+void DFTU::folding_matrix_k(const UnitCell& ucell,
+                            Grid_Driver& gd,
+                            ForceStressArrays& fsr,
+                            const Parallel_Orbitals& pv,
+                            const int ik,
+                            const int dim1,
+                            const int dim2,
+                            std::complex<double>* mat_k,
+                            const std::vector<ModuleBase::Vector3<double>>& kvec_d)
 {
     ModuleBase::TITLE("DFTU", "folding_matrix_k");
     ModuleBase::timer::tick("DFTU", "folding_matrix_k");
     ModuleBase::GlobalFunc::ZEROS(mat_k, pv.nloc);
 
     double* mat_ptr;
-    if      (dim1 == 1 || dim1 == 4) { mat_ptr = fsr.DSloc_Rx;
-    } else if (dim1 == 2 || dim1 == 5) { mat_ptr = fsr.DSloc_Ry;
-    } else if (dim1 == 3 || dim1 == 6) { mat_ptr = fsr.DSloc_Rz;
-}
+    if (dim1 == 1 || dim1 == 4)
+    {
+        mat_ptr = fsr.DSloc_Rx;
+    }
+    else if (dim1 == 2 || dim1 == 5)
+    {
+        mat_ptr = fsr.DSloc_Ry;
+    }
+    else if (dim1 == 3 || dim1 == 6)
+    {
+        mat_ptr = fsr.DSloc_Rz;
+    }
 
     int nnr = 0;
     ModuleBase::Vector3<double> dtau;
@@ -161,18 +169,18 @@ void DFTU::folding_matrix_k(
         for (int I1 = 0; I1 < atom1->na; ++I1)
         {
             tau1 = atom1->tau[I1];
-            GlobalC::GridD.Find_atom(ucell, tau1, T1, I1);
+            gd.Find_atom(ucell, tau1, T1, I1);
             Atom* atom1 = &ucell.atoms[T1];
             const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
 
             // (2) search among all adjacent atoms.
-            for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum() + 1; ++ad)
+            for (int ad = 0; ad < gd.getAdjacentNum() + 1; ++ad)
             {
-                const int T2 = GlobalC::GridD.getType(ad);
-                const int I2 = GlobalC::GridD.getNatom(ad);
+                const int T2 = gd.getType(ad);
+                const int I2 = gd.getNatom(ad);
                 Atom* atom2 = &ucell.atoms[T2];
 
-                tau2 = GlobalC::GridD.getAdjacentTau(ad);
+                tau2 = gd.getAdjacentTau(ad);
                 dtau = tau2 - tau1;
                 double distance = dtau.norm() * ucell.lat0;
                 double rcut = orb_cutoff_[T1] + orb_cutoff_[T2];
@@ -185,12 +193,12 @@ void DFTU::folding_matrix_k(
                 }
                 else if (distance >= rcut)
                 {
-                    for (int ad0 = 0; ad0 < GlobalC::GridD.getAdjacentNum() + 1; ++ad0)
+                    for (int ad0 = 0; ad0 < gd.getAdjacentNum() + 1; ++ad0)
                     {
-                        const int T0 = GlobalC::GridD.getType(ad0);
-                        const int I0 = GlobalC::GridD.getNatom(ad0);
+                        const int T0 = gd.getType(ad0);
+                        const int I0 = gd.getNatom(ad0);
 
-                        tau0 = GlobalC::GridD.getAdjacentTau(ad0);
+                        tau0 = gd.getAdjacentTau(ad0);
                         dtau1 = tau0 - tau1;
                         dtau2 = tau0 - tau2;
 
@@ -216,9 +224,7 @@ void DFTU::folding_matrix_k(
                     // exp(k dot dR)
                     // dR is the index of box in Crystal coordinates
                     //------------------------------------------------
-                    ModuleBase::Vector3<double> dR(GlobalC::GridD.getBox(ad).x,
-                                                   GlobalC::GridD.getBox(ad).y,
-                                                   GlobalC::GridD.getBox(ad).z);
+                    ModuleBase::Vector3<double> dR(gd.getBox(ad).x, gd.getBox(ad).y, gd.getBox(ad).z);
                     const double arg = (kvec_d[ik] * dR) * ModuleBase::TWO_PI;
                     const std::complex<double> kphase = std::complex<double>(cos(arg), sin(arg));
 
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_force.cpp b/source/module_hamilt_lcao/module_dftu/dftu_force.cpp
index 3ab4ef2496..1903a33899 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_force.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_force.cpp
@@ -73,6 +73,7 @@ namespace ModuleDFTU
 {
 
 void DFTU::force_stress(const UnitCell& ucell,
+                        Grid_Driver& gd,
                         const elecstate::ElecState* pelec,
                         const Parallel_Orbitals& pv,
                         ForceStressArrays& fsr, // mohan add 2024-06-16
@@ -149,7 +150,7 @@ void DFTU::force_stress(const UnitCell& ucell,
             {
                 this->cal_stress_gamma(ucell,
                                        pv,
-                                       &GlobalC::GridD,
+                                       &gd,
                                        fsr.DSloc_x,
                                        fsr.DSloc_y,
                                        fsr.DSloc_z,
@@ -208,11 +209,11 @@ void DFTU::force_stress(const UnitCell& ucell,
 
             if (PARAM.inp.cal_force)
             {
-                cal_force_k(ucell,fsr, pv, ik, &rho_VU[0], force_dftu, kv.kvec_d);
+                cal_force_k(ucell, gd, fsr, pv, ik, &rho_VU[0], force_dftu, kv.kvec_d);
             }
             if (PARAM.inp.cal_stress)
             {
-                cal_stress_k(ucell,fsr, pv, ik, &rho_VU[0], stress_dftu, kv.kvec_d);
+                cal_stress_k(ucell, gd, fsr, pv, ik, &rho_VU[0], stress_dftu, kv.kvec_d);
             }
         } // ik
     }
@@ -249,6 +250,7 @@ void DFTU::force_stress(const UnitCell& ucell,
 }
 
 void DFTU::cal_force_k(const UnitCell& ucell,
+                       Grid_Driver& gd,
                        ForceStressArrays& fsr,
                        const Parallel_Orbitals& pv,
                        const int ik,
@@ -270,7 +272,7 @@ void DFTU::cal_force_k(const UnitCell& ucell,
 
     for (int dim = 0; dim < 3; dim++)
     {
-        this->folding_matrix_k(ucell,fsr, pv, ik, dim + 1, 0, &dSm_k[0], kvec_d);
+        this->folding_matrix_k(ucell, gd, fsr, pv, ik, dim + 1, 0, &dSm_k[0], kvec_d);
 
 #ifdef __MPI
         pzgemm_(&transN,
@@ -378,6 +380,7 @@ void DFTU::cal_force_k(const UnitCell& ucell,
 }
 
 void DFTU::cal_stress_k(const UnitCell& ucell,
+                        Grid_Driver& gd,
                         ForceStressArrays& fsr,
                         const Parallel_Orbitals& pv,
                         const int ik,
@@ -403,14 +406,7 @@ void DFTU::cal_stress_k(const UnitCell& ucell,
     {
         for (int dim2 = dim1; dim2 < 3; dim2++)
         {
-            this->folding_matrix_k(ucell,
-                                   fsr, // mohan add 2024-06-16
-                                   pv,
-                                   ik,
-                                   dim1 + 4,
-                                   dim2,
-                                   &dSR_k[0],
-                                   kvec_d);
+            this->folding_matrix_k(ucell, gd, fsr, pv, ik, dim1 + 4, dim2, &dSR_k[0], kvec_d);
 
 #ifdef __MPI
             pzgemm_(&transN,
diff --git a/source/module_io/berryphase.cpp b/source/module_io/berryphase.cpp
index a8b5eef5b3..d2ce73230d 100644
--- a/source/module_io/berryphase.cpp
+++ b/source/module_io/berryphase.cpp
@@ -40,11 +40,15 @@ void berryphase::get_occupation_bands()
 }
 
 #ifdef __LCAO
-void berryphase::lcao_init(const UnitCell& ucell, const K_Vectors& kv, const Grid_Technique& grid_tech, const LCAO_Orbitals& orb)
+void berryphase::lcao_init(const UnitCell& ucell,
+                           Grid_Driver& gd,
+                           const K_Vectors& kv,
+                           const Grid_Technique& grid_tech,
+                           const LCAO_Orbitals& orb)
 {
     ModuleBase::TITLE("berryphase", "lcao_init");
     lcao_method.init(ucell,grid_tech, kv.get_nkstot(), orb);
-    lcao_method.cal_R_number(ucell);
+    lcao_method.cal_R_number(ucell, gd);
     lcao_method.cal_orb_overlap(ucell);
     return;
 }
diff --git a/source/module_io/berryphase.h b/source/module_io/berryphase.h
index 7a3707c711..c0fbe215be 100644
--- a/source/module_io/berryphase.h
+++ b/source/module_io/berryphase.h
@@ -37,7 +37,11 @@ class berryphase
 
     void get_occupation_bands();
 #ifdef __LCAO
-    void lcao_init(const UnitCell& ucell, const K_Vectors& kv, const Grid_Technique& grid_tech, const LCAO_Orbitals& orb);
+    void lcao_init(const UnitCell& ucell,
+                   Grid_Driver& gd,
+                   const K_Vectors& kv,
+                   const Grid_Technique& grid_tech,
+                   const LCAO_Orbitals& orb);
 #endif
     void set_kpoints(const K_Vectors& kv, const int direction);
 
diff --git a/source/module_io/cal_r_overlap_R.cpp b/source/module_io/cal_r_overlap_R.cpp
index b43c181576..2758ad131d 100644
--- a/source/module_io/cal_r_overlap_R.cpp
+++ b/source/module_io/cal_r_overlap_R.cpp
@@ -239,20 +239,20 @@ void cal_r_overlap_R::init(const UnitCell& ucell,const Parallel_Orbitals& pv, co
     return;
 }
 
-void cal_r_overlap_R::out_rR(const UnitCell& ucell, const int& istep)
+void cal_r_overlap_R::out_rR(const UnitCell& ucell, Grid_Driver& gd, const int& istep)
 {
     ModuleBase::TITLE("cal_r_overlap_R", "out_rR");
     ModuleBase::timer::tick("cal_r_overlap_R", "out_rR");
 
     int step = istep;
     // set R coor range
-    int R_minX = int(-GlobalC::GridD.getTrueCellX());
-    int R_minY = int(-GlobalC::GridD.getTrueCellY());
-    int R_minZ = int(-GlobalC::GridD.getTrueCellZ());
+    int R_minX = int(-gd.getTrueCellX());
+    int R_minY = int(-gd.getTrueCellY());
+    int R_minZ = int(-gd.getTrueCellZ());
 
-    int R_x = GlobalC::GridD.getCellX();
-    int R_y = GlobalC::GridD.getCellY();
-    int R_z = GlobalC::GridD.getCellZ();
+    int R_x = gd.getCellX();
+    int R_y = gd.getCellY();
+    int R_z = gd.getCellZ();
 
     std::set<Abfs::Vector3_Order<int>> all_R_coor;
     for (int ix = 0; ix < R_x; ix++)
diff --git a/source/module_io/cal_r_overlap_R.h b/source/module_io/cal_r_overlap_R.h
index ae23718148..69a4d2c7fb 100644
--- a/source/module_io/cal_r_overlap_R.h
+++ b/source/module_io/cal_r_overlap_R.h
@@ -9,11 +9,13 @@
 #include "module_basis/module_ao/ORB_gaunt_table.h"
 #include "module_basis/module_ao/ORB_read.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
+#include "module_cell/module_neighbor/sltk_grid_driver.h"
+#include "module_cell/unitcell.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/center2_orb-orb11.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/center2_orb-orb21.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/center2_orb.h"
 #include "single_R_io.h"
-#include "module_cell/unitcell.h"
+
 #include <map>
 #include <set>
 #include <vector>
@@ -31,7 +33,7 @@ class cal_r_overlap_R
     bool binary = false;
 
     void init(const UnitCell& ucell,const Parallel_Orbitals& pv, const LCAO_Orbitals& orb);
-    void out_rR(const UnitCell& ucell, const int& istep);
+    void out_rR(const UnitCell& ucell, Grid_Driver& gd, const int& istep);
     void out_rR_other(const UnitCell& ucell, const int& istep, const std::set<Abfs::Vector3_Order<int>>& output_R_coor);
 
   private:
diff --git a/source/module_io/output_mat_sparse.cpp b/source/module_io/output_mat_sparse.cpp
index d3a6f6e1b1..f12fd69d3e 100644
--- a/source/module_io/output_mat_sparse.cpp
+++ b/source/module_io/output_mat_sparse.cpp
@@ -80,7 +80,7 @@ void output_mat_sparse(const bool& out_mat_hsR,
         }
         else
         {
-            r_matrix.out_rR(ucell,istep);
+            r_matrix.out_rR(ucell, grid, istep);
         }
     }
 
diff --git a/source/module_io/output_mulliken.h b/source/module_io/output_mulliken.h
index 2d78d2fa52..1bcea3a21f 100644
--- a/source/module_io/output_mulliken.h
+++ b/source/module_io/output_mulliken.h
@@ -93,6 +93,7 @@ void cal_mag(Parallel_Orbitals* pv,
              const TwoCenterBundle& two_center_bundle,
              const LCAO_Orbitals& orb,
              UnitCell& ucell,
+             Grid_Driver& gd,
              const int istep,
              const bool print)
 {
@@ -134,15 +135,14 @@ void cal_mag(Parallel_Orbitals* pv,
         auto atomLabels = ucell.get_atomLabels();
         if(PARAM.inp.nspin == 2)
         {
-            auto sc_lambda = new hamilt::DeltaSpin<hamilt::OperatorLCAO<TK, double>>(
-                    nullptr,
-                    kv.kvec_d,
-                    nullptr,
-                    ucell,
-                    &GlobalC::GridD,
-                    two_center_bundle.overlap_orb_onsite.get(),
-                    orb.cutoffs()
-            );
+            auto sc_lambda
+                = new hamilt::DeltaSpin<hamilt::OperatorLCAO<TK, double>>(nullptr,
+                                                                          kv.kvec_d,
+                                                                          nullptr,
+                                                                          ucell,
+                                                                          &gd,
+                                                                          two_center_bundle.overlap_orb_onsite.get(),
+                                                                          orb.cutoffs());
             dynamic_cast<const elecstate::ElecStateLCAO<TK>*>(pelec)->get_DM()->switch_dmr(2);
             moments = sc_lambda->cal_moment(dmr, constrain);
             dynamic_cast<const elecstate::ElecStateLCAO<TK>*>(pelec)->get_DM()->switch_dmr(0);
@@ -162,14 +162,13 @@ void cal_mag(Parallel_Orbitals* pv,
         else if(PARAM.inp.nspin == 4)
         {
             auto sc_lambda = new hamilt::DeltaSpin<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>>(
-                    nullptr,
-                    kv.kvec_d,
-                    nullptr,
-                    ucell,
-                    &GlobalC::GridD,
-                    two_center_bundle.overlap_orb_onsite.get(),
-                    orb.cutoffs()
-            );
+                nullptr,
+                kv.kvec_d,
+                nullptr,
+                ucell,
+                &gd,
+                two_center_bundle.overlap_orb_onsite.get(),
+                orb.cutoffs());
             moments = sc_lambda->cal_moment(dmr, constrain);
             delete sc_lambda;
             //const std::vector<std::string> title = {"Total Magnetism (uB)", "", "", ""};
diff --git a/source/module_io/td_current_io.cpp b/source/module_io/td_current_io.cpp
index 96272a70e7..9534107da2 100644
--- a/source/module_io/td_current_io.cpp
+++ b/source/module_io/td_current_io.cpp
@@ -118,6 +118,7 @@ void ModuleIO::cal_tmp_DM(elecstate::DensityMatrix<std::complex<double>, double>
 }
 
 void ModuleIO::write_current(const UnitCell& ucell,
+                             Grid_Driver& gd,
                              const int istep,
                              const psi::Psi<std::complex<double>>* psi,
                              const elecstate::ElecState* pelec,
@@ -134,7 +135,7 @@ void ModuleIO::write_current(const UnitCell& ucell,
     std::vector<hamilt::HContainer<std::complex<double>>*> current_term = {nullptr, nullptr, nullptr};
     if (!TD_Velocity::tddft_velocity)
     {
-        cal_current = new TD_current(&ucell, &GlobalC::GridD, pv, orb, intor);
+        cal_current = new TD_current(&ucell, &gd, pv, orb, intor);
         cal_current->calculate_vcomm_r();
         cal_current->calculate_grad_term();
         for (int dir = 0; dir < 3; dir++)
diff --git a/source/module_io/td_current_io.h b/source/module_io/td_current_io.h
index 40aed95214..709a85c4d6 100644
--- a/source/module_io/td_current_io.h
+++ b/source/module_io/td_current_io.h
@@ -11,6 +11,7 @@ namespace ModuleIO
 #ifdef __LCAO
 /// @brief func to output current, only used in tddft
 void write_current(const UnitCell& ucell,
+                   Grid_Driver& gd,
                    const int istep,
                    const psi::Psi<std::complex<double>>* psi,
                    const elecstate::ElecState* pelec,
diff --git a/source/module_io/to_wannier90_lcao.cpp b/source/module_io/to_wannier90_lcao.cpp
index 72651d6991..3ecad5ea36 100644
--- a/source/module_io/to_wannier90_lcao.cpp
+++ b/source/module_io/to_wannier90_lcao.cpp
@@ -39,6 +39,7 @@ toWannier90_LCAO::~toWannier90_LCAO()
 }
 
 void toWannier90_LCAO::calculate(const UnitCell& ucell,
+                                 Grid_Driver& gd,
                                  const ModuleBase::matrix& ekb,
                                  const K_Vectors& kv,
                                  const psi::Psi<std::complex<double>>& psi,
@@ -114,7 +115,7 @@ void toWannier90_LCAO::calculate(const UnitCell& ucell,
 
         initialize_orb_table(ucell);
         produce_basis_orb();
-        set_R_coor(ucell);
+        set_R_coor(ucell, gd);
         count_delta_k(ucell,kv);
     }
 
@@ -138,7 +139,7 @@ void toWannier90_LCAO::calculate(const UnitCell& ucell,
                 return exp_idkr;
             };
 
-            FR[i].set_parameters(fr_ptr[i], &ucell, &orb_, &GlobalC::GridD, ParaV, 140, 110);
+            FR[i].set_parameters(fr_ptr[i], &ucell, &orb_, &gd, ParaV, 140, 110);
             FR[i].calculate_FR();
         }
 
@@ -303,15 +304,15 @@ void toWannier90_LCAO::initialize_orb_table(const UnitCell& ucell)
 #endif
 }
 
-void toWannier90_LCAO::set_R_coor(const UnitCell& ucell)
+void toWannier90_LCAO::set_R_coor(const UnitCell& ucell, const Grid_Driver& gd)
 {
-    int R_minX = int(-GlobalC::GridD.getTrueCellX());
-    int R_minY = int(-GlobalC::GridD.getTrueCellY());
-    int R_minZ = int(-GlobalC::GridD.getTrueCellZ());
+    int R_minX = int(-gd.getTrueCellX());
+    int R_minY = int(-gd.getTrueCellY());
+    int R_minZ = int(-gd.getTrueCellZ());
 
-    int R_x = GlobalC::GridD.getCellX();
-    int R_y = GlobalC::GridD.getCellY();
-    int R_z = GlobalC::GridD.getCellZ();
+    int R_x = gd.getCellX();
+    int R_y = gd.getCellY();
+    int R_z = gd.getCellZ();
 
     int R_num = R_x * R_y * R_z;
     R_coor_car.resize(R_num);
diff --git a/source/module_io/to_wannier90_lcao.h b/source/module_io/to_wannier90_lcao.h
index 34d0e64dbb..9a28e71d56 100644
--- a/source/module_io/to_wannier90_lcao.h
+++ b/source/module_io/to_wannier90_lcao.h
@@ -80,12 +80,14 @@ class toWannier90_LCAO : public toWannier90
     ~toWannier90_LCAO();
 
     void calculate(const UnitCell& ucell,
+                   Grid_Driver& gd,
                    const ModuleBase::matrix& ekb,
                    const K_Vectors& kv,
                    const psi::Psi<std::complex<double>>& psi,
                    const Parallel_Orbitals* pv);
 
     void calculate(const UnitCell& ucell,
+                   Grid_Driver& gd,
                    const ModuleBase::matrix& ekb,
                    const K_Vectors& kv,
                    const psi::Psi<double>& psi,
@@ -131,7 +133,7 @@ class toWannier90_LCAO : public toWannier90
 
     void initialize_orb_table(const UnitCell& ucell);
     void produce_basis_orb();
-    void set_R_coor(const UnitCell& ucell);
+    void set_R_coor(const UnitCell& ucell, const Grid_Driver& gd);
     void count_delta_k(const UnitCell& ucell, const K_Vectors& kv);
 
     std::vector<Coordinate_3D> delta_k_all;
diff --git a/source/module_io/unk_overlap_lcao.cpp b/source/module_io/unk_overlap_lcao.cpp
index b784f54a1c..78dbba044e 100644
--- a/source/module_io/unk_overlap_lcao.cpp
+++ b/source/module_io/unk_overlap_lcao.cpp
@@ -360,7 +360,7 @@ int unkOverlap_lcao::iw2im(const UnitCell& ucell, int iw)
 }
 
 // search for the nearest neighbor atoms
-void unkOverlap_lcao::cal_R_number(const UnitCell& ucell)
+void unkOverlap_lcao::cal_R_number(const UnitCell& ucell, Grid_Driver& gd)
 {
     // The number of overlaps between atomic orbitals 1 and atomic orbitals 2,
     // or the number of R, is empty when there is no overlap
@@ -377,18 +377,18 @@ void unkOverlap_lcao::cal_R_number(const UnitCell& ucell)
         for (int I1 = 0; I1 < atom1->na; ++I1)
         {
             tau1 = atom1->tau[I1];
-            GlobalC::GridD.Find_atom(ucell, tau1, T1, I1);
+            gd.Find_atom(ucell, tau1, T1, I1);
 
-            for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum() + 1; ++ad)
+            for (int ad = 0; ad < gd.getAdjacentNum() + 1; ++ad)
             {
-                const int T2 = GlobalC::GridD.getType(ad);
-                const int I2 = GlobalC::GridD.getNatom(ad);
+                const int T2 = gd.getType(ad);
+                const int I2 = gd.getNatom(ad);
                 Atom* atom2 = &ucell.atoms[T2];
-                const double R_direct_x = (double)GlobalC::GridD.getBox(ad).x;
-                const double R_direct_y = (double)GlobalC::GridD.getBox(ad).y;
-                const double R_direct_z = (double)GlobalC::GridD.getBox(ad).z;
+                const double R_direct_x = (double)gd.getBox(ad).x;
+                const double R_direct_y = (double)gd.getBox(ad).y;
+                const double R_direct_z = (double)gd.getBox(ad).z;
 
-                tau2 = GlobalC::GridD.getAdjacentTau(ad);
+                tau2 = gd.getAdjacentTau(ad);
                 dtau = tau2 - tau1;
                 double distance = dtau.norm() * ucell.lat0;
                 double rcut = rcut_orb_[T1] + rcut_orb_[T2];
diff --git a/source/module_io/unk_overlap_lcao.h b/source/module_io/unk_overlap_lcao.h
index 0ba664271e..2554da1142 100644
--- a/source/module_io/unk_overlap_lcao.h
+++ b/source/module_io/unk_overlap_lcao.h
@@ -54,7 +54,7 @@ class unkOverlap_lcao
     int iw2iL(const UnitCell& ucell, int iw);
     int iw2iN(const UnitCell& ucell, int iw);
     int iw2im(const UnitCell& ucell, int iw);
-    void cal_R_number(const UnitCell& ucell);
+    void cal_R_number(const UnitCell& ucell, Grid_Driver& gd);
     void cal_orb_overlap(const UnitCell& ucell);
     void prepare_midmatrix_pblas(const UnitCell& ucell,
                                  const int ik_L,
diff --git a/source/module_lr/esolver_lrtd_lcao.cpp b/source/module_lr/esolver_lrtd_lcao.cpp
index 502ef01120..7b362242fe 100644
--- a/source/module_lr/esolver_lrtd_lcao.cpp
+++ b/source/module_lr/esolver_lrtd_lcao.cpp
@@ -146,6 +146,8 @@ LR::ESolver_LR<T, TR>::ESolver_LR(ModuleESolver::ESolver_KS_LCAO<T, TR>&& ks_sol
         throw std::invalid_argument("when lr_solver==spectrum, esolver_type must be set to `lr` to skip the KS calculation.");
 }
 
+    this->gd = std::move(ks_sol.gd);
+
     // xc kernel
     this->xc_kernel = inp.xc_kernel;
     std::transform(xc_kernel.begin(), xc_kernel.end(), xc_kernel.begin(), tolower);
@@ -326,11 +328,11 @@ LR::ESolver_LR<T, TR>::ESolver_LR(const Input_para& inp, UnitCell& ucell) : inpu
         ucell.infoNL.get_rcutmax_Beta(),
         PARAM.globalv.gamma_only_local);
     atom_arrange::search(PARAM.inp.search_pbc,
-        GlobalV::ofs_running,
-        GlobalC::GridD,
-        this->ucell,
-        search_radius,
-        PARAM.inp.test_atom_input);
+                         GlobalV::ofs_running,
+                         this->gd,
+                         this->ucell,
+                         search_radius,
+                         PARAM.inp.test_atom_input);
     this->set_gint();
     this->gint_->gridt = &this->gt_;
 
@@ -343,28 +345,28 @@ LR::ESolver_LR<T, TR>::ESolver_LR(const Input_para& inp, UnitCell& ucell) : inpu
 
     Gint_Tools::init_orb(dr_uniform, rcuts, ucell, orb, psi_u, dpsi_u, d2psi_u);
     this->gt_.set_pbc_grid(this->pw_rho->nx,
-        this->pw_rho->ny,
-        this->pw_rho->nz,
-        this->pw_big->bx,
-        this->pw_big->by,
-        this->pw_big->bz,
-        this->pw_big->nbx,
-        this->pw_big->nby,
-        this->pw_big->nbz,
-        this->pw_big->nbxx,
-        this->pw_big->nbzp_start,
-        this->pw_big->nbzp,
-        this->pw_rho->ny,
-        this->pw_rho->nplane,
-        this->pw_rho->startz_current,
-        ucell,
-        GlobalC::GridD,
-        dr_uniform,
-        rcuts,
-        psi_u,
-        dpsi_u,
-        d2psi_u,
-        PARAM.inp.nstream);
+                           this->pw_rho->ny,
+                           this->pw_rho->nz,
+                           this->pw_big->bx,
+                           this->pw_big->by,
+                           this->pw_big->bz,
+                           this->pw_big->nbx,
+                           this->pw_big->nby,
+                           this->pw_big->nbz,
+                           this->pw_big->nbxx,
+                           this->pw_big->nbzp_start,
+                           this->pw_big->nbzp,
+                           this->pw_rho->ny,
+                           this->pw_rho->nplane,
+                           this->pw_rho->startz_current,
+                           ucell,
+                           this->gd,
+                           dr_uniform,
+                           rcuts,
+                           psi_u,
+                           dpsi_u,
+                           d2psi_u,
+                           PARAM.inp.nstream);
     psi_u.clear();
     psi_u.shrink_to_fit();
     dpsi_u.clear();
@@ -388,7 +390,7 @@ LR::ESolver_LR<T, TR>::ESolver_LR(const Input_para& inp, UnitCell& ucell) : inpu
         this->pw_rho->startz_current,
         &ucell,
         &orb);
-    this->gint_->initialize_pvpR(ucell, &GlobalC::GridD, 1);    // always use nspin=1 for transition density
+    this->gint_->initialize_pvpR(ucell, &this->gd, 1); // always use nspin=1 for transition density
 
     // if EXX from scratch, init 2-center integral and calculate Cs, Vs 
 #ifdef __EXX
@@ -435,11 +437,26 @@ void LR::ESolver_LR<T, TR>::runner(UnitCell& ucell, const int istep)
                 if (input.lr_solver != "lapack") { pre_op.act(1, offset_is, 1, precondition.data() + offset_is, precondition.data() + offset_is); }
             }
             std::cout << "Solving spin-conserving excitation for open-shell system." << std::endl;
-            HamiltULR<T> hulr(xc_kernel, nspin, this->nbasis, this->nocc, this->nvirt, this->ucell, orb_cutoff_, GlobalC::GridD, *this->psi_ks, this->eig_ks,
+            HamiltULR<T> hulr(xc_kernel,
+                              nspin,
+                              this->nbasis,
+                              this->nocc,
+                              this->nvirt,
+                              this->ucell,
+                              orb_cutoff_,
+                              this->gd,
+                              *this->psi_ks,
+                              this->eig_ks,
 #ifdef __EXX
-                this->exx_lri, this->exx_info.info_global.hybrid_alpha,
+                              this->exx_lri,
+                              this->exx_info.info_global.hybrid_alpha,
 #endif
-                this->gint_, this->pot, this->kv, this->paraX_, this->paraC_, this->paraMat_);
+                              this->gint_,
+                              this->pot,
+                              this->kv,
+                              this->paraX_,
+                              this->paraC_,
+                              this->paraMat_);
             LR::HSolver::solve(hulr, this->X[0].template data<T>(), nloc_per_band, nstates, this->pelec->ekb.c, this->input.lr_solver, this->input.lr_thr, precondition);
             if (input.out_wfc_lr) { write_states("openshell", this->pelec->ekb.c, this->X[0].template data<T>(), nloc_per_band, nstates); }
         }
@@ -451,12 +468,29 @@ void LR::ESolver_LR<T, TR>::runner(UnitCell& ucell, const int istep)
             for (int is = 0;is < nspin;++is)
             {
                 std::cout << "Calculating " << spin_types[is] << " excitations" << std::endl;
-                HamiltLR<T> hlr(xc_kernel, nspin, this->nbasis, this->nocc, this->nvirt, this->ucell, orb_cutoff_, GlobalC::GridD, *this->psi_ks, this->eig_ks,
+                HamiltLR<T> hlr(xc_kernel,
+                                nspin,
+                                this->nbasis,
+                                this->nocc,
+                                this->nvirt,
+                                this->ucell,
+                                orb_cutoff_,
+                                this->gd,
+                                *this->psi_ks,
+                                this->eig_ks,
 #ifdef __EXX
-                    this->exx_lri, this->exx_info.info_global.hybrid_alpha,
+                                this->exx_lri,
+                                this->exx_info.info_global.hybrid_alpha,
 #endif
-                    this->gint_, this->pot[is], this->kv, this->paraX_, this->paraC_, this->paraMat_,
-                    spin_types[is], input.ri_hartree_benchmark, (input.ri_hartree_benchmark == "aims" ? input.aims_nbasis : std::vector<int>({})));
+                                this->gint_,
+                                this->pot[is],
+                                this->kv,
+                                this->paraX_,
+                                this->paraC_,
+                                this->paraMat_,
+                                spin_types[is],
+                                input.ri_hartree_benchmark,
+                                (input.ri_hartree_benchmark == "aims" ? input.aims_nbasis : std::vector<int>({})));
                 // solve the Casida equation
                 LR::HSolver::solve(hlr, this->X[is].template data<T>(), nloc_per_band, nstates,
                     this->pelec->ekb.c + is * nstates, this->input.lr_solver, this->input.lr_thr, precondition/*,
@@ -505,10 +539,24 @@ void LR::ESolver_LR<T, TR>::after_all_runners(UnitCell& ucell)
     auto spin_types = (nspin == 2 && !openshell) ? std::vector<std::string>({ "singlet", "triplet" }) : std::vector<std::string>({ "updown" });
     for (int is = 0;is < this->X.size();++is)
     {
-        LR_Spectrum<T> spectrum(nspin, this->nbasis, this->nocc, this->nvirt, this->gint_, *this->pw_rho, *this->psi_ks,
-            this->ucell, this->kv, GlobalC::GridD, this->orb_cutoff_,
-            this->paraX_, this->paraC_, this->paraMat_,
-            &this->pelec->ekb.c[is * nstates], this->X[is].template data<T>(), nstates, openshell);
+        LR_Spectrum<T> spectrum(nspin,
+                                this->nbasis,
+                                this->nocc,
+                                this->nvirt,
+                                this->gint_,
+                                *this->pw_rho,
+                                *this->psi_ks,
+                                this->ucell,
+                                this->kv,
+                                this->gd,
+                                this->orb_cutoff_,
+                                this->paraX_,
+                                this->paraC_,
+                                this->paraMat_,
+                                &this->pelec->ekb.c[is * nstates],
+                                this->X[is].template data<T>(),
+                                nstates,
+                                openshell);
         spectrum.transition_analysis(spin_types[is]);
         spectrum.optical_absorption(freq, input.abs_broadening, spin_types[is]);
     }
diff --git a/source/module_lr/esolver_lrtd_lcao.h b/source/module_lr/esolver_lrtd_lcao.h
index 05e2e7507f..a4ff3f85f5 100644
--- a/source/module_lr/esolver_lrtd_lcao.h
+++ b/source/module_lr/esolver_lrtd_lcao.h
@@ -49,6 +49,7 @@ namespace LR
       protected:
         const Input_para& input;
         const UnitCell& ucell;
+        Grid_Driver gd;
         std::vector<double> orb_cutoff_;
 
         // not to use ElecState because 2-particle state is quite different from 1-particle state.
diff --git a/source/module_lr/operator_casida/operator_lr_hxc.cpp b/source/module_lr/operator_casida/operator_lr_hxc.cpp
index 52ba4fc751..22af349ca1 100644
--- a/source/module_lr/operator_casida/operator_lr_hxc.cpp
+++ b/source/module_lr/operator_casida/operator_lr_hxc.cpp
@@ -123,7 +123,7 @@ namespace LR
 
                 // LR_Util::print_HR(*this->gint->get_hRGint(), this->ucell.nat, "VR(grid)");
                 HR_real_imag.set_zero();
-                this->gint->transfer_pvpR(&HR_real_imag, &ucell, &GlobalC::GridD);
+                this->gint->transfer_pvpR(&HR_real_imag, &ucell, &this->gd);
                 // LR_Util::print_HR(HR_real_imag, this->ucell.nat, "VR(real, 2d)");
                 LR_Util::set_HR_real_imag_part(HR_real_imag, *this->hR, ucell.nat, type);
             };
diff --git a/source/module_rdmft/rdmft.cpp b/source/module_rdmft/rdmft.cpp
index 3a7f515c9b..7e68635869 100644
--- a/source/module_rdmft/rdmft.cpp
+++ b/source/module_rdmft/rdmft.cpp
@@ -55,15 +55,16 @@ RDMFT<TK, TR>::~RDMFT()
 }
 
 template <typename TK, typename TR>
-void RDMFT<TK, TR>::init(Gint_Gamma& GG_in, 
-                         Gint_k& GK_in, 
-                         Parallel_Orbitals& ParaV_in, 
+void RDMFT<TK, TR>::init(Gint_Gamma& GG_in,
+                         Gint_k& GK_in,
+                         Parallel_Orbitals& ParaV_in,
                          UnitCell& ucell_in,
-                         K_Vectors& kv_in, 
-                         elecstate::ElecState& pelec_in, 
-                         LCAO_Orbitals& orb_in, 
-                         TwoCenterBundle& two_center_bundle_in, 
-                         std::string XC_func_rdmft_in, 
+                         Grid_Driver& gd_in,
+                         K_Vectors& kv_in,
+                         elecstate::ElecState& pelec_in,
+                         LCAO_Orbitals& orb_in,
+                         TwoCenterBundle& two_center_bundle_in,
+                         std::string XC_func_rdmft_in,
                          double alpha_power_in)
 {
     GG = &GG_in;
@@ -74,6 +75,7 @@ void RDMFT<TK, TR>::init(Gint_Gamma& GG_in,
     charge = pelec_in.charge;
     pelec = &pelec_in;
     orb = &orb_in;
+    gd = &gd_in;
     two_center_bundle = &two_center_bundle_in;
     XC_func_rdmft = XC_func_rdmft_in;
     alpha_power = alpha_power_in;
diff --git a/source/module_rdmft/rdmft.h b/source/module_rdmft/rdmft.h
index 5f4c5848ef..2d9861abf0 100644
--- a/source/module_rdmft/rdmft.h
+++ b/source/module_rdmft/rdmft.h
@@ -55,7 +55,7 @@ class RDMFT
     elecstate::ElecState* pelec = nullptr;
 
     //! update after ion step
-    const K_Vectors* kv = nullptr; 
+    const K_Vectors* kv = nullptr;
 
     int nk_total = 0;
     int nbands_total;
@@ -81,8 +81,17 @@ class RDMFT
     // std::vector<double> E_RDMFT(4);
 
     //! initialization of rdmft calculation
-    void init(Gint_Gamma& GG_in, Gint_k& GK_in, Parallel_Orbitals& ParaV_in, UnitCell& ucell_in,
-                        K_Vectors& kv_in, elecstate::ElecState& pelec_in, LCAO_Orbitals& orb_in, TwoCenterBundle& two_center_bundle_in, std::string XC_func_rdmft_in, double alpha_power_in);
+    void init(Gint_Gamma& GG_in,
+              Gint_k& GK_in,
+              Parallel_Orbitals& ParaV_in,
+              UnitCell& ucell_in,
+              Grid_Driver& gd_in,
+              K_Vectors& kv_in,
+              elecstate::ElecState& pelec_in,
+              LCAO_Orbitals& orb_in,
+              TwoCenterBundle& two_center_bundle_in,
+              std::string XC_func_rdmft_in,
+              double alpha_power_in);
 
     //! update in ion-step and get V_TV
     void update_ion(UnitCell& ucell_in, ModulePW::PW_Basis& rho_basis_in,
@@ -189,6 +198,7 @@ class RDMFT
 
     // update after ion step
     const UnitCell* ucell = nullptr;
+    Grid_Driver* gd = nullptr;
     const ModulePW::PW_Basis* rho_basis = nullptr;
     const ModuleBase::matrix* vloc = nullptr;
     const ModuleBase::ComplexMatrix* sf = nullptr;
diff --git a/source/module_rdmft/rdmft_pot.cpp b/source/module_rdmft/rdmft_pot.cpp
index f405364abe..06d9c5a3ff 100644
--- a/source/module_rdmft/rdmft_pot.cpp
+++ b/source/module_rdmft/rdmft_pot.cpp
@@ -50,64 +50,56 @@ template <typename TK, typename TR>
 void RDMFT<TK, TR>::cal_V_TV()
 {
     HR_TV->set_zero();
-    
-    V_ekinetic_potential = new hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>(
-        hsk_TV,
-        kv->kvec_d,
-        HR_TV,
-        &GlobalC::ucell,
-        orb->cutoffs(),
-        &GlobalC::GridD,
-        two_center_bundle->kinetic_orb.get()
-    );
-
-    V_nonlocal = new hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>(
-        hsk_TV,
-        kv->kvec_d,
-        HR_TV,
-        &GlobalC::ucell,
-        orb->cutoffs(),
-        &GlobalC::GridD,
-        two_center_bundle->overlap_orb_beta.get()
-    );
+
+    V_ekinetic_potential = new hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>(hsk_TV,
+                                                                                 kv->kvec_d,
+                                                                                 HR_TV,
+                                                                                 &GlobalC::ucell,
+                                                                                 orb->cutoffs(),
+                                                                                 this->gd,
+                                                                                 two_center_bundle->kinetic_orb.get());
+
+    V_nonlocal = new hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>(hsk_TV,
+                                                                       kv->kvec_d,
+                                                                       HR_TV,
+                                                                       &GlobalC::ucell,
+                                                                       orb->cutoffs(),
+                                                                       this->gd,
+                                                                       two_center_bundle->overlap_orb_beta.get());
 
     if( PARAM.inp.gamma_only )
     {
-        V_local = new rdmft::Veff_rdmft<TK,TR>(
-            GG,
-            hsk_TV,
-            kv->kvec_d,
-            this->pelec->pot,
-            HR_TV,
-            &GlobalC::ucell,
-            orb->cutoffs(),
-            &GlobalC::GridD,
-            nspin,
-            charge,
-            rho_basis,
-            vloc,
-            sf,
-            "local"
-        );
+        V_local = new rdmft::Veff_rdmft<TK, TR>(GG,
+                                                hsk_TV,
+                                                kv->kvec_d,
+                                                this->pelec->pot,
+                                                HR_TV,
+                                                &GlobalC::ucell,
+                                                orb->cutoffs(),
+                                                this->gd,
+                                                nspin,
+                                                charge,
+                                                rho_basis,
+                                                vloc,
+                                                sf,
+                                                "local");
     }
     else
     {
-        V_local = new rdmft::Veff_rdmft<TK,TR>(
-            GK,
-            hsk_TV,
-            kv->kvec_d,
-            this->pelec->pot,
-            HR_TV,
-            &GlobalC::ucell,
-            orb->cutoffs(),
-            &GlobalC::GridD,
-            nspin,
-            charge,
-            rho_basis,
-            vloc,
-            sf,
-            "local"
-        );
+        V_local = new rdmft::Veff_rdmft<TK, TR>(GK,
+                                                hsk_TV,
+                                                kv->kvec_d,
+                                                this->pelec->pot,
+                                                HR_TV,
+                                                &GlobalC::ucell,
+                                                orb->cutoffs(),
+                                                this->gd,
+                                                nspin,
+                                                charge,
+                                                rho_basis,
+                                                vloc,
+                                                sf,
+                                                "local");
     }
 
     // update HR_TV in ion-step, now HR_TV has the HR of V_ekinetic + V_nonlcao + V_local
@@ -125,42 +117,38 @@ void RDMFT<TK, TR>::cal_V_hartree()
 
     if( PARAM.inp.gamma_only )
     {
-        V_hartree = new rdmft::Veff_rdmft<TK,TR>(
-            GG,
-            hsk_hartree,
-            kv->kvec_d,
-            this->pelec->pot,
-            HR_hartree,
-            &GlobalC::ucell,
-            orb->cutoffs(),
-            &GlobalC::GridD,
-            nspin,
-            charge,
-            rho_basis,
-            vloc,
-            sf,
-            "hartree"
-        );
+        V_hartree = new rdmft::Veff_rdmft<TK, TR>(GG,
+                                                  hsk_hartree,
+                                                  kv->kvec_d,
+                                                  this->pelec->pot,
+                                                  HR_hartree,
+                                                  &GlobalC::ucell,
+                                                  orb->cutoffs(),
+                                                  this->gd,
+                                                  nspin,
+                                                  charge,
+                                                  rho_basis,
+                                                  vloc,
+                                                  sf,
+                                                  "hartree");
     }
     else
     {
         // this can be optimized, use potHartree.update_from_charge()
-        V_hartree = new rdmft::Veff_rdmft<TK,TR>(
-            GK,
-            hsk_hartree,
-            kv->kvec_d,
-            this->pelec->pot,
-            HR_hartree,
-            &GlobalC::ucell,
-            orb->cutoffs(),
-            &GlobalC::GridD,
-            nspin,
-            charge,
-            rho_basis,
-            vloc,
-            sf,
-            "hartree"
-        );
+        V_hartree = new rdmft::Veff_rdmft<TK, TR>(GK,
+                                                  hsk_hartree,
+                                                  kv->kvec_d,
+                                                  this->pelec->pot,
+                                                  HR_hartree,
+                                                  &GlobalC::ucell,
+                                                  orb->cutoffs(),
+                                                  this->gd,
+                                                  nspin,
+                                                  charge,
+                                                  rho_basis,
+                                                  vloc,
+                                                  sf,
+                                                  "hartree");
     }
 
     // in gamma only, must calculate HR_hartree before HR_local
@@ -182,7 +170,7 @@ void RDMFT<TK, TR>::cal_V_XC(const UnitCell& ucell)
 
     // elecstate::DensityMatrix<TK, double> DM_test(ParaV, nspin, kv->kvec_d, nk_total);
     // elecstate::cal_dm_psi(ParaV, wg, wfc, DM_test);
-    // DM_test.init_DMR(&GlobalC::GridD, &GlobalC::ucell);
+    // DM_test.init_DMR(this->gd, &GlobalC::ucell);
     // DM_test.cal_DMR();
 
     // // compare DM_XC and DM get in update_charge(or ABACUS)
@@ -209,46 +197,42 @@ void RDMFT<TK, TR>::cal_V_XC(const UnitCell& ucell)
         if( PARAM.inp.gamma_only )
         {
             // this can be optimized, use potXC.update_from_charge()
-            V_dft_XC = new rdmft::Veff_rdmft<TK,TR>(
-                GG,
-                hsk_dft_XC,
-                kv->kvec_d,
-                this->pelec->pot,
-                HR_dft_XC,
-                &GlobalC::ucell,
-                orb->cutoffs(),
-                &GlobalC::GridD,
-                nspin,
-                charge,
-                rho_basis,
-                vloc,
-                sf,
-                "xc",
-                &etxc,
-                &vtxc
-            );
+            V_dft_XC = new rdmft::Veff_rdmft<TK, TR>(GG,
+                                                     hsk_dft_XC,
+                                                     kv->kvec_d,
+                                                     this->pelec->pot,
+                                                     HR_dft_XC,
+                                                     &GlobalC::ucell,
+                                                     orb->cutoffs(),
+                                                     this->gd,
+                                                     nspin,
+                                                     charge,
+                                                     rho_basis,
+                                                     vloc,
+                                                     sf,
+                                                     "xc",
+                                                     &etxc,
+                                                     &vtxc);
         }
         else
         {   
             // this can be optimized, use potXC.update_from_charge()
-            V_dft_XC = new rdmft::Veff_rdmft<TK,TR>(
-                GK,
-                hsk_dft_XC,
-                kv->kvec_d,
-                this->pelec->pot,
-                HR_dft_XC,
-                &GlobalC::ucell,
-                orb->cutoffs(),
-                &GlobalC::GridD,
-                nspin,
-                charge,
-                rho_basis,
-                vloc,
-                sf,
-                "xc",
-                &etxc,
-                &vtxc
-            );
+            V_dft_XC = new rdmft::Veff_rdmft<TK, TR>(GK,
+                                                     hsk_dft_XC,
+                                                     kv->kvec_d,
+                                                     this->pelec->pot,
+                                                     HR_dft_XC,
+                                                     &GlobalC::ucell,
+                                                     orb->cutoffs(),
+                                                     this->gd,
+                                                     nspin,
+                                                     charge,
+                                                     rho_basis,
+                                                     vloc,
+                                                     sf,
+                                                     "xc",
+                                                     &etxc,
+                                                     &vtxc);
         }
         V_dft_XC->contributeHR();
     }
diff --git a/source/module_rdmft/update_state_rdmft.cpp b/source/module_rdmft/update_state_rdmft.cpp
index 4f2d329377..db5f47b6da 100644
--- a/source/module_rdmft/update_state_rdmft.cpp
+++ b/source/module_rdmft/update_state_rdmft.cpp
@@ -98,7 +98,7 @@ void RDMFT<TK, TR>::update_charge()
         // calculate DMK and DMR
         elecstate::DensityMatrix<TK, double> DM_gamma_only(ParaV, nspin);
         elecstate::cal_dm_psi(ParaV, wg, wfc, DM_gamma_only);
-        DM_gamma_only.init_DMR(&GlobalC::GridD, &GlobalC::ucell);
+        DM_gamma_only.init_DMR(this->gd, &GlobalC::ucell);
         DM_gamma_only.cal_DMR();
 
         for (int is = 0; is < nspin; is++)
@@ -128,7 +128,7 @@ void RDMFT<TK, TR>::update_charge()
         // calculate DMK and DMR
         elecstate::DensityMatrix<TK, double> DM(ParaV, nspin, kv->kvec_d, nk_total);
         elecstate::cal_dm_psi(ParaV, wg, wfc, DM);
-        DM.init_DMR(&GlobalC::GridD, &GlobalC::ucell);
+        DM.init_DMR(this->gd, &GlobalC::ucell);
         DM.cal_DMR();
 
         for (int is = 0; is < nspin; is++)

From 4891b2e4a46e8a7e0a01d5b00f08569f7a1d8d4e Mon Sep 17 00:00:00 2001
From: Yu Liu <77716030+YuLiu98@users.noreply.github.com>
Date: Fri, 13 Dec 2024 12:06:45 +0800
Subject: [PATCH 6/7] Refactor: add const for Grid_Driver (#5725)

* Refactor: add const for Grid_Driver

* add const for Find_atom() in unittests

* [pre-commit.ci lite] apply automatic fixes

---------

Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
---
 .../module_neighbor/sltk_grid_driver.cpp      | 12 +--
 .../module_neighbor/sltk_grid_driver.h        |  2 +-
 .../module_dm/density_matrix.h                |  2 +-
 .../module_dm/density_matrix_io.cpp           |  2 +-
 .../module_dm/test/test_dm_io.cpp             |  2 +-
 .../module_dm/test/tmp_mocks.cpp              |  2 +-
 .../module_hamilt_lcao/hamilt_lcaodft/FORCE.h |  4 +-
 .../hamilt_lcaodft/FORCE_STRESS.cpp           |  6 +-
 .../hamilt_lcaodft/FORCE_STRESS.h             |  4 +-
 .../hamilt_lcaodft/FORCE_gamma.cpp            |  4 +-
 .../hamilt_lcaodft/FORCE_k.cpp                |  4 +-
 .../hamilt_lcaodft/LCAO_domain.h              |  5 +-
 .../hamilt_lcaodft/LCAO_nl_mu.cpp             |  2 +-
 .../hamilt_lcaodft/LCAO_set_st.cpp            |  2 +-
 .../hamilt_lcaodft/hamilt_lcao.cpp            |  4 +-
 .../hamilt_lcaodft/hamilt_lcao.h              |  4 +-
 .../operator_lcao/deepks_lcao.cpp             |  4 +-
 .../operator_lcao/deepks_lcao.h               |  6 +-
 .../operator_lcao/dftu_lcao.cpp               |  4 +-
 .../hamilt_lcaodft/operator_lcao/dftu_lcao.h  |  4 +-
 .../operator_lcao/dspin_lcao.cpp              | 15 ++-
 .../hamilt_lcaodft/operator_lcao/dspin_lcao.h | 14 +--
 .../operator_lcao/ekinetic_new.cpp            |  4 +-
 .../operator_lcao/ekinetic_new.h              |  4 +-
 .../operator_lcao/nonlocal_new.cpp            |  4 +-
 .../operator_lcao/nonlocal_new.h              |  6 +-
 .../operator_lcao/overlap_new.cpp             |  4 +-
 .../operator_lcao/overlap_new.h               |  4 +-
 .../operator_lcao/td_ekinetic_lcao.cpp        |  4 +-
 .../operator_lcao/td_ekinetic_lcao.h          | 16 +--
 .../operator_lcao/td_nonlocal_lcao.cpp        |  4 +-
 .../operator_lcao/td_nonlocal_lcao.h          |  6 +-
 .../operator_lcao/test/tmp_mocks.cpp          |  3 +-
 .../operator_lcao/veff_lcao.cpp               |  3 +-
 .../hamilt_lcaodft/operator_lcao/veff_lcao.h  | 64 ++++++------
 .../hamilt_lcaodft/record_adj.cpp             |  4 +-
 .../hamilt_lcaodft/record_adj.h               |  4 +-
 .../hamilt_lcaodft/spar_dh.cpp                |  6 +-
 .../hamilt_lcaodft/spar_dh.h                  |  6 +-
 .../hamilt_lcaodft/spar_hsr.cpp               | 19 ++--
 .../hamilt_lcaodft/spar_hsr.h                 | 19 ++--
 .../hamilt_lcaodft/spar_st.cpp                | 22 ++---
 .../hamilt_lcaodft/spar_st.h                  | 13 +--
 .../module_deepks/LCAO_deepks.h               | 44 ++++-----
 .../module_deepks/LCAO_deepks_interface.cpp   | 28 +++---
 .../module_deepks/LCAO_deepks_interface.h     |  2 +-
 .../module_deepks/LCAO_deepks_pdm.cpp         | 14 +--
 .../module_deepks/LCAO_deepks_psialpha.cpp    |  4 +-
 .../module_deepks/LCAO_deepks_torch.cpp       | 37 +++----
 .../module_deepks/cal_gdmx.cpp                | 24 ++---
 .../module_deepks/deepks_fgamma.cpp           | 10 +-
 .../module_deepks/deepks_fk.cpp               | 14 +--
 .../module_deepks/deepks_force.h              | 64 ++++++------
 .../module_deepks/orbital_precalc.cpp         | 48 ++++-----
 .../module_deepks/test/LCAO_deepks_test.cpp   |  2 +-
 .../module_deepks/v_delta_precalc.cpp         | 35 +++----
 source/module_hamilt_lcao/module_dftu/dftu.h  | 31 +++---
 .../module_dftu/dftu_folding.cpp              | 23 +++--
 .../module_dftu/dftu_force.cpp                |  8 +-
 .../module_hamilt_lcao/module_gint/gint.cpp   |  3 +-
 source/module_hamilt_lcao/module_gint/gint.h  |  2 +-
 .../module_hamilt_lcao/module_gint/gint_k.h   |  9 +-
 .../module_gint/gint_k_pvpr.cpp               |  6 +-
 .../module_gint/gint_k_sparse1.cpp            |  2 +-
 .../module_gint/grid_technique.cpp            | 53 +++++-----
 .../module_gint/grid_technique.h              |  4 +-
 .../module_tddft/td_current.cpp               | 14 +--
 .../module_tddft/td_current.h                 |  8 +-
 source/module_io/berryphase.cpp               |  2 +-
 source/module_io/berryphase.h                 |  2 +-
 source/module_io/cal_r_overlap_R.cpp          |  2 +-
 source/module_io/cal_r_overlap_R.h            |  2 +-
 source/module_io/fR_overlap.cpp               | 18 ++--
 source/module_io/fR_overlap.h                 | 47 +++++----
 source/module_io/get_pchg_lcao.cpp            |  4 +-
 source/module_io/get_pchg_lcao.h              |  4 +-
 source/module_io/output_mat_sparse.cpp        |  4 +-
 source/module_io/output_mat_sparse.h          |  2 +-
 source/module_io/output_mulliken.h            |  2 +-
 source/module_io/td_current_io.cpp            |  2 +-
 source/module_io/td_current_io.h              |  2 +-
 source/module_io/to_wannier90_lcao.cpp        |  2 +-
 source/module_io/to_wannier90_lcao.h          |  4 +-
 source/module_io/unk_overlap_lcao.cpp         | 41 +++++---
 source/module_io/unk_overlap_lcao.h           |  2 +-
 source/module_io/write_HS_R.cpp               | 46 ++++-----
 source/module_io/write_HS_R.h                 | 38 +++----
 source/module_lr/hamilt_casida.h              | 47 ++++-----
 source/module_lr/lr_spectrum.cpp              |  9 +-
 source/module_lr/lr_spectrum.h                | 94 +++++++++++-------
 .../operator_casida/operator_lr_hxc.h         | 55 +++++------
 source/module_lr/utils/lr_util_hcontainer.h   | 15 ++-
 source/module_rdmft/rdmft.cpp                 |  2 +-
 source/module_rdmft/rdmft.h                   |  4 +-
 source/module_rdmft/rdmft_tools.cpp           |  3 +-
 source/module_rdmft/rdmft_tools.h             | 99 +++++++------------
 .../module_exx_symmetry/symmetry_rotation.cpp |  4 +-
 .../module_exx_symmetry/symmetry_rotation.h   |  4 +-
 98 files changed, 693 insertions(+), 685 deletions(-)

diff --git a/source/module_cell/module_neighbor/sltk_grid_driver.cpp b/source/module_cell/module_neighbor/sltk_grid_driver.cpp
index b4f3616fed..f493b726c6 100644
--- a/source/module_cell/module_neighbor/sltk_grid_driver.cpp
+++ b/source/module_cell/module_neighbor/sltk_grid_driver.cpp
@@ -21,13 +21,11 @@ Grid_Driver::~Grid_Driver()
 {
 }
 
-
-void Grid_Driver::Find_atom(
-	const UnitCell &ucell, 
-	const ModuleBase::Vector3<double> &cartesian_pos, 
-	const int &ntype, 
-	const int &nnumber,
-	AdjacentAtomInfo *adjs)
+void Grid_Driver::Find_atom(const UnitCell& ucell,
+                            const ModuleBase::Vector3<double>& cartesian_pos,
+                            const int& ntype,
+                            const int& nnumber,
+                            AdjacentAtomInfo* adjs) const
 {
 	ModuleBase::timer::tick("Grid_Driver","Find_atom");
 //	std::cout << "lenght in Find atom = " << atomlink[offset].fatom.getAdjacentSet()->getLength() << std::endl;
diff --git a/source/module_cell/module_neighbor/sltk_grid_driver.h b/source/module_cell/module_neighbor/sltk_grid_driver.h
index 4ea18c70de..ea4ada5f8d 100644
--- a/source/module_cell/module_neighbor/sltk_grid_driver.h
+++ b/source/module_cell/module_neighbor/sltk_grid_driver.h
@@ -70,7 +70,7 @@ class Grid_Driver : public Grid
                    const ModuleBase::Vector3<double>& cartesian_posi,
                    const int& ntype,
                    const int& nnumber,
-                   AdjacentAtomInfo* adjs = nullptr);
+                   AdjacentAtomInfo* adjs = nullptr) const;
 
     //==========================================================
     // EXPLAIN : The adjacent information for the input
diff --git a/source/module_elecstate/module_dm/density_matrix.h b/source/module_elecstate/module_dm/density_matrix.h
index d3a4d1f919..5b052d303a 100644
--- a/source/module_elecstate/module_dm/density_matrix.h
+++ b/source/module_elecstate/module_dm/density_matrix.h
@@ -66,7 +66,7 @@ class DensityMatrix
      * @param GridD_in pointer of Grid_Driver object (used to find ajacent atoms)
      * @param ucell pointer of UnitCell object
      */
-    void init_DMR(Grid_Driver* GridD_in, const UnitCell* ucell);
+    void init_DMR(const Grid_Driver* GridD_in, const UnitCell* ucell);
 
     /**
      * @brief initialize density matrix DMR from UnitCell and RA
diff --git a/source/module_elecstate/module_dm/density_matrix_io.cpp b/source/module_elecstate/module_dm/density_matrix_io.cpp
index 36548ae314..ceeff57a0e 100644
--- a/source/module_elecstate/module_dm/density_matrix_io.cpp
+++ b/source/module_elecstate/module_dm/density_matrix_io.cpp
@@ -12,7 +12,7 @@ namespace elecstate
 
 // initialize density matrix DMR from UnitCell (mainly used in UnitTest)
 template <typename TK, typename TR>
-void DensityMatrix<TK, TR>::init_DMR(Grid_Driver* GridD_in, const UnitCell* ucell)
+void DensityMatrix<TK, TR>::init_DMR(const Grid_Driver* GridD_in, const UnitCell* ucell)
 {
     ModuleBase::TITLE("DensityMatrix", "init_DMR");
     // ensure _DMR is empty
diff --git a/source/module_elecstate/module_dm/test/test_dm_io.cpp b/source/module_elecstate/module_dm/test/test_dm_io.cpp
index 901ea7bef8..56f47e36fe 100644
--- a/source/module_elecstate/module_dm/test/test_dm_io.cpp
+++ b/source/module_elecstate/module_dm/test/test_dm_io.cpp
@@ -48,7 +48,7 @@ void Grid_Driver::Find_atom(const UnitCell& ucell,
                             const ModuleBase::Vector3<double>& tau,
                             const int& T,
                             const int& I,
-                            AdjacentAtomInfo* adjs)
+                            AdjacentAtomInfo* adjs) const
 {
 }
 Grid::Grid(const int& test_grid_in) : test_grid(test_grid_in)
diff --git a/source/module_elecstate/module_dm/test/tmp_mocks.cpp b/source/module_elecstate/module_dm/test/tmp_mocks.cpp
index 3001dc4181..b9f1356ba5 100644
--- a/source/module_elecstate/module_dm/test/tmp_mocks.cpp
+++ b/source/module_elecstate/module_dm/test/tmp_mocks.cpp
@@ -78,7 +78,7 @@ void Grid_Driver::Find_atom(const UnitCell& ucell,
                             const ModuleBase::Vector3<double>& tau,
                             const int& T,
                             const int& I,
-                            AdjacentAtomInfo* adjs)
+                            AdjacentAtomInfo* adjs) const
 {
     adjs->adj_num = ucell.nat - 1;
     adjs->adjacent_tau.resize(ucell.nat);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h
index 3f6fc8f01d..9a4f74b68d 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h
@@ -51,7 +51,7 @@ class Force_LCAO
                 const bool isstress,
                 ForceStressArrays& fsr, // mohan add 2024-06-16
                 const UnitCell& ucell,
-                Grid_Driver& gd,
+                const Grid_Driver& gd,
                 const psi::Psi<T>* psi,
                 const elecstate::ElecState* pelec,
                 ModuleBase::matrix& foverlap,
@@ -74,7 +74,7 @@ class Force_LCAO
 
     // get the ds, dt, dvnl.
     void allocate(const UnitCell& ucell,
-                  Grid_Driver& gd,
+                  const Grid_Driver& gd,
                   const Parallel_Orbitals& pv,
                   ForceStressArrays& fsr, // mohan add 2024-06-15
                   const TwoCenterBundle& two_center_bundle,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
index c740a96550..edd8def7a8 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
@@ -36,7 +36,7 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
                                           const bool istestf,
                                           const bool istests,
                                           const UnitCell& ucell,
-                                          Grid_Driver& gd,
+                                          const Grid_Driver& gd,
                                           Parallel_Orbitals& pv,
                                           const elecstate::ElecState* pelec,
                                           const psi::Psi<T>* psi,
@@ -870,7 +870,7 @@ void Force_Stress_LCAO<double>::integral_part(const bool isGammaOnly,
                                               const bool isforce,
                                               const bool isstress,
                                               const UnitCell& ucell,
-                                              Grid_Driver& gd,
+                                              const Grid_Driver& gd,
                                               ForceStressArrays& fsr, // mohan add 2024-06-15
                                               const elecstate::ElecState* pelec,
                                               const psi::Psi<double>* psi,
@@ -923,7 +923,7 @@ void Force_Stress_LCAO<std::complex<double>>::integral_part(const bool isGammaOn
                                                             const bool isforce,
                                                             const bool isstress,
                                                             const UnitCell& ucell,
-                                                            Grid_Driver& gd,
+                                                            const Grid_Driver& gd,
                                                             ForceStressArrays& fsr, // mohan add 2024-06-15
                                                             const elecstate::ElecState* pelec,
                                                             const psi::Psi<std::complex<double>>* psi,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h
index 5593fd0afb..af6446ba7b 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h
@@ -34,7 +34,7 @@ class Force_Stress_LCAO
                         const bool istestf,
                         const bool istests,
                         const UnitCell& ucell,
-                        Grid_Driver& gd,
+                        const Grid_Driver& gd,
                         Parallel_Orbitals& pv,
                         const elecstate::ElecState* pelec,
                         const psi::Psi<T>* psi,
@@ -82,7 +82,7 @@ class Force_Stress_LCAO
                        const bool isforce,
                        const bool isstress,
                        const UnitCell& ucell,
-                       Grid_Driver& gd,
+                       const Grid_Driver& gd,
                        ForceStressArrays& fsr, // mohan add 2024-06-15
                        const elecstate::ElecState* pelec,
                        const psi::Psi<T>* psi,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
index 6542f2a0f6..e71dc14b72 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
@@ -17,7 +17,7 @@
 
 template <>
 void Force_LCAO<double>::allocate(const UnitCell& ucell,
-                                  Grid_Driver& gd,
+                                  const Grid_Driver& gd,
                                   const Parallel_Orbitals& pv,
                                   ForceStressArrays& fsr, // mohan add 2024-06-15
                                   const TwoCenterBundle& two_center_bundle,
@@ -176,7 +176,7 @@ void Force_LCAO<double>::ftable(const bool isforce,
                                 const bool isstress,
                                 ForceStressArrays& fsr, // mohan add 2024-06-16
                                 const UnitCell& ucell,
-                                Grid_Driver& gd,
+                                const Grid_Driver& gd,
                                 const psi::Psi<double>* psi,
                                 const elecstate::ElecState* pelec,
                                 ModuleBase::matrix& foverlap,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
index 6b88139fc4..e9203d6352 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
@@ -27,7 +27,7 @@
 
 template <>
 void Force_LCAO<std::complex<double>>::allocate(const UnitCell& ucell,
-                                                Grid_Driver& gd,
+                                                const Grid_Driver& gd,
                                                 const Parallel_Orbitals& pv,
                                                 ForceStressArrays& fsr, // mohan add 2024-06-15
                                                 const TwoCenterBundle& two_center_bundle,
@@ -272,7 +272,7 @@ void Force_LCAO<std::complex<double>>::ftable(const bool isforce,
                                               const bool isstress,
                                               ForceStressArrays& fsr, // mohan add 2024-06-15
                                               const UnitCell& ucell,
-                                              Grid_Driver& gd,
+                                              const Grid_Driver& gd,
                                               const psi::Psi<std::complex<double>>* psi,
                                               const elecstate::ElecState* pelec,
                                               ModuleBase::matrix& foverlap,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h
index f7a581b16c..4b02f1dc54 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h
@@ -25,7 +25,6 @@ void init_basis_lcao(Parallel_Orbitals& pv,
         TwoCenterBundle& two_center_bundle,
         LCAO_Orbitals& orb);
 
-
 void build_Nonlocal_mu_new(const Parallel_Orbitals& pv,
                            ForceStressArrays& fsr, // mohan 2024-06-16
                            double* HlocR,
@@ -33,7 +32,7 @@ void build_Nonlocal_mu_new(const Parallel_Orbitals& pv,
                            const UnitCell& ucell,
                            const LCAO_Orbitals& orb,
                            const TwoCenterIntegrator& intor_orb_beta,
-                           Grid_Driver* GridD);
+                           const Grid_Driver* GridD);
 
 /**
  * @brief prepare gird integration
@@ -164,7 +163,7 @@ void build_ST_new(ForceStressArrays& fsr,
                   const LCAO_Orbitals& orb,
                   const Parallel_Orbitals& pv,
                   const TwoCenterBundle& two_center_bundle,
-                  Grid_Driver* GridD,
+                  const Grid_Driver* GridD,
                   double* SHlocR,
                   bool cal_syns = false,
                   double dmax = 0.0);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_mu.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_mu.cpp
index ef90927746..be02ede822 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_mu.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_mu.cpp
@@ -16,7 +16,7 @@ void build_Nonlocal_mu_new(const Parallel_Orbitals& pv,
                            const UnitCell& ucell,
                            const LCAO_Orbitals& orb,
                            const TwoCenterIntegrator& intor_orb_beta,
-                           Grid_Driver* GridD)
+                           const Grid_Driver* GridD)
 {
     ModuleBase::TITLE("LCAO_domain", "vnl_mu_new");
     ModuleBase::timer::tick("LCAO_domain", "vnl_mu_new");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_set_st.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_set_st.cpp
index 9d4c06988a..718d504615 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_set_st.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_set_st.cpp
@@ -316,7 +316,7 @@ void build_ST_new(ForceStressArrays& fsr,
                   const LCAO_Orbitals& orb,
                   const Parallel_Orbitals& pv,
                   const TwoCenterBundle& two_center_bundle,
-                  Grid_Driver* GridD,
+                  const Grid_Driver* GridD,
                   double* HSloc,
                   bool cal_syns,
                   double dmax)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
index b08da06af8..06ab104c84 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
@@ -45,7 +45,7 @@ namespace hamilt
 
 template <typename TK, typename TR>
 HamiltLCAO<TK, TR>::HamiltLCAO(const UnitCell& ucell,
-                               Grid_Driver& grid_d,
+                               const Grid_Driver& grid_d,
                                const Parallel_Orbitals* paraV,
                                const K_Vectors& kv_in,
                                const TwoCenterIntegrator& intor_overlap_orb,
@@ -74,7 +74,7 @@ template <typename TK, typename TR>
 HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
                                Gint_k* GK_in,
                                const UnitCell& ucell,
-                               Grid_Driver& grid_d,
+                               const Grid_Driver& grid_d,
                                const Parallel_Orbitals* paraV,
                                elecstate::Potential* pot_in,
                                const K_Vectors& kv_in,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
index ee05e59d34..65e95e22af 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
@@ -33,7 +33,7 @@ class HamiltLCAO : public Hamilt<TK>
     HamiltLCAO(Gint_Gamma* GG_in,
                Gint_k* GK_in,
                const UnitCell& ucell,
-               Grid_Driver& grid_d,
+               const Grid_Driver& grid_d,
                const Parallel_Orbitals* paraV,
                elecstate::Potential* pot_in,
                const K_Vectors& kv_in,
@@ -52,7 +52,7 @@ class HamiltLCAO : public Hamilt<TK>
      * @brief Constructor of vacuum Operators, only HR and SR will be initialed as empty HContainer
      */
     HamiltLCAO(const UnitCell& ucell,
-               Grid_Driver& grid_d,
+               const Grid_Driver& grid_d,
                const Parallel_Orbitals* paraV,
                const K_Vectors& kv_in,
                const TwoCenterIntegrator& intor_overlap_orb,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
index 476fa43ee7..18e19d071e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
@@ -20,7 +20,7 @@ DeePKS<OperatorLCAO<TK, TR>>::DeePKS(HS_Matrix_K<TK>* hsk_in,
                                      const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                      HContainer<TR>* hR_in,
                                      const UnitCell* ucell_in,
-                                     Grid_Driver* GridD_in,
+                                     const Grid_Driver* GridD_in,
                                      const TwoCenterIntegrator* intor_orb_alpha,
                                      const LCAO_Orbitals* ptr_orb,
                                      const int& nks_in,
@@ -47,7 +47,7 @@ DeePKS<OperatorLCAO<TK, TR>>::~DeePKS()
 #ifdef __DEEPKS
 // initialize_HR()
 template <typename TK, typename TR>
-void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
+void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(const Grid_Driver* GridD)
 {
     ModuleBase::TITLE("DeePKS", "initialize_HR");
     ModuleBase::timer::tick("DeePKS", "initialize_HR");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
index 32f7d1cddc..501298ed0e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
@@ -33,7 +33,7 @@ class DeePKS<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
                                  const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                  HContainer<TR>* hR_in,
                                  const UnitCell* ucell_in,
-                                 Grid_Driver* GridD_in,
+                                 const Grid_Driver* GridD_in,
                                  const TwoCenterIntegrator* intor_orb_alpha,
                                  const LCAO_Orbitals* ptr_orb,
                                  const int& nks_in,
@@ -59,7 +59,7 @@ class DeePKS<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 
     const UnitCell* ucell = nullptr;
 
-    Grid_Driver* gd = nullptr;
+    const Grid_Driver* gd = nullptr;
 
     HContainer<TR>* H_V_delta = nullptr;
 
@@ -74,7 +74,7 @@ class DeePKS<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the DeePKS real space Hamiltonian correction with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(Grid_Driver* GridD);
+    void initialize_HR(const Grid_Driver* GridD);
 
     /**
      * @brief calculate the DeePKS correction matrix with specific <I,J,R> atom-pairs
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
index 7e89c63d1b..e49b729245 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
@@ -16,7 +16,7 @@ hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::DFTU(HS_Matrix_K<TK>* hsk_in,
                                                  const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                                  hamilt::HContainer<TR>* hR_in,
                                                  const UnitCell& ucell_in,
-                                                 Grid_Driver* GridD_in,
+                                                 const Grid_Driver* GridD_in,
                                                  const TwoCenterIntegrator* intor,
                                                  const std::vector<double>& orb_cutoff,
                                                  ModuleDFTU::DFTU* dftu_in)
@@ -42,7 +42,7 @@ hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::~DFTU()
 
 // initialize_HR()
 template <typename TK, typename TR>
-void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
+void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(const Grid_Driver* GridD)
 {
     ModuleBase::TITLE("DFTU", "initialize_HR");
     ModuleBase::timer::tick("DFTU", "initialize_HR");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.h
index 15d517387c..811cb11202 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.h
@@ -30,7 +30,7 @@ class DFTU<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
                                const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                hamilt::HContainer<TR>* hR_in,
                                const UnitCell& ucell_in,
-                               Grid_Driver* gridD_in,
+                               const Grid_Driver* gridD_in,
                                const TwoCenterIntegrator* intor,
                                const std::vector<double>& orb_cutoff,
                                ModuleDFTU::DFTU* dftu_in);
@@ -67,7 +67,7 @@ class DFTU<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * the size of HR will not change in DFTU,
      * because I don't want to expand HR larger than Nonlocal operator caused by DFTU
      */
-    void initialize_HR(Grid_Driver* gridD_in);
+    void initialize_HR(const Grid_Driver* gridD_in);
 
     /**
      * @brief calculate the <phi|alpha^I> overlap values and save them in this->nlm_tot
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_lcao.cpp
index c86e086c60..e2f8b65209 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_lcao.cpp
@@ -8,14 +8,13 @@
 #include "module_parameter/parameter.h"
 
 template <typename TK, typename TR>
-hamilt::DeltaSpin<hamilt::OperatorLCAO<TK, TR>>::DeltaSpin(
-    HS_Matrix_K<TK>* hsk_in,
-    const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-    hamilt::HContainer<TR>* hR_in,
-    const UnitCell& ucell_in,
-    Grid_Driver* gridD_in,
-    const TwoCenterIntegrator* intor,
-    const std::vector<double>& orb_cutoff)
+hamilt::DeltaSpin<hamilt::OperatorLCAO<TK, TR>>::DeltaSpin(HS_Matrix_K<TK>* hsk_in,
+                                                           const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                                                           hamilt::HContainer<TR>* hR_in,
+                                                           const UnitCell& ucell_in,
+                                                           const Grid_Driver* gridD_in,
+                                                           const TwoCenterIntegrator* intor,
+                                                           const std::vector<double>& orb_cutoff)
     : hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), intor_(intor), orb_cutoff_(orb_cutoff)
 {
     this->cal_type = calculation_type::lcao_sc_lambda;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_lcao.h
index 413624ffca..4353c5c0ba 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_lcao.h
@@ -27,12 +27,12 @@ class DeltaSpin<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
     DeltaSpin<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
-                                      const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                                      hamilt::HContainer<TR>* hR_in,
-                                      const UnitCell& ucell_in,
-                                      Grid_Driver* gridD_in,
-                                      const TwoCenterIntegrator* intor,
-                                      const std::vector<double>& orb_cutoff);
+                                    const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                                    hamilt::HContainer<TR>* hR_in,
+                                    const UnitCell& ucell_in,
+                                    const Grid_Driver* gridD_in,
+                                    const TwoCenterIntegrator* intor,
+                                    const std::vector<double>& orb_cutoff);
     ~DeltaSpin<OperatorLCAO<TK, TR>>();
 
     /**
@@ -69,7 +69,7 @@ class DeltaSpin<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
   private:
     const UnitCell* ucell = nullptr;
 
-    Grid_Driver* gridD = nullptr;
+    const Grid_Driver* gridD = nullptr;
 
     const Parallel_Orbitals* paraV = nullptr;
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
index 97b454a710..c45a43fa32 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
@@ -14,7 +14,7 @@ hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>::EkineticNew(
     hamilt::HContainer<TR>* hR_in,
     const UnitCell* ucell_in,
     const std::vector<double>& orb_cutoff,
-    Grid_Driver* GridD_in,
+    const Grid_Driver* GridD_in,
     const TwoCenterIntegrator* intor)
     : hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), orb_cutoff_(orb_cutoff), intor_(intor)
 {
@@ -40,7 +40,7 @@ hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>::~EkineticNew()
 
 // initialize_HR()
 template <typename TK, typename TR>
-void hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
+void hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(const Grid_Driver* GridD)
 {
     ModuleBase::TITLE("EkineticNew", "initialize_HR");
     ModuleBase::timer::tick("EkineticNew", "initialize_HR");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.h
index 960985559f..103f7a3508 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.h
@@ -45,7 +45,7 @@ class EkineticNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
                                       HContainer<TR>* hR_in,
                                       const UnitCell* ucell_in,
                                       const std::vector<double>& orb_cutoff,
-                                      Grid_Driver* GridD_in,
+                                      const Grid_Driver* GridD_in,
                                       const TwoCenterIntegrator* intor);
 
     /**
@@ -78,7 +78,7 @@ class EkineticNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the electronic kinetic matrix with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(Grid_Driver* GridD_in);
+    void initialize_HR(const Grid_Driver* GridD_in);
 
     /**
      * @brief calculate the electronic kinetic matrix with specific <I,J,R> atom-pairs
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
index ea856eed4e..b8afc97987 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
@@ -16,7 +16,7 @@ hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>::NonlocalNew(
     hamilt::HContainer<TR>* hR_in,
     const UnitCell* ucell_in,
     const std::vector<double>& orb_cutoff,
-    Grid_Driver* GridD_in,
+    const Grid_Driver* GridD_in,
     const TwoCenterIntegrator* intor)
     : hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), orb_cutoff_(orb_cutoff), intor_(intor)
 {
@@ -45,7 +45,7 @@ hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>::~NonlocalNew()
 
 // initialize_HR()
 template <typename TK, typename TR>
-void hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
+void hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(const Grid_Driver* GridD)
 {
     ModuleBase::TITLE("NonlocalNew", "initialize_HR");
     ModuleBase::timer::tick("NonlocalNew", "initialize_HR");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h
index 8816c36a42..414cd9b041 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h
@@ -44,7 +44,7 @@ class NonlocalNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
                                       hamilt::HContainer<TR>* hR_in,
                                       const UnitCell* ucell_in,
                                       const std::vector<double>& orb_cutoff,
-                                      Grid_Driver* GridD_in,
+                                      const Grid_Driver* GridD_in,
                                       const TwoCenterIntegrator* intor);
     ~NonlocalNew<OperatorLCAO<TK, TR>>();
 
@@ -81,7 +81,7 @@ class NonlocalNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the non-local pseudopotential matrix with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(Grid_Driver* GridD_in);
+    void initialize_HR(const Grid_Driver* GridD_in);
 
     /**
      * @brief calculate the non-local pseudopotential matrix with specific <I,J,R> atom-pairs
@@ -101,7 +101,7 @@ class NonlocalNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
                     const std::unordered_map<int, std::vector<double>>& nlm2_all,
                     TR* data_pointer);
 
-    Grid_Driver* gridD = nullptr;
+    const Grid_Driver* gridD = nullptr;
     int current_type = 0;
     /**
      * @brief calculate the atomic Force of <I,J,R> atom pair
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
index 0454b7ee53..b7654aac65 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
@@ -15,7 +15,7 @@ hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::OverlapNew(HS_Matrix_K<TK>* hs
                                                              hamilt::HContainer<TR>* SR_in,
                                                              const UnitCell* ucell_in,
                                                              const std::vector<double>& orb_cutoff,
-                                                             Grid_Driver* GridD_in,
+                                                             const Grid_Driver* GridD_in,
                                                              const TwoCenterIntegrator* intor)
     : hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), orb_cutoff_(orb_cutoff), intor_(intor)
 {
@@ -32,7 +32,7 @@ hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::OverlapNew(HS_Matrix_K<TK>* hs
 
 // initialize_SR()
 template <typename TK, typename TR>
-void hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::initialize_SR(Grid_Driver* GridD)
+void hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::initialize_SR(const Grid_Driver* GridD)
 {
     ModuleBase::TITLE("OverlapNew", "initialize_SR");
     ModuleBase::timer::tick("OverlapNew", "initialize_SR");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.h
index 93699cecea..6ae100b225 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.h
@@ -43,7 +43,7 @@ class OverlapNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
                                      hamilt::HContainer<TR>* SR_in,
                                      const UnitCell* ucell_in,
                                      const std::vector<double>& orb_cutoff,
-                                     Grid_Driver* GridD_in,
+                                     const Grid_Driver* GridD_in,
                                      const TwoCenterIntegrator* intor);
 
     virtual void contributeHR() override;
@@ -68,7 +68,7 @@ class OverlapNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the overlap matrix with specific <I,J,R> atom-pairs
      * the size of SR will be fixed after initialization
      */
-    void initialize_SR(Grid_Driver* GridD_in);
+    void initialize_SR(const Grid_Driver* GridD_in);
 
     /**
      * @brief calculate the overlap matrix with specific <I,J,R> atom-pairs
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
index 9d7704189a..c6df9bfe1e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
@@ -20,7 +20,7 @@ TDEkinetic<OperatorLCAO<TK, TR>>::TDEkinetic(HS_Matrix_K<TK>* hsk_in,
                                              const K_Vectors* kv_in,
                                              const UnitCell* ucell_in,
                                              const std::vector<double>& orb_cutoff,
-                                             Grid_Driver* GridD_in,
+                                             const Grid_Driver* GridD_in,
                                              const TwoCenterIntegrator* intor)
     : OperatorLCAO<TK, TR>(hsk_in, kv_in->kvec_d, hR_in), orb_cutoff_(orb_cutoff), kv(kv_in), intor_(intor)
 {
@@ -244,7 +244,7 @@ void hamilt::TDEkinetic<hamilt::OperatorLCAO<TK, TR>>::set_HR_fixed(void* hR_tmp
     this->allocated = false;
 }
 template <typename TK, typename TR>
-void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
+void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR(const Grid_Driver* GridD)
 {
     if (elecstate::H_TDDFT_pw::stype != 1)
     {
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
index c58082d07b..c5ec078c14 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
@@ -36,12 +36,12 @@ class TDEkinetic<OperatorLCAO<TK,TR>> : public OperatorLCAO<TK, TR>
 {
   public:
     TDEkinetic<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
-                                 hamilt::HContainer<TR>* hR_in,
-                                 const K_Vectors* kv_in,
-                                 const UnitCell* ucell_in,
-                                 const std::vector<double>& orb_cutoff,
-                                 Grid_Driver* GridD_in,
-                                 const TwoCenterIntegrator* intor);
+                                     hamilt::HContainer<TR>* hR_in,
+                                     const K_Vectors* kv_in,
+                                     const UnitCell* ucell_in,
+                                     const std::vector<double>& orb_cutoff,
+                                     const Grid_Driver* GridD_in,
+                                     const TwoCenterIntegrator* intor);
     ~TDEkinetic();
 
     virtual void contributeHR() override;
@@ -54,7 +54,7 @@ class TDEkinetic<OperatorLCAO<TK,TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the non-local pseudopotential matrix with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(Grid_Driver* GridD);
+    void initialize_HR(const Grid_Driver* GridD);
 
     /**
      * @brief initialize HR_tmp
@@ -89,7 +89,7 @@ class TDEkinetic<OperatorLCAO<TK,TR>> : public OperatorLCAO<TK, TR>
     HContainer<TR>* SR = nullptr;
     /// @brief Store real space hamiltonian. TD term should include imaginary part, thus it has to be complex type. Only shared between TD operators.
     HContainer<std::complex<double>>* hR_tmp = nullptr;
-    Grid_Driver* Grid = nullptr;
+    const Grid_Driver* Grid = nullptr;
 
     const K_Vectors* kv;
     /// @brief correction term iA⋅∇
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.cpp
index 328b55dbec..1d63850ab8 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.cpp
@@ -19,7 +19,7 @@ hamilt::TDNonlocal<hamilt::OperatorLCAO<TK, TR>>::TDNonlocal(HS_Matrix_K<TK>* hs
                                                              hamilt::HContainer<TR>* hR_in,
                                                              const UnitCell* ucell_in,
                                                              const LCAO_Orbitals& orb,
-                                                             Grid_Driver* GridD_in)
+                                                             const Grid_Driver* GridD_in)
     : hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), orb_(orb)
 {
     this->cal_type = calculation_type::lcao_tddft_velocity;
@@ -50,7 +50,7 @@ void hamilt::TDNonlocal<hamilt::OperatorLCAO<TK, TR>>::init_td()
 }
 // initialize_HR()
 template <typename TK, typename TR>
-void hamilt::TDNonlocal<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
+void hamilt::TDNonlocal<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(const Grid_Driver* GridD)
 {
     if (elecstate::H_TDDFT_pw::stype != 1)
     {
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h
index bef490a73a..b7e347b5fe 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h
@@ -39,7 +39,7 @@ class TDNonlocal<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
                                      hamilt::HContainer<TR>* hR_in,
                                      const UnitCell* ucell_in,
                                      const LCAO_Orbitals& orb,
-                                     Grid_Driver* GridD_in);
+                                     const Grid_Driver* GridD_in);
     ~TDNonlocal<OperatorLCAO<TK, TR>>();
 
     /**
@@ -59,7 +59,7 @@ class TDNonlocal<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     /// @brief Store real space hamiltonian. TD term should include imaginary part, thus it has to be complex type. Only
     /// shared between TD operators.
     HContainer<std::complex<double>>* hR_tmp = nullptr;
-    Grid_Driver* Grid = nullptr;
+    const Grid_Driver* Grid = nullptr;
 
     bool allocated = false;
 
@@ -70,7 +70,7 @@ class TDNonlocal<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the non-local pseudopotential matrix with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(Grid_Driver* GridD_in);
+    void initialize_HR(const Grid_Driver* GridD_in);
 
     /**
      * @brief initialize HR_tmp
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp
index 9f5bb551eb..088d8c8ef2 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp
@@ -156,7 +156,8 @@ void Grid_Driver::Find_atom(const UnitCell& ucell,
                             const ModuleBase::Vector3<double>& tau,
                             const int& T,
                             const int& I,
-                            AdjacentAtomInfo* adjs) {
+                            AdjacentAtomInfo* adjs) const
+{
     adjs->adj_num = ucell.nat - 1;
     adjs->adjacent_tau.resize(ucell.nat);
     adjs->ntype.resize(ucell.nat, 0);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
index ccd2d1e809..aeb5d55c01 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
@@ -10,8 +10,7 @@ namespace hamilt
 
 // initialize_HR()
 template <typename TK, typename TR>
-void Veff<OperatorLCAO<TK, TR>>::initialize_HR(const UnitCell* ucell_in,
-                                        Grid_Driver* GridD)
+void Veff<OperatorLCAO<TK, TR>>::initialize_HR(const UnitCell* ucell_in, const Grid_Driver* GridD)
 {
     ModuleBase::TITLE("Veff", "initialize_HR");
     ModuleBase::timer::tick("Veff", "initialize_HR");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
index 03eb56d66d..65f6de8e27 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
@@ -36,14 +36,14 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * @param GK_in: the pointer of Gint_k object, used for grid integration
     */
     Veff<OperatorLCAO<TK, TR>>(Gint_k* GK_in,
-        HS_Matrix_K<TK>* hsk_in,
-        const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-        elecstate::Potential* pot_in,
-        hamilt::HContainer<TR>* hR_in,
-        const UnitCell* ucell_in,
-        const std::vector<double>& orb_cutoff,
-        Grid_Driver* GridD_in,
-        const int& nspin)
+                               HS_Matrix_K<TK>* hsk_in,
+                               const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                               elecstate::Potential* pot_in,
+                               hamilt::HContainer<TR>* hR_in,
+                               const UnitCell* ucell_in,
+                               const std::vector<double>& orb_cutoff,
+                               const Grid_Driver* GridD_in,
+                               const int& nspin)
         : GK(GK_in), orb_cutoff_(orb_cutoff), pot(pot_in), ucell(ucell_in),
           gd(GridD_in), OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
@@ -57,14 +57,14 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * @param GG_in: the pointer of Gint_Gamma object, used for grid integration
     */
     Veff<OperatorLCAO<TK, TR>>(Gint_Gamma* GG_in,
-        HS_Matrix_K<TK>* hsk_in,
-        const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-        elecstate::Potential* pot_in,
-        hamilt::HContainer<TR>* hR_in,
-        const UnitCell* ucell_in,
-        const std::vector<double>& orb_cutoff,
-        Grid_Driver* GridD_in,
-        const int& nspin)
+                               HS_Matrix_K<TK>* hsk_in,
+                               const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                               elecstate::Potential* pot_in,
+                               hamilt::HContainer<TR>* hR_in,
+                               const UnitCell* ucell_in,
+                               const std::vector<double>& orb_cutoff,
+                               const Grid_Driver* GridD_in,
+                               const int& nspin)
         : GG(GG_in), orb_cutoff_(orb_cutoff), pot(pot_in), OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_gint;
@@ -84,29 +84,29 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     virtual void contributeHR() override;
   
   const UnitCell* ucell;
-  Grid_Driver* gd;
-  private:
-    // used for k-dependent grid integration.
-    Gint_k* GK = nullptr;
+  const Grid_Driver* gd;
 
-    // used for gamma only algorithms.
-    Gint_Gamma* GG = nullptr;
+private:
+  // used for k-dependent grid integration.
+  Gint_k* GK = nullptr;
 
-    std::vector<double> orb_cutoff_;
+  // used for gamma only algorithms.
+  Gint_Gamma* GG = nullptr;
 
-    // Charge calculating method in LCAO base and contained grid base calculation: DM_R, DM, pvpR_reduced
+  std::vector<double> orb_cutoff_;
 
-    elecstate::Potential* pot = nullptr;
+  // Charge calculating method in LCAO base and contained grid base calculation: DM_R, DM, pvpR_reduced
 
-    int nspin = 1;
+  elecstate::Potential* pot = nullptr;
 
-    /**
-     * @brief initialize HR, search the nearest neighbor atoms
-     * HContainer is used to store the electronic kinetic matrix with specific <I,J,R> atom-pairs
-     * the size of HR will be fixed after initialization
-     */
-    void initialize_HR(const UnitCell* ucell_in, Grid_Driver* GridD_in);
+  int nspin = 1;
 
+  /**
+   * @brief initialize HR, search the nearest neighbor atoms
+   * HContainer is used to store the effective potential matrix with specific <I,J,R> atom-pairs
+   * the size of HR will be fixed after initialization
+   */
+  void initialize_HR(const UnitCell* ucell_in, const Grid_Driver* GridD_in);
 };
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/record_adj.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/record_adj.cpp
index b48de89998..150df06946 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/record_adj.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/record_adj.cpp
@@ -43,7 +43,7 @@ void Record_adj::delete_grid()
 // be called only once in an ion-step.
 //--------------------------------------------
 void Record_adj::for_2d(const UnitCell& ucell,
-                        Grid_Driver& grid_d,
+                        const Grid_Driver& grid_d,
                         Parallel_Orbitals& pv,
                         bool gamma_only,
                         const std::vector<double>& orb_cutoff)
@@ -287,7 +287,7 @@ void Record_adj::for_2d(const UnitCell& ucell,
 // grid division (cut along z direction)
 //--------------------------------------------
 void Record_adj::for_grid(const UnitCell& ucell,
-                          Grid_Driver& grid_d,
+                          const Grid_Driver& grid_d,
                           const Grid_Technique& gt,
                           const std::vector<double>& orb_cutoff)
 {
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/record_adj.h b/source/module_hamilt_lcao/hamilt_lcaodft/record_adj.h
index 8a80b63cee..441f1452f6 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/record_adj.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/record_adj.h
@@ -21,7 +21,7 @@ class Record_adj
     // HPSEPS's 2D block division.
     //--------------------------------------------
     void for_2d(const UnitCell& ucell,
-                Grid_Driver& grid_d,
+                const Grid_Driver& grid_d,
                 Parallel_Orbitals& pv,
                 bool gamma_only,
                 const std::vector<double>& orb_cutoff);
@@ -31,7 +31,7 @@ class Record_adj
     // grid division (cut along z direction)
     //--------------------------------------------
     void for_grid(const UnitCell& ucell,
-                  Grid_Driver& grid_d,
+                  const Grid_Driver& grid_d,
                   const Grid_Technique& gt,
                   const std::vector<double>& orb_cutoff);
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
index 31be5f4582..cab9977595 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
@@ -7,7 +7,7 @@
 void sparse_format::cal_dH(const UnitCell& ucell,
                            const Parallel_Orbitals& pv,
                            LCAO_HS_Arrays& HS_Arrays,
-                           Grid_Driver& grid,
+                           const Grid_Driver& grid,
                            const TwoCenterBundle& two_center_bundle,
                            const LCAO_Orbitals& orb,
                            const int& current_spin,
@@ -65,7 +65,7 @@ void sparse_format::cal_dH(const UnitCell& ucell,
     return;
 }
 
-void sparse_format::set_R_range(std::set<Abfs::Vector3_Order<int>>& all_R_coor, Grid_Driver& grid)
+void sparse_format::set_R_range(std::set<Abfs::Vector3_Order<int>>& all_R_coor, const Grid_Driver& grid)
 {
     const int RminX = int(-grid.getTrueCellX());
     const int RminY = int(-grid.getTrueCellY());
@@ -94,7 +94,7 @@ void sparse_format::cal_dSTN_R(const UnitCell& ucell,
                                const Parallel_Orbitals& pv,
                                LCAO_HS_Arrays& HS_Arrays,
                                ForceStressArrays& fsr,
-                               Grid_Driver& grid,
+                               const Grid_Driver& grid,
                                const std::vector<double>& orb_cutoff,
                                const int& current_spin,
                                const double& sparse_thr)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h
index f0657dfdd7..8c3f41cb6f 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h
@@ -14,7 +14,7 @@ namespace sparse_format
 void cal_dH(const UnitCell& ucell,
             const Parallel_Orbitals& pv,
             LCAO_HS_Arrays& HS_Arrays,
-            Grid_Driver& grid,
+            const Grid_Driver& grid,
             const TwoCenterBundle& two_center_bundle,
             const LCAO_Orbitals& orb,
             const int& current_spin,
@@ -22,14 +22,14 @@ void cal_dH(const UnitCell& ucell,
             Gint_k& gint_k);
 
 // be called by 'cal_dH_sparse'
-void set_R_range(std::set<Abfs::Vector3_Order<int>>& all_R_coor, Grid_Driver& grid);
+void set_R_range(std::set<Abfs::Vector3_Order<int>>& all_R_coor, const Grid_Driver& grid);
 
 // be called by 'cal_dH_sparse'
 void cal_dSTN_R(const UnitCell& ucell,
                 const Parallel_Orbitals& pv,
                 LCAO_HS_Arrays& HS_Arrays,
                 ForceStressArrays& fsr, // mohan add 2024-06-16
-                Grid_Driver& grid,
+                const Grid_Driver& grid,
                 const std::vector<double>& orb_cutoff,
                 const int& current_spin,
                 const double& sparse_thr);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp
index 8fa45e7a6e..e2ab76647b 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp
@@ -8,16 +8,17 @@
 #include "spar_u.h"
 
 void sparse_format::cal_HSR(const UnitCell& ucell,
-    const Parallel_Orbitals& pv,
-    LCAO_HS_Arrays& HS_Arrays,
-    Grid_Driver& grid,
-    const int& current_spin,
-    const double& sparse_thr,
-    const int(&nmp)[3],
-    hamilt::Hamilt<std::complex<double>>* p_ham
+                            const Parallel_Orbitals& pv,
+                            LCAO_HS_Arrays& HS_Arrays,
+                            const Grid_Driver& grid,
+                            const int& current_spin,
+                            const double& sparse_thr,
+                            const int (&nmp)[3],
+                            hamilt::Hamilt<std::complex<double>>* p_ham
 #ifdef __EXX
-    , const std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd
-    , const std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc
+                            ,
+                            const std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd,
+                            const std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc
 #endif
 ) {
     ModuleBase::TITLE("sparse_format", "cal_HSR");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h
index a1118f994e..bf80386678 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h
@@ -6,16 +6,17 @@
 namespace sparse_format {
     using TAC = std::pair<int, std::array<int, 3>>;
     void cal_HSR(const UnitCell& ucell,
-        const Parallel_Orbitals& pv,
-        LCAO_HS_Arrays& HS_Arrays,
-        Grid_Driver& grid,
-        const int& current_spin,
-        const double& sparse_thr,
-        const int(&nmp)[3],
-        hamilt::Hamilt<std::complex<double>>* p_ham
+                 const Parallel_Orbitals& pv,
+                 LCAO_HS_Arrays& HS_Arrays,
+                 const Grid_Driver& grid,
+                 const int& current_spin,
+                 const double& sparse_thr,
+                 const int (&nmp)[3],
+                 hamilt::Hamilt<std::complex<double>>* p_ham
 #ifdef __EXX
-        , const std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd = nullptr
-        , const std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc = nullptr
+                 ,
+                 const std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd = nullptr,
+                 const std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc = nullptr
 #endif
     );
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp
index 1435c91e3e..8628c0cc34 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp
@@ -12,14 +12,12 @@
 void sparse_format::cal_SR(
     const Parallel_Orbitals& pv,
     std::set<Abfs::Vector3_Order<int>>& all_R_coor,
-    std::map<Abfs::Vector3_Order<int>,
-             std::map<size_t, std::map<size_t, double>>>& SR_sparse,
-    std::map<Abfs::Vector3_Order<int>,
-             std::map<size_t, std::map<size_t, std::complex<double>>>>&
-        SR_soc_sparse,
-    Grid_Driver& grid,
+    std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>>& SR_sparse,
+    std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, std::complex<double>>>>& SR_soc_sparse,
+    const Grid_Driver& grid,
     const double& sparse_thr,
-    hamilt::Hamilt<std::complex<double>>* p_ham) {
+    hamilt::Hamilt<std::complex<double>>* p_ham)
+{
     ModuleBase::TITLE("sparse_format", "cal_SR");
 
     sparse_format::set_R_range(all_R_coor, grid);
@@ -56,10 +54,11 @@ void sparse_format::cal_SR(
 void sparse_format::cal_TR(const UnitCell& ucell,
                            const Parallel_Orbitals& pv,
                            LCAO_HS_Arrays& HS_Arrays,
-                           Grid_Driver& grid,
+                           const Grid_Driver& grid,
                            const TwoCenterBundle& two_center_bundle,
                            const LCAO_Orbitals& orb,
-                           const double& sparse_thr) {
+                           const double& sparse_thr)
+{
     ModuleBase::TITLE("sparse_format", "cal_TR");
 
     // need to rebuild T(R)
@@ -92,9 +91,10 @@ void sparse_format::cal_TR(const UnitCell& ucell,
 void sparse_format::cal_STN_R_for_T(const UnitCell& ucell,
                                     const Parallel_Orbitals& pv,
                                     LCAO_HS_Arrays& HS_arrays,
-                                    Grid_Driver& grid,
+                                    const Grid_Driver& grid,
                                     const std::vector<double>& orb_cutoff,
-                                    const double& sparse_thr) {
+                                    const double& sparse_thr)
+{
     ModuleBase::TITLE("sparse_format", "cal_STN_R_for_T");
 
     const int nspin = PARAM.inp.nspin;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h
index 33f2b25440..6d97edaa9a 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h
@@ -8,12 +8,9 @@ namespace sparse_format {
 //! calculate overlap matrix with lattice vector R
 void cal_SR(const Parallel_Orbitals& pv,
             std::set<Abfs::Vector3_Order<int>>& all_R_coor,
-            std::map<Abfs::Vector3_Order<int>,
-                     std::map<size_t, std::map<size_t, double>>>& SR_sparse,
-            std::map<Abfs::Vector3_Order<int>,
-                     std::map<size_t, std::map<size_t, std::complex<double>>>>&
-                SR_soc_sparse,
-            Grid_Driver& grid,
+            std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>>& SR_sparse,
+            std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, std::complex<double>>>>& SR_soc_sparse,
+            const Grid_Driver& grid,
             const double& sparse_thr,
             hamilt::Hamilt<std::complex<double>>* p_ham);
 
@@ -21,7 +18,7 @@ void cal_SR(const Parallel_Orbitals& pv,
 void cal_TR(const UnitCell& ucell,
             const Parallel_Orbitals& pv,
             LCAO_HS_Arrays& HS_arrays,
-            Grid_Driver& grid,
+            const Grid_Driver& grid,
             const TwoCenterBundle& two_center_bundle,
             const LCAO_Orbitals& orb,
             const double& sparse_thr);
@@ -30,7 +27,7 @@ void cal_TR(const UnitCell& ucell,
 void cal_STN_R_for_T(const UnitCell& ucell,
                      const Parallel_Orbitals& pv,
                      LCAO_HS_Arrays& HS_arrays,
-                     Grid_Driver& grid,
+                     const Grid_Driver& grid,
                      const std::vector<double>& orb_cutoff,
                      const double& sparse_thr);
 
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h
index 51d765f1b2..179938f4ef 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h
@@ -262,13 +262,13 @@ class LCAO_Deepks
     void build_psialpha(const bool& cal_deri /**< [in] 0 for 2-center intergration, 1 for its derivation*/,
                         const UnitCell& ucell,
                         const LCAO_Orbitals& orb,
-                        Grid_Driver& GridD,
+                        const Grid_Driver& GridD,
                         const TwoCenterIntegrator& overlap_orb_alpha);
 
     void check_psialpha(const bool& cal_deri /**< [in] 0 for 2-center intergration, 1 for its derivation*/,
                         const UnitCell& ucell,
                         const LCAO_Orbitals& orb,
-                        Grid_Driver& GridD);
+                        const Grid_Driver& GridD);
 
     //-------------------
     // LCAO_deepks_pdm.cpp
@@ -302,24 +302,24 @@ class LCAO_Deepks
     void cal_projected_DM(const elecstate::DensityMatrix<double, double>* dm,
                           const UnitCell& ucell,
                           const LCAO_Orbitals& orb,
-                          Grid_Driver& GridD);
+                          const Grid_Driver& GridD);
 
     void cal_projected_DM(const elecstate::DensityMatrix<std::complex<double>, double>* dm,
-                            const UnitCell& ucell,
-                            const LCAO_Orbitals& orb,
-                            Grid_Driver& GridD);
+                          const UnitCell& ucell,
+                          const LCAO_Orbitals& orb,
+                          const Grid_Driver& GridD);
 
     void check_projected_dm();
 
     void cal_projected_DM_equiv(const elecstate::DensityMatrix<double, double>* dm,
                                 const UnitCell& ucell,
                                 const LCAO_Orbitals& orb,
-                                Grid_Driver& GridD);
+                                const Grid_Driver& GridD);
 
     void cal_projected_DM_k_equiv(const elecstate::DensityMatrix<std::complex<double>, double>* dm,
                                   const UnitCell& ucell,
                                   const LCAO_Orbitals& orb,
-                                  Grid_Driver& GridD);
+                                  const Grid_Driver& GridD);
 
     // calculate the gradient of pdm with regard to atomic positions
     // d/dX D_{Inl,mm'}
@@ -328,7 +328,7 @@ class LCAO_Deepks
         const std::vector<std::vector<TK>>& dm,
         const UnitCell& ucell,
         const LCAO_Orbitals& orb,
-        Grid_Driver& GridD,
+        const Grid_Driver& GridD,
         const int nks,
         const std::vector<ModuleBase::Vector3<double>>& kvec_d,
         const bool isstress);
@@ -470,17 +470,17 @@ class LCAO_Deepks
                              const std::vector<ModuleBase::Vector3<double>>& kvec_d,
                              const UnitCell& ucell,
                              const LCAO_Orbitals& orb,
-                             Grid_Driver& GridD);
+                             const Grid_Driver& GridD);
 
     //calculates v_delta_precalc
     template <typename TK>
     void cal_v_delta_precalc(const int nlocal,
-        const int nat,
-        const int nks,
-        const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-        const UnitCell &ucell,
-        const LCAO_Orbitals &orb,
-        Grid_Driver &GridD);
+                             const int nat,
+                             const int nks,
+                             const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                             const UnitCell& ucell,
+                             const LCAO_Orbitals& orb,
+                             const Grid_Driver& GridD);
 
     template <typename TK>
     void check_v_delta_precalc(const int nat, const int nks, const int nlocal);
@@ -488,12 +488,12 @@ class LCAO_Deepks
     // prepare psialpha for outputting npy file
     template <typename TK>
     void prepare_psialpha(const int nlocal,
-        const int nat,
-        const int nks,
-        const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-        const UnitCell &ucell,
-        const LCAO_Orbitals &orb,
-        Grid_Driver &GridD);
+                          const int nat,
+                          const int nks,
+                          const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                          const UnitCell& ucell,
+                          const LCAO_Orbitals& orb,
+                          const Grid_Driver& GridD);
 
     template <typename TK>
     void check_vdp_psialpha(const int nat, const int nks, const int nlocal);
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp
index 266cac73ee..edb255de93 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp
@@ -13,20 +13,20 @@ LCAO_Deepks_Interface<TK, TR>::LCAO_Deepks_Interface(std::shared_ptr<LCAO_Deepks
 {
 }
 
-template<typename TK, typename TR>
-void LCAO_Deepks_Interface<TK,TR>::out_deepks_labels(const double& etot,
-                                              const int& nks,
-                                              const int& nat,
-                                              const int& nlocal,
-                                              const ModuleBase::matrix& ekb,
-                                              const std::vector<ModuleBase::Vector3<double>>& kvec_d,
-                                              const UnitCell& ucell,
-                                              const LCAO_Orbitals& orb,
-                                              Grid_Driver& GridD,
-                                              const Parallel_Orbitals* ParaV,
-                                              const psi::Psi<TK>& psi,
-                                              const elecstate::DensityMatrix<TK, double>* dm,
-                                              hamilt::HamiltLCAO<TK,TR>* p_ham)
+template <typename TK, typename TR>
+void LCAO_Deepks_Interface<TK, TR>::out_deepks_labels(const double& etot,
+                                                      const int& nks,
+                                                      const int& nat,
+                                                      const int& nlocal,
+                                                      const ModuleBase::matrix& ekb,
+                                                      const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                                                      const UnitCell& ucell,
+                                                      const LCAO_Orbitals& orb,
+                                                      const Grid_Driver& GridD,
+                                                      const Parallel_Orbitals* ParaV,
+                                                      const psi::Psi<TK>& psi,
+                                                      const elecstate::DensityMatrix<TK, double>* dm,
+                                                      hamilt::HamiltLCAO<TK, TR>* p_ham)
 {
     ModuleBase::TITLE("LCAO_Deepks_Interface", "out_deepks_labels");
     ModuleBase::timer::tick("LCAO_Deepks_Interface", "out_deepks_labels");
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h
index 1e657f48fe..695f942990 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.h
@@ -39,7 +39,7 @@ class LCAO_Deepks_Interface
                            const std::vector<ModuleBase::Vector3<double>>& kvec_d,
                            const UnitCell& ucell,
                            const LCAO_Orbitals& orb,
-                           Grid_Driver& GridD,
+                           const Grid_Driver& GridD,
                            const Parallel_Orbitals* ParaV,
                            const psi::Psi<TK>& psid,
                            const elecstate::DensityMatrix<TK, double>* dm,
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp
index 29c1fd813e..8c098ffa2e 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp
@@ -66,10 +66,10 @@ void LCAO_Deepks::read_projected_DM(bool read_pdm_file, bool is_equiv, const Num
 
 //this subroutine performs the calculation of projected density matrices
 //pdm_m,m'=\sum_{mu,nu} rho_{mu,nu} <chi_mu|alpha_m><alpha_m'|chi_nu>
-void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<double, double>* dm, 
-    const UnitCell &ucell,
-    const LCAO_Orbitals &orb,
-    Grid_Driver& GridD)
+void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<double, double>* dm,
+                                   const UnitCell& ucell,
+                                   const LCAO_Orbitals& orb,
+                                   const Grid_Driver& GridD)
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_projected_DM");
 
@@ -317,9 +317,9 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<double, double
 }
 
 void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<std::complex<double>, double>* dm,
-    const UnitCell &ucell,
-    const LCAO_Orbitals &orb,
-    Grid_Driver& GridD)
+                                   const UnitCell& ucell,
+                                   const LCAO_Orbitals& orb,
+                                   const Grid_Driver& GridD)
 {
     // if pdm has been initialized, skip the calculation
     if(this->init_pdm)
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_psialpha.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_psialpha.cpp
index 393456c4f1..dcd8440eea 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_psialpha.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_psialpha.cpp
@@ -16,7 +16,7 @@
 void LCAO_Deepks::build_psialpha(const bool& calc_deri,
                                  const UnitCell& ucell,
                                  const LCAO_Orbitals& orb,
-                                 Grid_Driver& GridD,
+                                 const Grid_Driver& GridD,
                                  const TwoCenterIntegrator& overlap_orb_alpha)
 {
     ModuleBase::TITLE("LCAO_Deepks", "build_psialpha");
@@ -133,7 +133,7 @@ void LCAO_Deepks::build_psialpha(const bool& calc_deri,
 void LCAO_Deepks::check_psialpha(const bool& calc_deri,
                                  const UnitCell& ucell,
                                  const LCAO_Orbitals& orb,
-                                 Grid_Driver& GridD)
+                                 const Grid_Driver& GridD)
 {
     ModuleBase::TITLE("LCAO_Deepks", "check_psialpha");
     ModuleBase::timer::tick("LCAO_Deepks", "check_psialpha");
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp
index 5ec6a7acb4..5dbec1d76d 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp
@@ -155,12 +155,12 @@ void LCAO_Deepks::load_model(const std::string& deepks_model) {
 // prepare_psialpha and prepare_gevdm for deepks_v_delta = 2
 template <typename TK>
 void LCAO_Deepks::prepare_psialpha(const int nlocal,
-    const int nat,
-    const int nks,
-    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-    const UnitCell &ucell,
-    const LCAO_Orbitals &orb,
-    Grid_Driver &GridD)
+                                   const int nat,
+                                   const int nks,
+                                   const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                                   const UnitCell& ucell,
+                                   const LCAO_Orbitals& orb,
+                                   const Grid_Driver& GridD)
 {
     ModuleBase::TITLE("LCAO_Deepks", "prepare_psialpha");
     int nlmax = this->inlmax/nat;
@@ -427,18 +427,19 @@ void LCAO_Deepks::check_vdp_gevdm(const int nat)
 template void LCAO_Deepks::prepare_psialpha<double>(const int nlocal,
                                                     const int nat,
                                                     const int nks,
-                                                    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-                                                    const UnitCell &ucell,
-                                                    const LCAO_Orbitals &orb,
-                                                    Grid_Driver &GridD);
-
-template void LCAO_Deepks::prepare_psialpha<std::complex<double>>(const int nlocal,
-                                                                  const int nat,
-                                                                  const int nks,
-                                                                  const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-                                                                  const UnitCell &ucell,
-                                                                  const LCAO_Orbitals &orb,
-                                                                  Grid_Driver &GridD);
+                                                    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                                                    const UnitCell& ucell,
+                                                    const LCAO_Orbitals& orb,
+                                                    const Grid_Driver& GridD);
+
+template void LCAO_Deepks::prepare_psialpha<std::complex<double>>(
+    const int nlocal,
+    const int nat,
+    const int nks,
+    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+    const UnitCell& ucell,
+    const LCAO_Orbitals& orb,
+    const Grid_Driver& GridD);
 
 template void LCAO_Deepks::check_vdp_psialpha<double>(const int nat, const int nks, const int nlocal);
 template void LCAO_Deepks::check_vdp_psialpha<std::complex<double>>(const int nat, const int nks, const int nlocal);
diff --git a/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp b/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp
index e28af0685c..cbbbac3b42 100644
--- a/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp
+++ b/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp
@@ -16,12 +16,12 @@
 ///           sum_{mu,nu} rho_{mu,nu} <chi_mu|alpha_m><alpha_m'|chi_nu>
 template <typename TK>
 void LCAO_Deepks::cal_gdmx(const std::vector<std::vector<TK>>& dm,
-    const UnitCell &ucell,
-    const LCAO_Orbitals &orb,
-    Grid_Driver& GridD,
-    const int nks,
-    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
-    const bool isstress)
+                           const UnitCell& ucell,
+                           const LCAO_Orbitals& orb,
+                           const Grid_Driver& GridD,
+                           const int nks,
+                           const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                           const bool isstress)
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_gdmx");
     ModuleBase::timer::tick("LCAO_Deepks","cal_gdmx");
@@ -336,17 +336,17 @@ void LCAO_Deepks::check_gdmx(const int nat)
 }
 
 template void LCAO_Deepks::cal_gdmx<double>(const std::vector<std::vector<double>>& dm,
-                                            const UnitCell &ucell,
-                                            const LCAO_Orbitals &orb,
-                                            Grid_Driver& GridD,
+                                            const UnitCell& ucell,
+                                            const LCAO_Orbitals& orb,
+                                            const Grid_Driver& GridD,
                                             const int nks,
                                             const std::vector<ModuleBase::Vector3<double>>& kvec_d,
                                             const bool isstress);
 
 template void LCAO_Deepks::cal_gdmx<std::complex<double>>(const std::vector<std::vector<std::complex<double>>>& dm,
-                                                          const UnitCell &ucell,
-                                                          const LCAO_Orbitals &orb,
-                                                          Grid_Driver& GridD,
+                                                          const UnitCell& ucell,
+                                                          const LCAO_Orbitals& orb,
+                                                          const Grid_Driver& GridD,
                                                           const int nks,
                                                           const std::vector<ModuleBase::Vector3<double>>& kvec_d,
                                                           const bool isstress);
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_fgamma.cpp b/source/module_hamilt_lcao/module_deepks/deepks_fgamma.cpp
index 57615ce4a9..8649aeb978 100644
--- a/source/module_hamilt_lcao/module_deepks/deepks_fgamma.cpp
+++ b/source/module_hamilt_lcao/module_deepks/deepks_fgamma.cpp
@@ -21,16 +21,16 @@
 //Pulay and HF terms are calculated together
 void DeePKS_domain::cal_f_delta_gamma(
     const std::vector<std::vector<double>>& dm,
-    const UnitCell &ucell,
-    const LCAO_Orbitals &orb,
-    Grid_Driver& gd,
-    const Parallel_Orbitals &pv,
+    const UnitCell& ucell,
+    const LCAO_Orbitals& orb,
+    const Grid_Driver& gd,
+    const Parallel_Orbitals& pv,
     const int lmaxd,
     std::vector<std::vector<std::unordered_map<int, std::vector<std::vector<double>>>>>& nlm_save,
     double** gedm,
     ModuleBase::IntArray* inl_index,
     ModuleBase::matrix& f_delta,
-    const bool isstress, 
+    const bool isstress,
     ModuleBase::matrix& svnl_dalpha)
 {
     ModuleBase::TITLE("DeePKS_domain", "cal_f_delta_gamma");
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_fk.cpp b/source/module_hamilt_lcao/module_deepks/deepks_fk.cpp
index cfd5e14d86..1d26f0adff 100644
--- a/source/module_hamilt_lcao/module_deepks/deepks_fk.cpp
+++ b/source/module_hamilt_lcao/module_deepks/deepks_fk.cpp
@@ -16,19 +16,19 @@
 typedef std::tuple<int, int, int, int> key_tuple; // used in nlm_save_k
 
 void DeePKS_domain::cal_f_delta_k(
-    const std::vector<std::vector<std::complex<double>>>& dm,/**<[in] density matrix*/
-    const UnitCell &ucell,
-    const LCAO_Orbitals &orb,
-    Grid_Driver& GridD,
+    const std::vector<std::vector<std::complex<double>>>& dm, /**<[in] density matrix*/
+    const UnitCell& ucell,
+    const LCAO_Orbitals& orb,
+    const Grid_Driver& GridD,
     const Parallel_Orbitals& pv,
     const int lmaxd,
     const int nks,
-    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-    std::vector<std::map<key_tuple, std::unordered_map<int, std::vector<std::vector<double>>>>> &nlm_save_k,
+    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+    std::vector<std::map<key_tuple, std::unordered_map<int, std::vector<std::vector<double>>>>>& nlm_save_k,
     double** gedm,
     ModuleBase::IntArray* inl_index,
     ModuleBase::matrix& f_delta,
-    const bool isstress, 
+    const bool isstress,
     ModuleBase::matrix& svnl_dalpha)
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_f_delta_hf_k_new");
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_force.h b/source/module_hamilt_lcao/module_deepks/deepks_force.h
index bac4b5961b..5ecf7d0f0e 100644
--- a/source/module_hamilt_lcao/module_deepks/deepks_force.h
+++ b/source/module_hamilt_lcao/module_deepks/deepks_force.h
@@ -33,44 +33,40 @@ namespace DeePKS_domain
     // 3. check_f_delta, which prints F_delta into F_delta.dat for checking
 
     // for gamma only, pulay and HF terms of force are calculated together
-	void cal_f_delta_gamma(
-			const std::vector<std::vector<double>>& dm,
-			const UnitCell &ucell,
-			const LCAO_Orbitals &orb,
-			Grid_Driver& gd,
-            const Parallel_Orbitals &pv,
-			const int lmaxd,
-			std::vector<std::vector<std::unordered_map<int, std::vector<std::vector<double>>>>>& nlm_save,
-			double** gedm,
-			ModuleBase::IntArray* inl_index,
-			ModuleBase::matrix& f_delta,
-			const bool isstress,
-			ModuleBase::matrix& svnl_dalpha);
+void cal_f_delta_gamma(const std::vector<std::vector<double>>& dm,
+                       const UnitCell& ucell,
+                       const LCAO_Orbitals& orb,
+                       const Grid_Driver& gd,
+                       const Parallel_Orbitals& pv,
+                       const int lmaxd,
+                       std::vector<std::vector<std::unordered_map<int, std::vector<std::vector<double>>>>>& nlm_save,
+                       double** gedm,
+                       ModuleBase::IntArray* inl_index,
+                       ModuleBase::matrix& f_delta,
+                       const bool isstress,
+                       ModuleBase::matrix& svnl_dalpha);
 
-    // for multi-k, pulay and HF terms of force are calculated together
+// for multi-k, pulay and HF terms of force are calculated together
 
-    typedef std::tuple<int, int, int, int> key_tuple;
+typedef std::tuple<int, int, int, int> key_tuple;
 
-	void cal_f_delta_k(
-			const std::vector<std::vector<std::complex<double>>>& dm,/**<[in] density matrix*/
-			const UnitCell &ucell,
-			const LCAO_Orbitals &orb,
-			Grid_Driver& GridD,
-            const Parallel_Orbitals& pv,
-			const int lmaxd,
-			const int nks,
-			const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-			std::vector<std::map<key_tuple, std::unordered_map<int, std::vector<std::vector<double>>>>> &nlm_save_k,
-			double** gedm,
-			ModuleBase::IntArray* inl_index,
-			ModuleBase::matrix& f_delta,
-			const bool isstress,
-			ModuleBase::matrix& svnl_dalpha);
+void cal_f_delta_k(
+    const std::vector<std::vector<std::complex<double>>>& dm, /**<[in] density matrix*/
+    const UnitCell& ucell,
+    const LCAO_Orbitals& orb,
+    const Grid_Driver& GridD,
+    const Parallel_Orbitals& pv,
+    const int lmaxd,
+    const int nks,
+    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+    std::vector<std::map<key_tuple, std::unordered_map<int, std::vector<std::vector<double>>>>>& nlm_save_k,
+    double** gedm,
+    ModuleBase::IntArray* inl_index,
+    ModuleBase::matrix& f_delta,
+    const bool isstress,
+    ModuleBase::matrix& svnl_dalpha);
 
-	void check_f_delta(
-			const int nat, 
-			ModuleBase::matrix& f_delta,
-			ModuleBase::matrix& svnl_dalpha);
+void check_f_delta(const int nat, ModuleBase::matrix& f_delta, ModuleBase::matrix& svnl_dalpha);
 }
 
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp b/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp
index 5f202d1532..da8dcdd539 100644
--- a/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp
+++ b/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp
@@ -16,14 +16,13 @@
 // calculates orbital_precalc[1,NAt,NDscrpt] = gvdm * orbital_pdm_shell;
 // orbital_pdm_shell[2,Inl,nm*nm] = dm_hl * overlap * overlap;
 template <typename TK, typename TH>
-void LCAO_Deepks::cal_orbital_precalc(
-    const std::vector<std::vector<TH>>& dm_hl,
-    const int nat,
-    const int nks,
-    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
-    const UnitCell& ucell,
-    const LCAO_Orbitals& orb,
-    Grid_Driver& GridD) 
+void LCAO_Deepks::cal_orbital_precalc(const std::vector<std::vector<TH>>& dm_hl,
+                                      const int nat,
+                                      const int nks,
+                                      const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                                      const UnitCell& ucell,
+                                      const LCAO_Orbitals& orb,
+                                      const Grid_Driver& GridD)
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_orbital_precalc");
     ModuleBase::timer::tick("LCAO_Deepks", "calc_orbital_precalc");
@@ -404,20 +403,21 @@ void LCAO_Deepks::cal_orbital_precalc(
     return;
 }
 
-template void LCAO_Deepks::cal_orbital_precalc<double, ModuleBase::matrix>(const std::vector<std::vector<ModuleBase::matrix>>& dm_hl,
-                                                                           const int nat,
-                                                                           const int nks,
-                                                                           const std::vector<ModuleBase::Vector3<double>>& kvec_d,
-                                                                           const UnitCell& ucell,
-                                                                           const LCAO_Orbitals& orb,
-                                                                           Grid_Driver& GridD);
-
-
-template void LCAO_Deepks::cal_orbital_precalc<std::complex<double>, ModuleBase::ComplexMatrix>(const std::vector<std::vector<ModuleBase::ComplexMatrix>>& dm_hl,
-                                                                                                const int nat,
-                                                                                                const int nks,
-                                                                                                const std::vector<ModuleBase::Vector3<double>>& kvec_d,
-                                                                                                const UnitCell& ucell,
-                                                                                                const LCAO_Orbitals& orb,
-                                                                                                Grid_Driver& GridD);
+template void LCAO_Deepks::cal_orbital_precalc<double, ModuleBase::matrix>(
+    const std::vector<std::vector<ModuleBase::matrix>>& dm_hl,
+    const int nat,
+    const int nks,
+    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+    const UnitCell& ucell,
+    const LCAO_Orbitals& orb,
+    const Grid_Driver& GridD);
+
+template void LCAO_Deepks::cal_orbital_precalc<std::complex<double>, ModuleBase::ComplexMatrix>(
+    const std::vector<std::vector<ModuleBase::ComplexMatrix>>& dm_hl,
+    const int nat,
+    const int nks,
+    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+    const UnitCell& ucell,
+    const LCAO_Orbitals& orb,
+    const Grid_Driver& GridD);
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp
index c3d1abe61f..c79536a72a 100644
--- a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp
+++ b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp
@@ -4,7 +4,7 @@
 #undef private
 namespace Test_Deepks
 {
-Grid_Driver GridD(PARAM.input.test_deconstructor, PARAM.input.test_grid);
+const Grid_Driver GridD(PARAM.input.test_deconstructor, PARAM.input.test_grid);
 }
 
 test_deepks::test_deepks()
diff --git a/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp b/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp
index 25eebd7490..6bb4c0ac6a 100644
--- a/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp
+++ b/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp
@@ -20,12 +20,12 @@
 // for deepks_v_delta = 1
 template <typename TK>
 void LCAO_Deepks::cal_v_delta_precalc(const int nlocal,
-    const int nat,
-    const int nks,
-    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-    const UnitCell &ucell,
-    const LCAO_Orbitals &orb,
-    Grid_Driver &GridD)
+                                      const int nat,
+                                      const int nks,
+                                      const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                                      const UnitCell& ucell,
+                                      const LCAO_Orbitals& orb,
+                                      const Grid_Driver& GridD)
 {
     ModuleBase::TITLE("LCAO_Deepks", "calc_v_delta_precalc");
     // timeval t_start;
@@ -327,18 +327,19 @@ void LCAO_Deepks::check_v_delta_precalc(const int nat, const int nks,const int n
 template void LCAO_Deepks::cal_v_delta_precalc<double>(const int nlocal,
                                                        const int nat,
                                                        const int nks,
-                                                       const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-                                                       const UnitCell &ucell,
-                                                       const LCAO_Orbitals &orb,
-                                                       Grid_Driver &GridD);
+                                                       const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                                                       const UnitCell& ucell,
+                                                       const LCAO_Orbitals& orb,
+                                                       const Grid_Driver& GridD);
 
-template void LCAO_Deepks::cal_v_delta_precalc<std::complex<double>>(const int nlocal,
-                                                                    const int nat,
-                                                                    const int nks,
-                                                                    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
-                                                                    const UnitCell &ucell,
-                                                                    const LCAO_Orbitals &orb,
-                                                                    Grid_Driver &GridD);
+template void LCAO_Deepks::cal_v_delta_precalc<std::complex<double>>(
+    const int nlocal,
+    const int nat,
+    const int nks,
+    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+    const UnitCell& ucell,
+    const LCAO_Orbitals& orb,
+    const Grid_Driver& GridD);
 
 template void LCAO_Deepks::check_v_delta_precalc<double>(const int nat, const int nks, const int nlocal);
 template void LCAO_Deepks::check_v_delta_precalc<std::complex<double>>(const int nat, const int nks, const int nlocal);
diff --git a/source/module_hamilt_lcao/module_dftu/dftu.h b/source/module_hamilt_lcao/module_dftu/dftu.h
index 7e8909c96f..03a97a4905 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu.h
+++ b/source/module_hamilt_lcao/module_dftu/dftu.h
@@ -164,24 +164,23 @@ class DFTU
     // Subroutines for folding S and dS matrix
     //=============================================================
 
-	void fold_dSR_gamma(
-			const UnitCell &ucell,
-			const Parallel_Orbitals &pv,
-			Grid_Driver* gd,
-			double* dsloc_x,
-			double* dsloc_y,
-			double* dsloc_z,
-			double* dh_r,
-			const int dim1, 
-			const int dim2, 
-			double* dSR_gamma);
+    void fold_dSR_gamma(const UnitCell& ucell,
+                        const Parallel_Orbitals& pv,
+                        const Grid_Driver* gd,
+                        double* dsloc_x,
+                        double* dsloc_y,
+                        double* dsloc_z,
+                        double* dh_r,
+                        const int dim1,
+                        const int dim2,
+                        double* dSR_gamma);
 
     // dim = 0 : S, for Hamiltonian
     // dim = 1-3 : dS, for force
     // dim = 4-6 : dS * dR, for stress
 
     void folding_matrix_k(const UnitCell& ucell,
-                          Grid_Driver& gd,
+                          const Grid_Driver& gd,
                           ForceStressArrays& fsr,
                           const Parallel_Orbitals& pv,
                           const int ik,
@@ -203,7 +202,7 @@ class DFTU
     //=============================================================
  public:
    void force_stress(const UnitCell& ucell,
-                     Grid_Driver& gd,
+                     const Grid_Driver& gd,
                      const elecstate::ElecState* pelec,
                      const Parallel_Orbitals& pv,
                      ForceStressArrays& fsr,
@@ -213,7 +212,7 @@ class DFTU
 
  private:
    void cal_force_k(const UnitCell& ucell,
-                    Grid_Driver& gd,
+                    const Grid_Driver& gd,
                     ForceStressArrays& fsr,
                     const Parallel_Orbitals& pv,
                     const int ik,
@@ -222,7 +221,7 @@ class DFTU
                     const std::vector<ModuleBase::Vector3<double>>& kvec_d);
 
    void cal_stress_k(const UnitCell& ucell,
-                     Grid_Driver& gd,
+                     const Grid_Driver& gd,
                      ForceStressArrays& fsr,
                      const Parallel_Orbitals& pv,
                      const int ik,
@@ -240,7 +239,7 @@ class DFTU
 
    void cal_stress_gamma(const UnitCell& ucell,
                          const Parallel_Orbitals& pv,
-                         Grid_Driver* gd,
+                         const Grid_Driver* gd,
                          double* dsloc_x,
                          double* dsloc_y,
                          double* dsloc_z,
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp b/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
index 12904bbd72..3ae80f8e14 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
@@ -11,17 +11,16 @@
 namespace ModuleDFTU
 {
 
-void DFTU::fold_dSR_gamma(
-    const UnitCell &ucell,
-    const Parallel_Orbitals &pv,
-    Grid_Driver* gd,
-    double* dsloc_x,
-    double* dsloc_y,
-    double* dsloc_z,
-    double* dh_r,
-    const int dim1, 
-    const int dim2, 
-    double* dSR_gamma)
+void DFTU::fold_dSR_gamma(const UnitCell& ucell,
+                          const Parallel_Orbitals& pv,
+                          const Grid_Driver* gd,
+                          double* dsloc_x,
+                          double* dsloc_y,
+                          double* dsloc_z,
+                          double* dh_r,
+                          const int dim1,
+                          const int dim2,
+                          double* dSR_gamma)
 {
     ModuleBase::TITLE("DFTU", "fold_dSR_gamma");
 
@@ -127,7 +126,7 @@ void DFTU::fold_dSR_gamma(
 }
 
 void DFTU::folding_matrix_k(const UnitCell& ucell,
-                            Grid_Driver& gd,
+                            const Grid_Driver& gd,
                             ForceStressArrays& fsr,
                             const Parallel_Orbitals& pv,
                             const int ik,
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_force.cpp b/source/module_hamilt_lcao/module_dftu/dftu_force.cpp
index 1903a33899..8d06d9a92f 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_force.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_force.cpp
@@ -73,7 +73,7 @@ namespace ModuleDFTU
 {
 
 void DFTU::force_stress(const UnitCell& ucell,
-                        Grid_Driver& gd,
+                        const Grid_Driver& gd,
                         const elecstate::ElecState* pelec,
                         const Parallel_Orbitals& pv,
                         ForceStressArrays& fsr, // mohan add 2024-06-16
@@ -250,7 +250,7 @@ void DFTU::force_stress(const UnitCell& ucell,
 }
 
 void DFTU::cal_force_k(const UnitCell& ucell,
-                       Grid_Driver& gd,
+                       const Grid_Driver& gd,
                        ForceStressArrays& fsr,
                        const Parallel_Orbitals& pv,
                        const int ik,
@@ -380,7 +380,7 @@ void DFTU::cal_force_k(const UnitCell& ucell,
 }
 
 void DFTU::cal_stress_k(const UnitCell& ucell,
-                        Grid_Driver& gd,
+                        const Grid_Driver& gd,
                         ForceStressArrays& fsr,
                         const Parallel_Orbitals& pv,
                         const int ik,
@@ -592,7 +592,7 @@ void DFTU::cal_force_gamma(const UnitCell& ucell,
 
 void DFTU::cal_stress_gamma(const UnitCell& ucell,
                             const Parallel_Orbitals& pv,
-                            Grid_Driver* gd,
+                            const Grid_Driver* gd,
                             double* dsloc_x,
                             double* dsloc_y,
                             double* dsloc_z,
diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp
index e670480e30..6ee4c238d1 100644
--- a/source/module_hamilt_lcao/module_gint/gint.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint.cpp
@@ -138,7 +138,8 @@ void Gint::prep_grid(const Grid_Technique& gt,
     return;
 }
 
-void Gint::initialize_pvpR(const UnitCell& ucell_in, Grid_Driver* gd, const int& nspin) {
+void Gint::initialize_pvpR(const UnitCell& ucell_in, const Grid_Driver* gd, const int& nspin)
+{
     ModuleBase::TITLE("Gint", "initialize_pvpR");
 
     int npol = 1;
diff --git a/source/module_hamilt_lcao/module_gint/gint.h b/source/module_hamilt_lcao/module_gint/gint.h
index b0601af2d5..22d75e650b 100644
--- a/source/module_hamilt_lcao/module_gint/gint.h
+++ b/source/module_hamilt_lcao/module_gint/gint.h
@@ -55,7 +55,7 @@ class Gint {
      * @brief calculate the neighbor atoms of each atom in this processor
      * size of BaseMatrix with be the non-parallel version
      */
-    void initialize_pvpR(const UnitCell& unitcell, Grid_Driver* gd, const int& nspin);
+    void initialize_pvpR(const UnitCell& unitcell, const Grid_Driver* gd, const int& nspin);
 
     /**
      * @brief resize DMRGint to nspin and reallocate the memory
diff --git a/source/module_hamilt_lcao/module_gint/gint_k.h b/source/module_hamilt_lcao/module_gint/gint_k.h
index bc2a40cf42..74ac0a744d 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k.h
+++ b/source/module_hamilt_lcao/module_gint/gint_k.h
@@ -32,11 +32,8 @@ class Gint_k : public Gint {
      * @brief transfer pvpR to this->hRGint
      * then pass this->hRGint to Veff<OperatorLCAO>::hR
      */
-    void transfer_pvpR(hamilt::HContainer<double>* hR,
-                       const UnitCell* ucell_in, Grid_Driver* gd);
-    void transfer_pvpR(hamilt::HContainer<std::complex<double>>* hR,
-                       const UnitCell* ucell_in,
-                       Grid_Driver* gd);
+    void transfer_pvpR(hamilt::HContainer<double>* hR, const UnitCell* ucell_in, const Grid_Driver* gd);
+    void transfer_pvpR(hamilt::HContainer<std::complex<double>>* hR, const UnitCell* ucell_in, const Grid_Driver* gd);
 
     //------------------------------------------------------
     // in gint_k_env.cpp
@@ -78,7 +75,7 @@ class Gint_k : public Gint {
                                     LCAO_HS_Arrays& HS_Arrays,
                                     const Parallel_Orbitals* pv,
                                     const UnitCell& ucell,
-                                    Grid_Driver& gdriver);
+                                    const Grid_Driver& gdriver);
 
   private:
     //----------------------------
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
index ac17288351..9a6cca47f7 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
@@ -18,7 +18,7 @@
 #endif
 
 // transfer_pvpR, NSPIN = 1 or 2
-void Gint_k::transfer_pvpR(hamilt::HContainer<double>* hR, const UnitCell* ucell, Grid_Driver* gd)
+void Gint_k::transfer_pvpR(hamilt::HContainer<double>* hR, const UnitCell* ucell, const Grid_Driver* gd)
 {
     ModuleBase::TITLE("Gint_k", "transfer_pvpR");
     ModuleBase::timer::tick("Gint_k", "transfer_pvpR");
@@ -71,7 +71,9 @@ void Gint_k::transfer_pvpR(hamilt::HContainer<double>* hR, const UnitCell* ucell
 }
 
 // transfer_pvpR, NSPIN = 4
-void Gint_k::transfer_pvpR(hamilt::HContainer<std::complex<double>>* hR, const UnitCell* ucell_in, Grid_Driver* gd)
+void Gint_k::transfer_pvpR(hamilt::HContainer<std::complex<double>>* hR,
+                           const UnitCell* ucell_in,
+                           const Grid_Driver* gd)
 {
     ModuleBase::TITLE("Gint_k", "transfer_pvpR");
     ModuleBase::timer::tick("Gint_k", "transfer_pvpR");
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
index 753c1eff17..873087bf05 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
@@ -323,7 +323,7 @@ void Gint_k::cal_dvlocal_R_sparseMatrix(const int& current_spin,
                                         LCAO_HS_Arrays& HS_Arrays,
                                         const Parallel_Orbitals* pv,
                                         const UnitCell& ucell,
-                                        Grid_Driver& gdriver)
+                                        const Grid_Driver& gdriver)
 {
     ModuleBase::TITLE("Gint_k", "cal_dvlocal_R_sparseMatrix");
 
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
index a7363655c3..009b13e7ad 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
@@ -31,30 +31,30 @@ Grid_Technique::~Grid_Technique() {
 // after the orbital information has been read,
 // this function control the routinue to generate
 // grid technique parameters.
-void Grid_Technique::set_pbc_grid(
-    const int& ncx_in,
-    const int& ncy_in,
-    const int& ncz_in,
-    const int& bx_in,
-    const int& by_in,
-    const int& bz_in,
-    const int& nbx_in,
-    const int& nby_in,
-    const int& nbz_in,
-    const int& nbxx_in,
-    const int& nbzp_start_in,
-    const int& nbzp_in,
-    const int& ny,
-    const int& nplane,
-    const int& startz_current,
-    const UnitCell& ucell,
-    Grid_Driver& gd,
-    const double& dr_uniform,
-    const std::vector<double>& rcuts,
-    const std::vector<std::vector<double>>& psi_u,
-    const std::vector<std::vector<double>>& dpsi_u,
-    const std::vector<std::vector<double>>& d2psi_u,
-    const int& num_stream) {
+void Grid_Technique::set_pbc_grid(const int& ncx_in,
+                                  const int& ncy_in,
+                                  const int& ncz_in,
+                                  const int& bx_in,
+                                  const int& by_in,
+                                  const int& bz_in,
+                                  const int& nbx_in,
+                                  const int& nby_in,
+                                  const int& nbz_in,
+                                  const int& nbxx_in,
+                                  const int& nbzp_start_in,
+                                  const int& nbzp_in,
+                                  const int& ny,
+                                  const int& nplane,
+                                  const int& startz_current,
+                                  const UnitCell& ucell,
+                                  const Grid_Driver& gd,
+                                  const double& dr_uniform,
+                                  const std::vector<double>& rcuts,
+                                  const std::vector<std::vector<double>>& psi_u,
+                                  const std::vector<std::vector<double>>& dpsi_u,
+                                  const std::vector<std::vector<double>>& d2psi_u,
+                                  const int& num_stream)
+{
     ModuleBase::TITLE("Grid_Technique", "init");
     ModuleBase::timer::tick("Grid_Technique", "init");
 
@@ -538,8 +538,8 @@ void Grid_Technique::cal_trace_lo(const UnitCell& ucell) {
     return;
 }
 
-void Grid_Technique::init_ijr_and_nnrg(const UnitCell& ucell,
-                                   Grid_Driver& gd) {
+void Grid_Technique::init_ijr_and_nnrg(const UnitCell& ucell, const Grid_Driver& gd)
+{
     ModuleBase::TITLE("Grid_Technique", "init_ijr_and_nnrg");
 
     hamilt::HContainer<double> hRGint_tmp(ucell.nat);
@@ -613,7 +613,6 @@ void Grid_Technique::init_ijr_and_nnrg(const UnitCell& ucell,
     return;
 }
 
-
 #if ((defined __CUDA) /* || (defined __ROCM) */)
 
 void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h
index 7a3eac4d19..55ff74e151 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.h
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.h
@@ -104,7 +104,7 @@ class Grid_Technique : public Grid_MeshBall {
                       const int& nplane,
                       const int& startz_current,
                       const UnitCell& ucell,
-                      Grid_Driver& gd,
+                      const Grid_Driver& gd,
                       const double& dr_uniform,
                       const std::vector<double>& rcuts,
                       const std::vector<std::vector<double>>& psi_u,
@@ -137,7 +137,7 @@ class Grid_Technique : public Grid_MeshBall {
                             const UnitCell& ucell);
     void init_atoms_on_grid2(const int* index2normal, const UnitCell& ucell);
     // initialize the ijr_info and nnrg
-    void init_ijr_and_nnrg(const UnitCell& ucell, Grid_Driver& gd);
+    void init_ijr_and_nnrg(const UnitCell& ucell, const Grid_Driver& gd);
     void cal_grid_integration_index();
     void cal_trace_lo(const UnitCell& ucell);
     void check_bigcell(int* ind_bigcell, char* bigcell_on_processor);
diff --git a/source/module_hamilt_lcao/module_tddft/td_current.cpp b/source/module_hamilt_lcao/module_tddft/td_current.cpp
index 52c3cb8ea9..2ec3b472cc 100644
--- a/source/module_hamilt_lcao/module_tddft/td_current.cpp
+++ b/source/module_hamilt_lcao/module_tddft/td_current.cpp
@@ -9,11 +9,11 @@
 #endif
 
 TD_current::TD_current(const UnitCell* ucell_in,
-                      Grid_Driver* GridD_in,
-                      const Parallel_Orbitals* paraV,
-                      const LCAO_Orbitals& orb,
-                      const TwoCenterIntegrator* intor)
-    : ucell(ucell_in), paraV(paraV) , orb_(orb), Grid(GridD_in), intor_(intor)
+                       const Grid_Driver* GridD_in,
+                       const Parallel_Orbitals* paraV,
+                       const LCAO_Orbitals& orb,
+                       const TwoCenterIntegrator* intor)
+    : ucell(ucell_in), paraV(paraV), orb_(orb), Grid(GridD_in), intor_(intor)
 {   
     // for length gague, the A(t) = 0 for all the time.
     this->cart_At = ModuleBase::Vector3<double>(0,0,0);
@@ -28,7 +28,7 @@ TD_current::~TD_current()
     }
 }
 //allocate space for current_term
-void TD_current::initialize_vcomm_r(Grid_Driver* GridD, const Parallel_Orbitals* paraV)
+void TD_current::initialize_vcomm_r(const Grid_Driver* GridD, const Parallel_Orbitals* paraV)
 {
     ModuleBase::TITLE("TD_current", "initialize_vcomm_r");
     ModuleBase::timer::tick("TD_current", "initialize_vcomm_r");
@@ -103,7 +103,7 @@ void TD_current::initialize_vcomm_r(Grid_Driver* GridD, const Parallel_Orbitals*
     }
     ModuleBase::timer::tick("TD_current", "initialize_vcomm_r");
 }
-void TD_current::initialize_grad_term(Grid_Driver* GridD, const Parallel_Orbitals* paraV)
+void TD_current::initialize_grad_term(const Grid_Driver* GridD, const Parallel_Orbitals* paraV)
 {
     ModuleBase::TITLE("TD_current", "initialize_grad_term");
     ModuleBase::timer::tick("TD_current", "initialize_grad_term");
diff --git a/source/module_hamilt_lcao/module_tddft/td_current.h b/source/module_hamilt_lcao/module_tddft/td_current.h
index 9c3b2d362e..c2478648f8 100644
--- a/source/module_hamilt_lcao/module_tddft/td_current.h
+++ b/source/module_hamilt_lcao/module_tddft/td_current.h
@@ -15,7 +15,7 @@ class TD_current
 {
   public:
     TD_current(const UnitCell* ucell_in,
-               Grid_Driver* GridD_in,
+               const Grid_Driver* GridD_in,
                const Parallel_Orbitals* paraV,
                const LCAO_Orbitals& orb,
                const TwoCenterIntegrator* intor);
@@ -36,7 +36,7 @@ class TD_current
 
     const LCAO_Orbitals& orb_;
 
-    Grid_Driver* Grid = nullptr;
+    const Grid_Driver* Grid = nullptr;
     /// @brief Store real space hamiltonian. TD term should include imaginary part, thus it has to be complex type. Only shared between TD operators.
     std::vector<hamilt::HContainer<std::complex<double>>*> current_term = {nullptr, nullptr, nullptr};
     
@@ -47,8 +47,8 @@ class TD_current
      * HContainer is used to store the non-local pseudopotential matrix with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_vcomm_r(Grid_Driver* GridD_in, const Parallel_Orbitals* paraV);
-    void initialize_grad_term(Grid_Driver* GridD_in, const Parallel_Orbitals* paraV);
+    void initialize_vcomm_r(const Grid_Driver* GridD_in, const Parallel_Orbitals* paraV);
+    void initialize_grad_term(const Grid_Driver* GridD_in, const Parallel_Orbitals* paraV);
 
     /**
      * @brief calculate the HR local matrix of <I,J,R> atom pair
diff --git a/source/module_io/berryphase.cpp b/source/module_io/berryphase.cpp
index d2ce73230d..c27c8b0f62 100644
--- a/source/module_io/berryphase.cpp
+++ b/source/module_io/berryphase.cpp
@@ -41,7 +41,7 @@ void berryphase::get_occupation_bands()
 
 #ifdef __LCAO
 void berryphase::lcao_init(const UnitCell& ucell,
-                           Grid_Driver& gd,
+                           const Grid_Driver& gd,
                            const K_Vectors& kv,
                            const Grid_Technique& grid_tech,
                            const LCAO_Orbitals& orb)
diff --git a/source/module_io/berryphase.h b/source/module_io/berryphase.h
index c0fbe215be..38029880f7 100644
--- a/source/module_io/berryphase.h
+++ b/source/module_io/berryphase.h
@@ -38,7 +38,7 @@ class berryphase
     void get_occupation_bands();
 #ifdef __LCAO
     void lcao_init(const UnitCell& ucell,
-                   Grid_Driver& gd,
+                   const Grid_Driver& gd,
                    const K_Vectors& kv,
                    const Grid_Technique& grid_tech,
                    const LCAO_Orbitals& orb);
diff --git a/source/module_io/cal_r_overlap_R.cpp b/source/module_io/cal_r_overlap_R.cpp
index 2758ad131d..6cbf7e7306 100644
--- a/source/module_io/cal_r_overlap_R.cpp
+++ b/source/module_io/cal_r_overlap_R.cpp
@@ -239,7 +239,7 @@ void cal_r_overlap_R::init(const UnitCell& ucell,const Parallel_Orbitals& pv, co
     return;
 }
 
-void cal_r_overlap_R::out_rR(const UnitCell& ucell, Grid_Driver& gd, const int& istep)
+void cal_r_overlap_R::out_rR(const UnitCell& ucell, const Grid_Driver& gd, const int& istep)
 {
     ModuleBase::TITLE("cal_r_overlap_R", "out_rR");
     ModuleBase::timer::tick("cal_r_overlap_R", "out_rR");
diff --git a/source/module_io/cal_r_overlap_R.h b/source/module_io/cal_r_overlap_R.h
index 69a4d2c7fb..a40ec30ed2 100644
--- a/source/module_io/cal_r_overlap_R.h
+++ b/source/module_io/cal_r_overlap_R.h
@@ -33,7 +33,7 @@ class cal_r_overlap_R
     bool binary = false;
 
     void init(const UnitCell& ucell,const Parallel_Orbitals& pv, const LCAO_Orbitals& orb);
-    void out_rR(const UnitCell& ucell, Grid_Driver& gd, const int& istep);
+    void out_rR(const UnitCell& ucell, const Grid_Driver& gd, const int& istep);
     void out_rR_other(const UnitCell& ucell, const int& istep, const std::set<Abfs::Vector3_Order<int>>& output_R_coor);
 
   private:
diff --git a/source/module_io/fR_overlap.cpp b/source/module_io/fR_overlap.cpp
index a5d3c56f6e..8f54ba8d97 100644
--- a/source/module_io/fR_overlap.cpp
+++ b/source/module_io/fR_overlap.cpp
@@ -13,15 +13,13 @@ FR_overlap<T>::FR_overlap()
 }
 
 template <typename T>
-void FR_overlap<T>::set_parameters(
-    fr_ptr fr_in, 
-    const UnitCell* ucell_in, 
-    const LCAO_Orbitals* ptr_orb, 
-    Grid_Driver* GridD_in, 
-    const Parallel_Orbitals* paraV,
-    int radial_grid_num,
-    int degree
-)
+void FR_overlap<T>::set_parameters(fr_ptr fr_in,
+                                   const UnitCell* ucell_in,
+                                   const LCAO_Orbitals* ptr_orb,
+                                   const Grid_Driver* GridD_in,
+                                   const Parallel_Orbitals* paraV,
+                                   int radial_grid_num,
+                                   int degree)
 {
     this->fr = fr_in;
     this->ucell = ucell_in;
@@ -70,7 +68,7 @@ FR_overlap<T>::~FR_overlap()
 }
 
 template <typename T>
-void FR_overlap<T>::initialize_FR(Grid_Driver* GridD, const Parallel_Orbitals* paraV)
+void FR_overlap<T>::initialize_FR(const Grid_Driver* GridD, const Parallel_Orbitals* paraV)
 {
     ModuleBase::TITLE("FR_overlap", "initialize_FR");
     ModuleBase::timer::tick("FR_overlap", "initialize_FR");
diff --git a/source/module_io/fR_overlap.h b/source/module_io/fR_overlap.h
index 77b126c1b7..c89d2a4bb0 100644
--- a/source/module_io/fR_overlap.h
+++ b/source/module_io/fR_overlap.h
@@ -18,15 +18,13 @@ class FR_overlap
 
     FR_overlap();
 
-    void set_parameters(
-        fr_ptr fr_in, 
-        const UnitCell* ucell_in, 
-        const LCAO_Orbitals* ptr_orb, 
-        Grid_Driver* GridD_in, 
-        const Parallel_Orbitals* paraV,
-        int radial_grid_num = 140,
-        int degree = 110
-    );
+    void set_parameters(fr_ptr fr_in,
+                        const UnitCell* ucell_in,
+                        const LCAO_Orbitals* ptr_orb,
+                        const Grid_Driver* GridD_in,
+                        const Parallel_Orbitals* paraV,
+                        int radial_grid_num = 140,
+                        int degree = 110);
 
     FR_overlap(const FR_overlap<T>& FR_in);
 
@@ -42,25 +40,26 @@ class FR_overlap
     }
 
 protected:
-    void initialize_FR(Grid_Driver* GridD, const Parallel_Orbitals* paraV);
+  void initialize_FR(const Grid_Driver* GridD, const Parallel_Orbitals* paraV);
 
-    void cal_FR_IJR(const int& iat1, const int& iat2, const Parallel_Orbitals* paraV, const ModuleBase::Vector3<double>& dtau, T* data_pointer);
+  void cal_FR_IJR(const int& iat1,
+                  const int& iat2,
+                  const Parallel_Orbitals* paraV,
+                  const ModuleBase::Vector3<double>& dtau,
+                  T* data_pointer);
 
-    std::map<std::pair<int, int>, double> psi_inter(const int &T1, const std::set<std::pair<int, int>> &LN_pair1, const double &r_norm);
+  std::map<std::pair<int, int>, double> psi_inter(const int& T1,
+                                                  const std::set<std::pair<int, int>>& LN_pair1,
+                                                  const double& r_norm);
 
-    double Polynomial_Interpolation(
-        const double *psi_r,
-        const int &mesh_r,
-        const double &dr,
-        const double &x	
-    );
+  double Polynomial_Interpolation(const double* psi_r, const int& mesh_r, const double& dr, const double& x);
 
-    fr_ptr fr = nullptr;
-    const UnitCell* ucell = nullptr;
-    const LCAO_Orbitals* ptr_orb_ = nullptr;
-    int radial_grid_num = 140;
-    ModuleBase::Lebedev_laikov_grid *Leb_grid = nullptr;
-    hamilt::HContainer<T> *FR_container = nullptr;
+  fr_ptr fr = nullptr;
+  const UnitCell* ucell = nullptr;
+  const LCAO_Orbitals* ptr_orb_ = nullptr;
+  int radial_grid_num = 140;
+  ModuleBase::Lebedev_laikov_grid* Leb_grid = nullptr;
+  hamilt::HContainer<T>* FR_container = nullptr;
 };
 #endif
 #endif
diff --git a/source/module_io/get_pchg_lcao.cpp b/source/module_io/get_pchg_lcao.cpp
index f4567a67da..60898489bd 100644
--- a/source/module_io/get_pchg_lcao.cpp
+++ b/source/module_io/get_pchg_lcao.cpp
@@ -49,7 +49,7 @@ void IState_Charge::begin(Gint_Gamma& gg,
                           const std::string& global_out_dir,
                           std::ofstream& ofs_warning,
                           const UnitCell* ucell_in,
-                          Grid_Driver* GridD_in,
+                          const Grid_Driver* GridD_in,
                           const K_Vectors& kv)
 {
     ModuleBase::TITLE("IState_Charge", "begin");
@@ -172,7 +172,7 @@ void IState_Charge::begin(Gint_k& gk,
                           const std::string& global_out_dir,
                           std::ofstream& ofs_warning,
                           UnitCell* ucell_in,
-                          Grid_Driver* GridD_in,
+                          const Grid_Driver* GridD_in,
                           const K_Vectors& kv,
                           const bool if_separate_k,
                           Parallel_Grid* Pgrid,
diff --git a/source/module_io/get_pchg_lcao.h b/source/module_io/get_pchg_lcao.h
index a923397c32..031867dadd 100644
--- a/source/module_io/get_pchg_lcao.h
+++ b/source/module_io/get_pchg_lcao.h
@@ -52,7 +52,7 @@ class IState_Charge
                const std::string& global_out_dir,
                std::ofstream& ofs_warning,
                const UnitCell* ucell_in,
-               Grid_Driver* GridD_in,
+               const Grid_Driver* GridD_in,
                const K_Vectors& kv);
 
     // For multi-k
@@ -80,7 +80,7 @@ class IState_Charge
                const std::string& global_out_dir,
                std::ofstream& ofs_warning,
                UnitCell* ucell_in,
-               Grid_Driver* GridD_in,
+               const Grid_Driver* GridD_in,
                const K_Vectors& kv,
                const bool if_separate_k,
                Parallel_Grid* Pgrid,
diff --git a/source/module_io/output_mat_sparse.cpp b/source/module_io/output_mat_sparse.cpp
index f12fd69d3e..19432200e5 100644
--- a/source/module_io/output_mat_sparse.cpp
+++ b/source/module_io/output_mat_sparse.cpp
@@ -18,7 +18,7 @@ void output_mat_sparse(const bool& out_mat_hsR,
                        const TwoCenterBundle& two_center_bundle,
                        const LCAO_Orbitals& orb,
                        UnitCell& ucell,
-                       Grid_Driver& grid,
+                       const Grid_Driver& grid,
                        const K_Vectors& kv,
                        hamilt::Hamilt<double>* p_ham)
 {
@@ -36,7 +36,7 @@ void output_mat_sparse(const bool& out_mat_hsR,
                        const TwoCenterBundle& two_center_bundle,
                        const LCAO_Orbitals& orb,
                        UnitCell& ucell,
-                       Grid_Driver& grid,
+                       const Grid_Driver& grid,
                        const K_Vectors& kv,
                        hamilt::Hamilt<std::complex<double>>* p_ham)
 {
diff --git a/source/module_io/output_mat_sparse.h b/source/module_io/output_mat_sparse.h
index 2adf7448a1..6cd7b8b37c 100644
--- a/source/module_io/output_mat_sparse.h
+++ b/source/module_io/output_mat_sparse.h
@@ -22,7 +22,7 @@ void output_mat_sparse(const bool& out_mat_hsR,
                        const TwoCenterBundle& two_center_bundle,
                        const LCAO_Orbitals& orb,
                        UnitCell& ucell,
-                       Grid_Driver& grid, // mohan add 2024-04-06
+                       const Grid_Driver& grid, // mohan add 2024-04-06
                        const K_Vectors& kv,
                        hamilt::Hamilt<T>* p_ham);
 } // namespace ModuleIO
diff --git a/source/module_io/output_mulliken.h b/source/module_io/output_mulliken.h
index 1bcea3a21f..a5c0c21bbc 100644
--- a/source/module_io/output_mulliken.h
+++ b/source/module_io/output_mulliken.h
@@ -93,7 +93,7 @@ void cal_mag(Parallel_Orbitals* pv,
              const TwoCenterBundle& two_center_bundle,
              const LCAO_Orbitals& orb,
              UnitCell& ucell,
-             Grid_Driver& gd,
+             const Grid_Driver& gd,
              const int istep,
              const bool print)
 {
diff --git a/source/module_io/td_current_io.cpp b/source/module_io/td_current_io.cpp
index 9534107da2..fe4a537f08 100644
--- a/source/module_io/td_current_io.cpp
+++ b/source/module_io/td_current_io.cpp
@@ -118,7 +118,7 @@ void ModuleIO::cal_tmp_DM(elecstate::DensityMatrix<std::complex<double>, double>
 }
 
 void ModuleIO::write_current(const UnitCell& ucell,
-                             Grid_Driver& gd,
+                             const Grid_Driver& gd,
                              const int istep,
                              const psi::Psi<std::complex<double>>* psi,
                              const elecstate::ElecState* pelec,
diff --git a/source/module_io/td_current_io.h b/source/module_io/td_current_io.h
index 709a85c4d6..a3648ee884 100644
--- a/source/module_io/td_current_io.h
+++ b/source/module_io/td_current_io.h
@@ -11,7 +11,7 @@ namespace ModuleIO
 #ifdef __LCAO
 /// @brief func to output current, only used in tddft
 void write_current(const UnitCell& ucell,
-                   Grid_Driver& gd,
+                   const Grid_Driver& gd,
                    const int istep,
                    const psi::Psi<std::complex<double>>* psi,
                    const elecstate::ElecState* pelec,
diff --git a/source/module_io/to_wannier90_lcao.cpp b/source/module_io/to_wannier90_lcao.cpp
index 3ecad5ea36..adb5893135 100644
--- a/source/module_io/to_wannier90_lcao.cpp
+++ b/source/module_io/to_wannier90_lcao.cpp
@@ -39,7 +39,7 @@ toWannier90_LCAO::~toWannier90_LCAO()
 }
 
 void toWannier90_LCAO::calculate(const UnitCell& ucell,
-                                 Grid_Driver& gd,
+                                 const Grid_Driver& gd,
                                  const ModuleBase::matrix& ekb,
                                  const K_Vectors& kv,
                                  const psi::Psi<std::complex<double>>& psi,
diff --git a/source/module_io/to_wannier90_lcao.h b/source/module_io/to_wannier90_lcao.h
index 9a28e71d56..547d1ef21e 100644
--- a/source/module_io/to_wannier90_lcao.h
+++ b/source/module_io/to_wannier90_lcao.h
@@ -80,14 +80,14 @@ class toWannier90_LCAO : public toWannier90
     ~toWannier90_LCAO();
 
     void calculate(const UnitCell& ucell,
-                   Grid_Driver& gd,
+                   const Grid_Driver& gd,
                    const ModuleBase::matrix& ekb,
                    const K_Vectors& kv,
                    const psi::Psi<std::complex<double>>& psi,
                    const Parallel_Orbitals* pv);
 
     void calculate(const UnitCell& ucell,
-                   Grid_Driver& gd,
+                   const Grid_Driver& gd,
                    const ModuleBase::matrix& ekb,
                    const K_Vectors& kv,
                    const psi::Psi<double>& psi,
diff --git a/source/module_io/unk_overlap_lcao.cpp b/source/module_io/unk_overlap_lcao.cpp
index 78dbba044e..0b63f155c4 100644
--- a/source/module_io/unk_overlap_lcao.cpp
+++ b/source/module_io/unk_overlap_lcao.cpp
@@ -194,21 +194,33 @@ void unkOverlap_lcao::init(const UnitCell& ucell,
         }
     }
 
-    for (auto& co1: center2_orb11)
-        for (auto& co2: co1.second)
-            for (auto& co3: co2.second)
-                for (auto& co4: co3.second)
-                    for (auto& co5: co4.second)
-                        for (auto& co6: co5.second)
+    for (auto& co1: center2_orb11) {
+        for (auto& co2: co1.second) {
+            for (auto& co3: co2.second) {
+                for (auto& co4: co3.second) {
+                    for (auto& co5: co4.second) {
+                        for (auto& co6: co5.second) {
                             co6.second.init_radial_table();
+}
+}
+}
+}
+}
+}
 
-    for (auto& co1: center2_orb21_r)
-        for (auto& co2: co1.second)
-            for (auto& co3: co2.second)
-                for (auto& co4: co3.second)
-                    for (auto& co5: co4.second)
-                        for (auto& co6: co5.second)
+    for (auto& co1: center2_orb21_r) {
+        for (auto& co2: co1.second) {
+            for (auto& co3: co2.second) {
+                for (auto& co4: co3.second) {
+                    for (auto& co5: co4.second) {
+                        for (auto& co6: co5.second) {
                             co6.second.init_radial_table();
+}
+}
+}
+}
+}
+}
 
     rcut_orb_.resize(orb.get_ntype());
     for (int it = 0; it < orb.get_ntype(); ++it) {
@@ -360,7 +372,7 @@ int unkOverlap_lcao::iw2im(const UnitCell& ucell, int iw)
 }
 
 // search for the nearest neighbor atoms
-void unkOverlap_lcao::cal_R_number(const UnitCell& ucell, Grid_Driver& gd)
+void unkOverlap_lcao::cal_R_number(const UnitCell& ucell, const Grid_Driver& gd)
 {
     // The number of overlaps between atomic orbitals 1 and atomic orbitals 2,
     // or the number of R, is empty when there is no overlap
@@ -439,8 +451,9 @@ void unkOverlap_lcao::cal_orb_overlap(const UnitCell& ucell)
             // if ( !pv.in_this_processor(iw1,iw2) ) continue;
 
             // iw1 and iw2 never have overlap
-            if (orb1_orb2_R[iw1][iw2].empty())
+            if (orb1_orb2_R[iw1][iw2].empty()) {
                 continue;
+}
 
             int atomType1 = iw2it(ucell,iw1);
             int ia1 = iw2ia(ucell,iw1);
diff --git a/source/module_io/unk_overlap_lcao.h b/source/module_io/unk_overlap_lcao.h
index 2554da1142..c598a6b2a4 100644
--- a/source/module_io/unk_overlap_lcao.h
+++ b/source/module_io/unk_overlap_lcao.h
@@ -54,7 +54,7 @@ class unkOverlap_lcao
     int iw2iL(const UnitCell& ucell, int iw);
     int iw2iN(const UnitCell& ucell, int iw);
     int iw2im(const UnitCell& ucell, int iw);
-    void cal_R_number(const UnitCell& ucell, Grid_Driver& gd);
+    void cal_R_number(const UnitCell& ucell, const Grid_Driver& gd);
     void cal_orb_overlap(const UnitCell& ucell);
     void prepare_midmatrix_pblas(const UnitCell& ucell,
                                  const int ik_L,
diff --git a/source/module_io/write_HS_R.cpp b/source/module_io/write_HS_R.cpp
index 686787af00..0ed7cfa149 100644
--- a/source/module_io/write_HS_R.cpp
+++ b/source/module_io/write_HS_R.cpp
@@ -13,23 +13,22 @@
 // If the absolute value of the matrix element is less than or equal to the
 // 'sparse_thr', it will be ignored.
 void ModuleIO::output_HSR(const UnitCell& ucell,
-    const int& istep,
-    const ModuleBase::matrix& v_eff,
-    const Parallel_Orbitals& pv,
-    LCAO_HS_Arrays& HS_Arrays,
-    Grid_Driver& grid, // mohan add 2024-04-06
-    const K_Vectors& kv,
-    hamilt::Hamilt<std::complex<double>>* p_ham,
+                          const int& istep,
+                          const ModuleBase::matrix& v_eff,
+                          const Parallel_Orbitals& pv,
+                          LCAO_HS_Arrays& HS_Arrays,
+                          const Grid_Driver& grid, // mohan add 2024-04-06
+                          const K_Vectors& kv,
+                          hamilt::Hamilt<std::complex<double>>* p_ham,
 #ifdef __EXX
-    const std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd,
-    const std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc,
+                          const std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd,
+                          const std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc,
 #endif
-    const std::string& SR_filename,
-    const std::string& HR_filename_up,
-    const std::string HR_filename_down,
-    const bool& binary,
-    const double& sparse_thr
-) {
+                          const std::string& SR_filename,
+                          const std::string& HR_filename_up,
+                          const std::string HR_filename_down,
+                          const bool& binary,
+                          const double& sparse_thr) {
     ModuleBase::TITLE("ModuleIO", "output_HSR");
     ModuleBase::timer::tick("ModuleIO", "output_HSR");
 
@@ -86,16 +85,17 @@ void ModuleIO::output_HSR(const UnitCell& ucell,
 
 void ModuleIO::output_dHR(const int& istep,
                           const ModuleBase::matrix& v_eff,
-                          Gint_k& gint_k,    // mohan add 2024-04-01
+                          Gint_k& gint_k, // mohan add 2024-04-01
                           const UnitCell& ucell,
                           const Parallel_Orbitals& pv,
                           LCAO_HS_Arrays& HS_Arrays,
-                          Grid_Driver& grid, // mohan add 2024-04-06
+                          const Grid_Driver& grid, // mohan add 2024-04-06
                           const TwoCenterBundle& two_center_bundle,
                           const LCAO_Orbitals& orb,
                           const K_Vectors& kv,
                           const bool& binary,
-                          const double& sparse_thr) {
+                          const double& sparse_thr)
+{
     ModuleBase::TITLE("ModuleIO", "output_dHR");
     ModuleBase::timer::tick("ModuleIO", "output_dHR");
 
@@ -155,11 +155,12 @@ void ModuleIO::output_dHR(const int& istep,
 }
 
 void ModuleIO::output_SR(Parallel_Orbitals& pv,
-                         Grid_Driver& grid,
+                         const Grid_Driver& grid,
                          hamilt::Hamilt<std::complex<double>>* p_ham,
                          const std::string& SR_filename,
                          const bool& binary,
-                         const double& sparse_thr) {
+                         const double& sparse_thr)
+{
     ModuleBase::TITLE("ModuleIO", "output_SR");
     ModuleBase::timer::tick("ModuleIO", "output_SR");
 
@@ -208,12 +209,13 @@ void ModuleIO::output_TR(const int istep,
                          const UnitCell& ucell,
                          const Parallel_Orbitals& pv,
                          LCAO_HS_Arrays& HS_Arrays,
-                         Grid_Driver& grid,
+                         const Grid_Driver& grid,
                          const TwoCenterBundle& two_center_bundle,
                          const LCAO_Orbitals& orb,
                          const std::string& TR_filename,
                          const bool& binary,
-                         const double& sparse_thr) {
+                         const double& sparse_thr)
+{
     ModuleBase::TITLE("ModuleIO", "output_TR");
     ModuleBase::timer::tick("ModuleIO", "output_TR");
 
diff --git a/source/module_io/write_HS_R.h b/source/module_io/write_HS_R.h
index df4c0251a5..399064bc34 100644
--- a/source/module_io/write_HS_R.h
+++ b/source/module_io/write_HS_R.h
@@ -11,31 +11,31 @@
 namespace ModuleIO
 {
     using TAC = std::pair<int, std::array<int, 3>>;
-void output_HSR(const UnitCell& ucell,
-    const int& istep,
-    const ModuleBase::matrix& v_eff,
-    const Parallel_Orbitals& pv,
-    LCAO_HS_Arrays& HS_Arrays,
-    Grid_Driver& grid, // mohan add 2024-04-06
-    const K_Vectors& kv,
-    hamilt::Hamilt<std::complex<double>>* p_ham,
+    void output_HSR(const UnitCell& ucell,
+                    const int& istep,
+                    const ModuleBase::matrix& v_eff,
+                    const Parallel_Orbitals& pv,
+                    LCAO_HS_Arrays& HS_Arrays,
+                    const Grid_Driver& grid, // mohan add 2024-04-06
+                    const K_Vectors& kv,
+                    hamilt::Hamilt<std::complex<double>>* p_ham,
 #ifdef __EXX
-    const std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd = nullptr,
-    const std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc = nullptr,
+                    const std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd = nullptr,
+                    const std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc = nullptr,
 #endif
-    const std::string& SR_filename = "data-SR-sparse_SPIN0.csr",
-    const std::string& HR_filename_up = "data-HR-sparse_SPIN0.csr",
-    const std::string HR_filename_down = "data-HR-sparse_SPIN1.csr",
-    const bool& binary = false,
-    const double& sparse_threshold = 1e-10); // LiuXh add 2019-07-15, modify in 2021-12-3
+                    const std::string& SR_filename = "data-SR-sparse_SPIN0.csr",
+                    const std::string& HR_filename_up = "data-HR-sparse_SPIN0.csr",
+                    const std::string HR_filename_down = "data-HR-sparse_SPIN1.csr",
+                    const bool& binary = false,
+                    const double& sparse_threshold = 1e-10); // LiuXh add 2019-07-15, modify in 2021-12-3
 
 void output_dHR(const int& istep,
                 const ModuleBase::matrix& v_eff,
-                Gint_k& gint_k,    // mohan add 2024-04-01
+                Gint_k& gint_k, // mohan add 2024-04-01
                 const UnitCell& ucell,
                 const Parallel_Orbitals& pv,
                 LCAO_HS_Arrays& HS_Arrays,
-                Grid_Driver& grid, // mohan add 2024-04-06
+                const Grid_Driver& grid, // mohan add 2024-04-06
                 const TwoCenterBundle& two_center_bundle,
                 const LCAO_Orbitals& orb,
                 const K_Vectors& kv,
@@ -46,7 +46,7 @@ void output_TR(const int istep,
                const UnitCell& ucell,
                const Parallel_Orbitals& pv,
                LCAO_HS_Arrays& HS_Arrays,
-               Grid_Driver& grid,
+               const Grid_Driver& grid,
                const TwoCenterBundle& two_center_bundle,
                const LCAO_Orbitals& orb,
                const std::string& TR_filename = "data-TR-sparse_SPIN0.csr",
@@ -54,7 +54,7 @@ void output_TR(const int istep,
                const double& sparse_threshold = 1e-10);
 
 void output_SR(Parallel_Orbitals& pv,
-               Grid_Driver& grid,
+               const Grid_Driver& grid,
                hamilt::Hamilt<std::complex<double>>* p_ham,
                const std::string& SR_filename = "data-SR-sparse_SPIN0.csr",
                const bool& binary = false,
diff --git a/source/module_lr/hamilt_casida.h b/source/module_lr/hamilt_casida.h
index ab40c82611..47eeb05a7a 100644
--- a/source/module_lr/hamilt_casida.h
+++ b/source/module_lr/hamilt_casida.h
@@ -17,30 +17,31 @@ namespace LR
     class HamiltLR
     {
     public:
-        template<typename TGint>
-        HamiltLR(std::string& xc_kernel,
-            const int& nspin,
-            const int& naos,
-            const std::vector<int>& nocc,
-            const std::vector<int>& nvirt,
-            const UnitCell& ucell_in,
-            const std::vector<double>& orb_cutoff,
-            Grid_Driver& gd_in,
-            const psi::Psi<T>& psi_ks_in,
-            const ModuleBase::matrix& eig_ks,
+      template <typename TGint>
+      HamiltLR(std::string& xc_kernel,
+               const int& nspin,
+               const int& naos,
+               const std::vector<int>& nocc,
+               const std::vector<int>& nvirt,
+               const UnitCell& ucell_in,
+               const std::vector<double>& orb_cutoff,
+               const Grid_Driver& gd_in,
+               const psi::Psi<T>& psi_ks_in,
+               const ModuleBase::matrix& eig_ks,
 #ifdef __EXX
-            std::weak_ptr<Exx_LRI<T>> exx_lri_in,
-            const double& exx_alpha,
-#endif 
-            TGint* gint_in,
-            std::weak_ptr<PotHxcLR> pot_in,
-            const K_Vectors& kv_in,
-            const std::vector<Parallel_2D>& pX_in,
-            const Parallel_2D& pc_in,
-            const Parallel_Orbitals& pmat_in,
-            const std::string& spin_type,
-            const std::string& ri_hartree_benchmark = "none",
-            const std::vector<int>& aims_nbasis = {}) : nspin(nspin), nocc(nocc), nvirt(nvirt), pX(pX_in), nk(kv_in.get_nks() / nspin)
+               std::weak_ptr<Exx_LRI<T>> exx_lri_in,
+               const double& exx_alpha,
+#endif
+               TGint* gint_in,
+               std::weak_ptr<PotHxcLR> pot_in,
+               const K_Vectors& kv_in,
+               const std::vector<Parallel_2D>& pX_in,
+               const Parallel_2D& pc_in,
+               const Parallel_Orbitals& pmat_in,
+               const std::string& spin_type,
+               const std::string& ri_hartree_benchmark = "none",
+               const std::vector<int>& aims_nbasis = {})
+          : nspin(nspin), nocc(nocc), nvirt(nvirt), pX(pX_in), nk(kv_in.get_nks() / nspin)
         {
             ModuleBase::TITLE("HamiltLR", "HamiltLR");
             if (ri_hartree_benchmark != "aims") { assert(aims_nbasis.empty()); }
diff --git a/source/module_lr/lr_spectrum.cpp b/source/module_lr/lr_spectrum.cpp
index e38d81609a..3a919e9dc7 100644
--- a/source/module_lr/lr_spectrum.cpp
+++ b/source/module_lr/lr_spectrum.cpp
@@ -23,8 +23,8 @@ inline void check_sum_rule(const double& osc_tot)
 }
 }
 
-template<>
-void LR::LR_Spectrum<double>::oscillator_strength(Grid_Driver& gd, const std::vector<double>& orb_cutoff)
+template <>
+void LR::LR_Spectrum<double>::oscillator_strength(const Grid_Driver& gd, const std::vector<double>& orb_cutoff)
 {
     ModuleBase::TITLE("LR::LR_Spectrum", "oscillator_strength");
     std::vector<double>& osc = this->oscillator_strength_;  // unit: Ry
@@ -79,8 +79,9 @@ void LR::LR_Spectrum<double>::oscillator_strength(Grid_Driver& gd, const std::ve
     check_sum_rule(osc_tot);
 }
 
-template<>
-void LR::LR_Spectrum<std::complex<double>>::oscillator_strength(Grid_Driver& gd, const std::vector<double>& orb_cutoff)
+template <>
+void LR::LR_Spectrum<std::complex<double>>::oscillator_strength(const Grid_Driver& gd,
+                                                                const std::vector<double>& orb_cutoff)
 {
     ModuleBase::TITLE("LR::LR_Spectrum", "oscillator_strength");
     std::vector<double>& osc = this->oscillator_strength_;  // unit: Ry
diff --git a/source/module_lr/lr_spectrum.h b/source/module_lr/lr_spectrum.h
index 4b521cd564..80e1326343 100644
--- a/source/module_lr/lr_spectrum.h
+++ b/source/module_lr/lr_spectrum.h
@@ -10,20 +10,36 @@ namespace LR
     class LR_Spectrum
     {
     public:
-        LR_Spectrum(const int& nspin_global, const int& naos, const std::vector<int>& nocc, const std::vector<int>& nvirt,
-            typename TGint<T>::type* gint, const ModulePW::PW_Basis& rho_basis, psi::Psi<T>& psi_ks_in,
-            const UnitCell& ucell, const K_Vectors& kv_in, Grid_Driver& gd, const std::vector<double>& orb_cutoff,
-            const std::vector<Parallel_2D>& pX_in, const Parallel_2D& pc_in, const Parallel_Orbitals& pmat_in,
-            const double* eig, const T* X, const int& nstate, const bool& openshell) :
-            nspin_x(openshell ? 2 : 1), naos(naos), nocc(nocc), nvirt(nvirt), nk(kv_in.get_nks() / nspin_global),
-            gint(gint), rho_basis(rho_basis), ucell(ucell), kv(kv_in),
-            pX(pX_in), pc(pc_in), pmat(pmat_in),
-            eig(eig), X(X), nstate(nstate),
-            ldim(nk* (nspin_x == 2 ? pX_in[0].get_local_size() + pX_in[1].get_local_size() : pX_in[0].get_local_size())),
-            gdim(nk* std::inner_product(nocc.begin(), nocc.end(), nvirt.begin(), 0))
-        {
-            for (int is = 0;is < nspin_global;++is) { psi_ks.emplace_back(LR_Util::get_psi_spin(psi_ks_in, is, nk)); }
-            this->oscillator_strength(gd, orb_cutoff);
+      LR_Spectrum(const int& nspin_global,
+                  const int& naos,
+                  const std::vector<int>& nocc,
+                  const std::vector<int>& nvirt,
+                  typename TGint<T>::type* gint,
+                  const ModulePW::PW_Basis& rho_basis,
+                  psi::Psi<T>& psi_ks_in,
+                  const UnitCell& ucell,
+                  const K_Vectors& kv_in,
+                  const Grid_Driver& gd,
+                  const std::vector<double>& orb_cutoff,
+                  const std::vector<Parallel_2D>& pX_in,
+                  const Parallel_2D& pc_in,
+                  const Parallel_Orbitals& pmat_in,
+                  const double* eig,
+                  const T* X,
+                  const int& nstate,
+                  const bool& openshell)
+          : nspin_x(openshell ? 2 : 1), naos(naos), nocc(nocc), nvirt(nvirt), nk(kv_in.get_nks() / nspin_global),
+            gint(gint), rho_basis(rho_basis), ucell(ucell), kv(kv_in), pX(pX_in), pc(pc_in), pmat(pmat_in), eig(eig),
+            X(X), nstate(nstate),
+            ldim(nk
+                 * (nspin_x == 2 ? pX_in[0].get_local_size() + pX_in[1].get_local_size() : pX_in[0].get_local_size())),
+            gdim(nk * std::inner_product(nocc.begin(), nocc.end(), nvirt.begin(), 0))
+      {
+          for (int is = 0; is < nspin_global; ++is)
+          {
+              psi_ks.emplace_back(LR_Util::get_psi_spin(psi_ks_in, is, nk));
+          }
+          this->oscillator_strength(gd, orb_cutoff);
         };
         /// @brief calculate the optical absorption spectrum
         void optical_absorption(const std::vector<double>& freq, const double eta, const std::string& spintype);
@@ -31,31 +47,33 @@ namespace LR
         void transition_analysis(const std::string& spintype);
     private:
         /// $$2/3\Omega\sum_{ia\sigma} |\braket{\psi_{i}|\mathbf{r}|\psi_{a}} |^2\int \rho_{\alpha\beta}(\mathbf{r}) \mathbf{r} d\mathbf{r}$$
-        void oscillator_strength(Grid_Driver& gd, const std::vector<double>& orb_cutoff);
-        const int nspin_x = 1;   ///< 1 for singlet/triplet, 2 for updown(openshell)
-        const int naos = 1;
-        const std::vector<int>& nocc;
-        const std::vector<int>& nvirt;
-        const int nk = 1;
-        const int nstate = 1;
-        const int ldim = 1;///< local leading dimension of X, or the data size of each state
-        const int gdim = 1;///< global leading dimension of X
-        const double ana_thr = 0.3;     ///< {abs(X) > thr} will appear in the transition analysis log
-        const double* eig;
-        const T* X;
-        const K_Vectors& kv;
-        std::vector<psi::Psi<T>> psi_ks;
-        const std::vector<Parallel_2D>& pX;
-        const Parallel_2D& pc;
-        const Parallel_Orbitals& pmat;
-        typename TGint<T>::type* gint = nullptr;
-        const ModulePW::PW_Basis& rho_basis;
-        const UnitCell& ucell;
+      void oscillator_strength(const Grid_Driver& gd, const std::vector<double>& orb_cutoff);
+      const int nspin_x = 1; ///< 1 for singlet/triplet, 2 for updown(openshell)
+      const int naos = 1;
+      const std::vector<int>& nocc;
+      const std::vector<int>& nvirt;
+      const int nk = 1;
+      const int nstate = 1;
+      const int ldim = 1;         ///< local leading dimension of X, or the data size of each state
+      const int gdim = 1;         ///< global leading dimension of X
+      const double ana_thr = 0.3; ///< {abs(X) > thr} will appear in the transition analysis log
+      const double* eig;
+      const T* X;
+      const K_Vectors& kv;
+      std::vector<psi::Psi<T>> psi_ks;
+      const std::vector<Parallel_2D>& pX;
+      const Parallel_2D& pc;
+      const Parallel_Orbitals& pmat;
+      typename TGint<T>::type* gint = nullptr;
+      const ModulePW::PW_Basis& rho_basis;
+      const UnitCell& ucell;
 
-        void cal_gint_rho(double** rho, const int& nrxx);
-        std::map<std::string, int> get_pair_info(const int i);  ///< given the index in X, return its ispin, ik, iocc, ivirt
+      void cal_gint_rho(double** rho, const int& nrxx);
+      std::map<std::string, int> get_pair_info(
+          const int i); ///< given the index in X, return its ispin, ik, iocc, ivirt
 
-        std::vector<ModuleBase::Vector3<T>> transition_dipole_;   ///< $\braket{ \psi_{i} | \mathbf{r} | \psi_{a} }$
-        std::vector<double> oscillator_strength_;///< $2/3\Omega |\sum_{ia\sigma} \braket{\psi_{i}|\mathbf{r}|\psi_{a}} |^2$
+      std::vector<ModuleBase::Vector3<T>> transition_dipole_; ///< $\braket{ \psi_{i} | \mathbf{r} | \psi_{a} }$
+      std::vector<double>
+          oscillator_strength_; ///< $2/3\Omega |\sum_{ia\sigma} \braket{\psi_{i}|\mathbf{r}|\psi_{a}} |^2$
     };
 }
diff --git a/source/module_lr/operator_casida/operator_lr_hxc.h b/source/module_lr/operator_casida/operator_lr_hxc.h
index 1c97c907b8..e602d679e1 100644
--- a/source/module_lr/operator_casida/operator_lr_hxc.h
+++ b/source/module_lr/operator_casida/operator_lr_hxc.h
@@ -15,33 +15,32 @@ namespace LR
     {
     public:
         //when nspin=2, nks is 2 times of real number of k-points. else (nspin=1 or 4), nks is the real number of k-points
-        OperatorLRHxc(const int& nspin,
-            const int& naos,
-            const std::vector<int>& nocc,
-            const std::vector<int>& nvirt,
-            const psi::Psi<T, Device>& psi_ks_in,
-            std::unique_ptr<elecstate::DensityMatrix<T, T>>& DM_trans_in,
-            typename TGint<T>::type* gint_in,
-            std::weak_ptr<PotHxcLR> pot_in,
-            const UnitCell& ucell_in,
-            const std::vector<double>& orb_cutoff,
-            Grid_Driver& gd_in,
-            const K_Vectors& kv_in,
-            const std::vector<Parallel_2D>& pX_in,
-            const Parallel_2D& pc_in,
-            const Parallel_Orbitals& pmat_in,
-            const std::vector<int>& ispin_ks = { 0 })
-            : nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt), nk(kv_in.get_nks() / nspin),
-            psi_ks(psi_ks_in), DM_trans(DM_trans_in), gint(gint_in), pot(pot_in),
-            ucell(ucell_in), orb_cutoff_(orb_cutoff), gd(gd_in), kv(kv_in),
-            pX(pX_in), pc(pc_in), pmat(pmat_in), ispin_ks(ispin_ks)
-        {
-            ModuleBase::TITLE("OperatorLRHxc", "OperatorLRHxc");
-            this->cal_type = hamilt::calculation_type::lcao_gint;
-            this->is_first_node = true;
-            this->hR = std::unique_ptr<hamilt::HContainer<T>>(new hamilt::HContainer<T>(&pmat_in));
-            LR_Util::initialize_HR<T, T>(*this->hR, ucell_in, gd_in, orb_cutoff);
-            assert(&pmat_in == this->hR->get_paraV());
+      OperatorLRHxc(const int& nspin,
+                    const int& naos,
+                    const std::vector<int>& nocc,
+                    const std::vector<int>& nvirt,
+                    const psi::Psi<T, Device>& psi_ks_in,
+                    std::unique_ptr<elecstate::DensityMatrix<T, T>>& DM_trans_in,
+                    typename TGint<T>::type* gint_in,
+                    std::weak_ptr<PotHxcLR> pot_in,
+                    const UnitCell& ucell_in,
+                    const std::vector<double>& orb_cutoff,
+                    const Grid_Driver& gd_in,
+                    const K_Vectors& kv_in,
+                    const std::vector<Parallel_2D>& pX_in,
+                    const Parallel_2D& pc_in,
+                    const Parallel_Orbitals& pmat_in,
+                    const std::vector<int>& ispin_ks = {0})
+          : nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt), nk(kv_in.get_nks() / nspin), psi_ks(psi_ks_in),
+            DM_trans(DM_trans_in), gint(gint_in), pot(pot_in), ucell(ucell_in), orb_cutoff_(orb_cutoff), gd(gd_in),
+            kv(kv_in), pX(pX_in), pc(pc_in), pmat(pmat_in), ispin_ks(ispin_ks)
+      {
+          ModuleBase::TITLE("OperatorLRHxc", "OperatorLRHxc");
+          this->cal_type = hamilt::calculation_type::lcao_gint;
+          this->is_first_node = true;
+          this->hR = std::unique_ptr<hamilt::HContainer<T>>(new hamilt::HContainer<T>(&pmat_in));
+          LR_Util::initialize_HR<T, T>(*this->hR, ucell_in, gd_in, orb_cutoff);
+          assert(&pmat_in == this->hR->get_paraV());
         };
         ~OperatorLRHxc() { };
 
@@ -87,7 +86,7 @@ namespace LR
 
         const UnitCell& ucell;
         std::vector<double> orb_cutoff_;
-        Grid_Driver& gd;
+        const Grid_Driver& gd;
 
         /// test
         mutable bool first_print = true;
diff --git a/source/module_lr/utils/lr_util_hcontainer.h b/source/module_lr/utils/lr_util_hcontainer.h
index 4ac4ccb1f3..715e0dad8b 100644
--- a/source/module_lr/utils/lr_util_hcontainer.h
+++ b/source/module_lr/utils/lr_util_hcontainer.h
@@ -45,8 +45,11 @@ namespace LR_Util
         const int& nat,
         const char& type = 'R');
 
-    template<typename T, typename TR>
-    void initialize_HR(hamilt::HContainer<TR>& hR, const UnitCell& ucell, Grid_Driver& gd, const std::vector<double>& orb_cutoff)
+    template <typename T, typename TR>
+    void initialize_HR(hamilt::HContainer<TR>& hR,
+                       const UnitCell& ucell,
+                       const Grid_Driver& gd,
+                       const std::vector<double>& orb_cutoff)
     {
         const auto& pmat = *hR.get_paraV();
         for (int iat1 = 0; iat1 < ucell.nat; iat1++)
@@ -72,8 +75,12 @@ namespace LR_Util
         // hR.set_paraV(&pmat);
         if (std::is_same<T, double>::value) { hR.fix_gamma(); }
     }
-    template<typename T, typename TR>
-    void initialize_DMR(elecstate::DensityMatrix<T, TR>& dm, const Parallel_Orbitals& pmat, const UnitCell& ucell, Grid_Driver& gd, const std::vector<double>& orb_cutoff)
+    template <typename T, typename TR>
+    void initialize_DMR(elecstate::DensityMatrix<T, TR>& dm,
+                        const Parallel_Orbitals& pmat,
+                        const UnitCell& ucell,
+                        const Grid_Driver& gd,
+                        const std::vector<double>& orb_cutoff)
     {
         hamilt::HContainer<TR> hR_tmp(&pmat);
         initialize_HR<T, TR>(hR_tmp, ucell, gd, orb_cutoff);
diff --git a/source/module_rdmft/rdmft.cpp b/source/module_rdmft/rdmft.cpp
index 7e68635869..a37be99626 100644
--- a/source/module_rdmft/rdmft.cpp
+++ b/source/module_rdmft/rdmft.cpp
@@ -59,7 +59,7 @@ void RDMFT<TK, TR>::init(Gint_Gamma& GG_in,
                          Gint_k& GK_in,
                          Parallel_Orbitals& ParaV_in,
                          UnitCell& ucell_in,
-                         Grid_Driver& gd_in,
+                         const Grid_Driver& gd_in,
                          K_Vectors& kv_in,
                          elecstate::ElecState& pelec_in,
                          LCAO_Orbitals& orb_in,
diff --git a/source/module_rdmft/rdmft.h b/source/module_rdmft/rdmft.h
index 2d9861abf0..423fc7b94a 100644
--- a/source/module_rdmft/rdmft.h
+++ b/source/module_rdmft/rdmft.h
@@ -85,7 +85,7 @@ class RDMFT
               Gint_k& GK_in,
               Parallel_Orbitals& ParaV_in,
               UnitCell& ucell_in,
-              Grid_Driver& gd_in,
+              const Grid_Driver& gd_in,
               K_Vectors& kv_in,
               elecstate::ElecState& pelec_in,
               LCAO_Orbitals& orb_in,
@@ -198,7 +198,7 @@ class RDMFT
 
     // update after ion step
     const UnitCell* ucell = nullptr;
-    Grid_Driver* gd = nullptr;
+    const Grid_Driver* gd = nullptr;
     const ModulePW::PW_Basis* rho_basis = nullptr;
     const ModuleBase::matrix* vloc = nullptr;
     const ModuleBase::ComplexMatrix* sf = nullptr;
diff --git a/source/module_rdmft/rdmft_tools.cpp b/source/module_rdmft/rdmft_tools.cpp
index 1218b104f0..dacc2fcc18 100644
--- a/source/module_rdmft/rdmft_tools.cpp
+++ b/source/module_rdmft/rdmft_tools.cpp
@@ -192,8 +192,7 @@ template class Veff_rdmft<std::complex<double>, std::complex<double>>;
 // this part of the code is copying from class Veff
 // initialize_HR()
 template <typename TK, typename TR>
-void Veff_rdmft<TK, TR>::initialize_HR(const UnitCell* ucell_in,
-                                       Grid_Driver* GridD)
+void Veff_rdmft<TK, TR>::initialize_HR(const UnitCell* ucell_in, const Grid_Driver* GridD)
 {
     ModuleBase::TITLE("Veff", "initialize_HR");
     ModuleBase::timer::tick("Veff", "initialize_HR");
diff --git a/source/module_rdmft/rdmft_tools.h b/source/module_rdmft/rdmft_tools.h
index 207afa7700..3f9ff268a5 100644
--- a/source/module_rdmft/rdmft_tools.h
+++ b/source/module_rdmft/rdmft_tools.h
@@ -301,35 +301,24 @@ class Veff_rdmft : public hamilt::OperatorLCAO<TK, TR>
      * @param GK_in: the pointer of Gint_k object, used for grid integration
     */
     Veff_rdmft(Gint_k* GK_in,
-                      hamilt::HS_Matrix_K<TK>* hsk_in,
-                      const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                      elecstate::Potential* pot_in,
-                      hamilt::HContainer<TR>* hR_in,
-                      const UnitCell* ucell_in,
-                      const std::vector<double>& orb_cutoff,
-                      Grid_Driver* GridD_in,
-                      const int& nspin,
-                      const Charge* charge_in,
-                      const ModulePW::PW_Basis* rho_basis_in,
-                      const ModuleBase::matrix* vloc_in,
-                      const ModuleBase::ComplexMatrix* sf_in,
-                      const std::string potential_in,
-                      double* etxc_in = nullptr,
-                      double* vtxc_in = nullptr
-                    )
-        : GK(GK_in),
-          orb_cutoff_(orb_cutoff),
-          pot(pot_in),
-          ucell(ucell_in),
-          gd(GridD_in),
-          hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in),
-          charge_(charge_in),
-          rho_basis_(rho_basis_in),
-          vloc_(vloc_in),
-          sf_(sf_in),
-          potential_(potential_in),
-          etxc(etxc_in),
-          vtxc(vtxc_in)
+               hamilt::HS_Matrix_K<TK>* hsk_in,
+               const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+               elecstate::Potential* pot_in,
+               hamilt::HContainer<TR>* hR_in,
+               const UnitCell* ucell_in,
+               const std::vector<double>& orb_cutoff,
+               const Grid_Driver* GridD_in,
+               const int& nspin,
+               const Charge* charge_in,
+               const ModulePW::PW_Basis* rho_basis_in,
+               const ModuleBase::matrix* vloc_in,
+               const ModuleBase::ComplexMatrix* sf_in,
+               const std::string potential_in,
+               double* etxc_in = nullptr,
+               double* vtxc_in = nullptr)
+        : GK(GK_in), orb_cutoff_(orb_cutoff), pot(pot_in), ucell(ucell_in),
+          gd(GridD_in), hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), charge_(charge_in),
+          rho_basis_(rho_basis_in), vloc_(vloc_in), sf_(sf_in), potential_(potential_in), etxc(etxc_in), vtxc(vtxc_in)
     {
         this->cal_type = hamilt::calculation_type::lcao_gint;
 
@@ -338,35 +327,24 @@ class Veff_rdmft : public hamilt::OperatorLCAO<TK, TR>
         GK_in->initialize_pvpR(*ucell_in, GridD_in, nspin);
     }
     Veff_rdmft(Gint_Gamma* GG_in,
-                      hamilt::HS_Matrix_K<TK>* hsk_in,
-                      const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                      elecstate::Potential* pot_in,
-                      hamilt::HContainer<TR>* hR_in,
-                      const UnitCell* ucell_in,
-                      const std::vector<double>& orb_cutoff,
-                      Grid_Driver* GridD_in,
-                      const int& nspin,
-                      const Charge* charge_in,
-                      const ModulePW::PW_Basis* rho_basis_in,
-                      const ModuleBase::matrix* vloc_in,
-                      const ModuleBase::ComplexMatrix* sf_in,  
-                      const std::string potential_in,
-                      double* etxc_in = nullptr,
-                      double* vtxc_in = nullptr
-                    )
-        : GG(GG_in), 
-          orb_cutoff_(orb_cutoff),
-          pot(pot_in),
-          hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in),
-          ucell(ucell_in),
-          gd(GridD_in),
-          charge_(charge_in),
-          rho_basis_(rho_basis_in),
-          vloc_(vloc_in),
-          sf_(sf_in),
-          potential_(potential_in),
-          etxc(etxc_in),
-          vtxc(vtxc_in)
+               hamilt::HS_Matrix_K<TK>* hsk_in,
+               const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+               elecstate::Potential* pot_in,
+               hamilt::HContainer<TR>* hR_in,
+               const UnitCell* ucell_in,
+               const std::vector<double>& orb_cutoff,
+               const Grid_Driver* GridD_in,
+               const int& nspin,
+               const Charge* charge_in,
+               const ModulePW::PW_Basis* rho_basis_in,
+               const ModuleBase::matrix* vloc_in,
+               const ModuleBase::ComplexMatrix* sf_in,
+               const std::string potential_in,
+               double* etxc_in = nullptr,
+               double* vtxc_in = nullptr)
+        : GG(GG_in), orb_cutoff_(orb_cutoff), pot(pot_in), hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in),
+          ucell(ucell_in), gd(GridD_in), charge_(charge_in), rho_basis_(rho_basis_in), vloc_(vloc_in), sf_(sf_in),
+          potential_(potential_in), etxc(etxc_in), vtxc(vtxc_in)
     {
         this->cal_type = hamilt::calculation_type::lcao_gint;
 
@@ -387,7 +365,7 @@ class Veff_rdmft : public hamilt::OperatorLCAO<TK, TR>
 
     const UnitCell* ucell;
 
-    Grid_Driver* gd;
+    const Grid_Driver* gd;
 
   private:
     // used for k-dependent grid integration.
@@ -410,8 +388,7 @@ class Veff_rdmft : public hamilt::OperatorLCAO<TK, TR>
      * HContainer is used to store the electronic kinetic matrix with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(const UnitCell* ucell_in, Grid_Driver* GridD_in);
-
+    void initialize_HR(const UnitCell* ucell_in, const Grid_Driver* GridD_in);
 
     // added by jghan
 
diff --git a/source/module_ri/module_exx_symmetry/symmetry_rotation.cpp b/source/module_ri/module_exx_symmetry/symmetry_rotation.cpp
index 9270ef6bf9..c73d587ff2 100644
--- a/source/module_ri/module_exx_symmetry/symmetry_rotation.cpp
+++ b/source/module_ri/module_exx_symmetry/symmetry_rotation.cpp
@@ -445,7 +445,9 @@ namespace ModuleSymmetry
         return DMk;
     }
 
-    std::vector<TC> Symmetry_rotation::get_Rs_from_adjacent_list(const UnitCell& ucell, Grid_Driver& gd, const Parallel_Orbitals& pv) const
+    std::vector<TC> Symmetry_rotation::get_Rs_from_adjacent_list(const UnitCell& ucell,
+                                                                 const Grid_Driver& gd,
+                                                                 const Parallel_Orbitals& pv) const
     {
         // find the union set of Rs for all the atom pairs
         std::set<TC> Rs_set;
diff --git a/source/module_ri/module_exx_symmetry/symmetry_rotation.h b/source/module_ri/module_exx_symmetry/symmetry_rotation.h
index 9bef3510c3..c50342f12a 100644
--- a/source/module_ri/module_exx_symmetry/symmetry_rotation.h
+++ b/source/module_ri/module_exx_symmetry/symmetry_rotation.h
@@ -114,7 +114,9 @@ namespace ModuleSymmetry
     private:
         //--------------------------------------------------------------------------------
         std::vector<TC> get_Rs_from_BvK(const K_Vectors& kv)const;
-        std::vector<TC> get_Rs_from_adjacent_list(const UnitCell& ucell, Grid_Driver& gd, const Parallel_Orbitals& pv)const;
+        std::vector<TC> get_Rs_from_adjacent_list(const UnitCell& ucell,
+                                                  const Grid_Driver& gd,
+                                                  const Parallel_Orbitals& pv) const;
         //--------------------------------------------------------------------------------
 
         /// The sub functions to rotate matrices

From ccd28748924e387a40f371f3b8737aeaa5a760c4 Mon Sep 17 00:00:00 2001
From: Critsium <tsfxwbbzxy@163.com>
Date: Fri, 13 Dec 2024 02:20:42 -0500
Subject: [PATCH 7/7] Feature: Allow directly compiling CUDA version on DCU
 hardware (#5727)

* Initial commit

* Modify CMakeLists
---
 CMakeLists.txt                                               | 5 +++++
 source/module_base/module_device/device.h                    | 2 +-
 .../module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu  | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9086f0b6c4..4701f76d75 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -40,6 +40,7 @@ option(ENABLE_CNPY "Enable cnpy usage." OFF)
 option(ENABLE_PEXSI "Enable support for PEXSI." OFF)
 option(ENABLE_CUSOLVERMP "Enable cusolvermp." OFF)
 option(USE_DSP "Enable DSP usage." OFF)
+option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF)
 
 # enable json support
 if(ENABLE_RAPIDJSON)
@@ -126,6 +127,10 @@ if (USE_DSP)
   set(ABACUS_BIN_NAME abacus_dsp)
 endif()
 
+if (USE_CUDA_ON_DCU)
+  add_compile_definitions(__CUDA_ON_DCU)
+endif()
+
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 
 if(ENABLE_COVERAGE)
diff --git a/source/module_base/module_device/device.h b/source/module_base/module_device/device.h
index c89d3bc9cd..b416f00edb 100644
--- a/source/module_base/module_device/device.h
+++ b/source/module_base/module_device/device.h
@@ -86,7 +86,7 @@ void record_device_memory(const Device* dev, std::ofstream& ofs_device, std::str
  * @brief for compatibility with __CUDA_ARCH__ 600 and earlier
  *
  */
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 && !defined(__CUDA_ON_DCU)
 static __inline__ __device__ double atomicAdd(double* address, double val)
 {
     unsigned long long int* address_as_ull = (unsigned long long int*)address;
diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu b/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu
index b18e5c5160..dfca5c4e8e 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu
+++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu
@@ -893,7 +893,7 @@ void cal_force_npw_op<FPTYPE, base_device::DEVICE_GPU>::operator()(
     int t_num = (npw%t_size) ? (npw/t_size + 1) : (npw/t_size);
     dim3 npwgrid(((t_num%THREADS_PER_BLOCK) ? (t_num/THREADS_PER_BLOCK + 1) : (t_num/THREADS_PER_BLOCK)));
 
-    cal_force_npw << < npwgrid, THREADS_PER_BLOCK >> > (
+    cal_force_npw <<< npwgrid, THREADS_PER_BLOCK >>> (
         reinterpret_cast<const thrust::complex<FPTYPE>*>(psiv),
         gv_x, gv_y, gv_z, rhocgigg_vec, force, pos_x, pos_y, pos_z,
         npw, omega, tpiba