diff --git a/src/apps/cc2/cc2.cc b/src/apps/cc2/cc2.cc index 79f7d3864b7..8fe169735ae 100644 --- a/src/apps/cc2/cc2.cc +++ b/src/apps/cc2/cc2.cc @@ -79,8 +79,8 @@ int main(int argc, char **argv) { nemo->get_calc()->param.set_derived_value("print_level", 2); nemo->param.set_derived_value("k", 5); nemo->get_calc()->param.set_derived_value("k", 5); - nemo->param.set_derived_value("localize", "canon"); - nemo->get_calc()->param.set_derived_value("localize", "canon"); + // nemo->param.set_derived_value("localize", "canon"); + // nemo->get_calc()->param.set_derived_value("localize", "canon"); nemo->param.set_derived_values(nemo->molecule(),nemo->get_calc()->aobasis,parser); nemo->get_calc()->param.set_derived_values(nemo->molecule(),nemo->get_calc()->aobasis,parser); CC2 cc2(world, parser, nemo); diff --git a/src/madness/chem/BSHApply.h b/src/madness/chem/BSHApply.h index 137481911f3..dfb59f0ba02 100644 --- a/src/madness/chem/BSHApply.h +++ b/src/madness/chem/BSHApply.h @@ -24,6 +24,7 @@ template class BSHApply { public: + enum return_value {update, residual}; World& world; double levelshift=0.0; double lo=1.e-6; @@ -31,6 +32,7 @@ class BSHApply { bool printme=false; bool destroy_Vpsi=false; Function metric; + return_value ret_value=residual; // return the new orbitals/functions or the residuals public: BSHApply(World& world) : world(world), @@ -62,6 +64,7 @@ class BSHApply { std::vector < std::shared_ptr > > ops(psi.size()); for (int i=0; i >( BSHOperatorPtr(world, sqrt(-2.0*eps_in_green(e_i)), lo, bshtol)); ops[i]->destructive()=true; @@ -91,7 +94,11 @@ class BSHApply { double cpu1=cpu_time(); if (printme) printf("time in BSHApply() %8.4fs\n",cpu1-cpu0); - return std::make_tuple(res,delta_eps); + if (ret_value==update) return std::make_tuple(tmp,delta_eps); + else if (ret_value==residual) return std::make_tuple(res,delta_eps); + else { + MADNESS_EXCEPTION("unknown return value in BSHApply",1); + } } @@ -124,7 +131,7 @@ class BSHApply { const Tensor fock1) const { // check dimensions - bool consistent=(psi.size()==size_t(fock1.dim(0))); + bool consistent=(psi.size()==size_t(fock1.dim(0))); if ((fock1.ndim()==2) and not (psi.size()==size_t(fock1.dim(1)))) consistent=false; if (not consistent) { @@ -144,6 +151,10 @@ class BSHApply { for (int i=0; i mp2pairs, cc2pairs; // singles for ground state CC_vecfunction cc2singles(PARTICLE); + // Pairs structure to vector if necessary + const std::size_t nfreeze=parameters.freeze(); + const int nocc=CCOPS.mo_ket().size(); + triangular_map=PairVectorMap::triangular_map(nfreeze,nocc); + double mp2_energy=0.0, cc2_energy=0.0, mp3_energy=0.0; bool need_tdhf=parameters.response(); @@ -46,7 +57,7 @@ CC2::solve() { // check for restart data for CC2, otherwise use MP2 as guess if (need_cc2) { Pairs dummypairs; - bool found_cc2d = initialize_pairs(dummypairs, GROUND_STATE, CT_CC2, cc2singles, CC_vecfunction(RESPONSE)); + bool found_cc2d = initialize_pairs(dummypairs, GROUND_STATE, CT_CC2, cc2singles, CC_vecfunction(RESPONSE), 0, info); if (not found_cc2d) need_mp2=true; } @@ -58,11 +69,11 @@ CC2::solve() { } if (need_mp2) { - bool restarted=initialize_pairs(mp2pairs, GROUND_STATE, CT_MP2, CC_vecfunction(PARTICLE), CC_vecfunction(RESPONSE), 0); + bool restarted=initialize_pairs(mp2pairs, GROUND_STATE, CT_MP2, CC_vecfunction(PARTICLE), CC_vecfunction(RESPONSE), 0, info); if (restarted and parameters.no_compute_mp2()) { // for (auto& pair : mp2pairs.allpairs) mp2_energy+=CCOPS.compute_pair_correlation_energy(pair.second); } else { - mp2_energy = solve_mp2_coupled(mp2pairs); + mp2_energy = solve_mp2_coupled(mp2pairs, info); output_calc_info_schema("mp2",mp2_energy); } output.section(assign_name(CT_MP2) + " Calculation Ended !"); @@ -74,8 +85,8 @@ CC2::solve() { if (need_cc2) { // check if singles or/and doubles to restart are there - initialize_singles(cc2singles, PARTICLE); - const bool load_doubles = initialize_pairs(cc2pairs, GROUND_STATE, CT_CC2, cc2singles, CC_vecfunction(RESPONSE), 0); + cc2singles=initialize_singles(PARTICLE); + const bool load_doubles = initialize_pairs(cc2pairs, GROUND_STATE, CT_CC2, cc2singles, CC_vecfunction(RESPONSE), 0, info); // nothing to restart -> make MP2 if (not load_doubles) { @@ -87,13 +98,12 @@ CC2::solve() { } } - cc2_energy = solve_cc2(cc2singles, cc2pairs); + cc2_energy = solve_cc2(cc2singles, cc2pairs, info); output_calc_info_schema("cc2",cc2_energy); output.section(assign_name(CT_CC2) + " Calculation Ended !"); if (world.rank() == 0) { printf_msg_energy_time("CC2 correlation energy",cc2_energy,wall_time()); -// std::cout << std::fixed << std::setprecision(10) << " MP2 Correlation Energy =" << mp2_energy << "\n"; std::cout << std::fixed << std::setprecision(10) << " CC2 Correlation Energy =" << cc2_energy << "\n"; } } @@ -128,10 +138,10 @@ CC2::solve() { CCTimer time_ex(world, "CIS(D) for Excitation " + std::to_string(int(excitation))); // check the convergence of the cis function (also needed to store the ccs potential) and to recalulate the excitation energy - iterate_ccs_singles(ccs); + iterate_ccs_singles(ccs, info); Pairs cispd; - initialize_pairs(cispd, EXCITED_STATE, CT_CISPD, CC_vecfunction(PARTICLE), ccs, excitation); + initialize_pairs(cispd, EXCITED_STATE, CT_CISPD, CC_vecfunction(PARTICLE), ccs, excitation, info); const double ccs_omega = ccs.omega; const double cispd_omega = solve_cispd(cispd, mp2pairs, ccs); @@ -175,14 +185,13 @@ CC2::solve() { CCTimer time_ex(world, "ADC(2) for Excitation " + std::to_string(int(excitation))); // check the convergence of the cis function (also needed to store the ccs potential) and to recalulate the excitation energy - CC_vecfunction dummy = ccs.copy(); - iterate_ccs_singles(dummy); + CC_vecfunction dummy = copy(ccs); + iterate_ccs_singles(dummy, CCOPS.info); ccs.omega = dummy.omega; // will be overwritten soon output("Changes not stored!"); Pairs xpairs; - const bool restart = initialize_pairs(xpairs, EXCITED_STATE, CT_ADC2, CC_vecfunction(PARTICLE), ccs, - excitation); + const bool restart = initialize_pairs(xpairs, EXCITED_STATE, CT_ADC2, CC_vecfunction(PARTICLE), ccs, excitation, CCOPS.info); // if no restart: Calculate CIS(D) as first guess const double ccs_omega = ccs.omega; @@ -190,7 +199,7 @@ CC2::solve() { if (not restart) { output.section("No Restart-Pairs found: Calculating CIS(D) as first Guess"); Pairs cispd; - initialize_pairs(cispd, EXCITED_STATE, CT_CISPD, CC_vecfunction(PARTICLE), ccs, excitation); + initialize_pairs(cispd, EXCITED_STATE, CT_CISPD, CC_vecfunction(PARTICLE), ccs, excitation, CCOPS.info); cispd_omega = solve_cispd(cispd, mp2pairs, ccs); for (auto& tmp:cispd.allpairs) { const size_t i = tmp.first.first; @@ -199,10 +208,10 @@ CC2::solve() { } } - iterate_adc2_singles(mp2pairs, ccs, xpairs); + iterate_adc2_singles(mp2pairs, ccs, xpairs, CCOPS.info); for (size_t iter = 0; iter < 10; iter++) { bool dconv = iterate_adc2_pairs(xpairs, ccs); - bool sconv = iterate_adc2_singles(mp2pairs, ccs, xpairs); + bool sconv = iterate_adc2_singles(mp2pairs, ccs, xpairs, CCOPS.info); if (sconv and dconv) { output("ADC(2) Converged"); break; @@ -244,66 +253,71 @@ CC2::solve() { } else if (ctype == CT_LRCC2) { CCTimer time(world, "Whole LRCC2 Calculation"); - std::vector > results; - std::vector > > timings; - - auto vccs=solve_ccs(); - - std::vector > > results_ex; - for (size_t xxx = 0; xxx < vccs.size(); xxx++) { - const size_t excitation = parameters.excitations()[xxx]; - CCTimer time_ex(world, "LRCC2 Calculation for Excitation " + std::to_string(int(excitation))); - CC_vecfunction lrcc2_s = vccs[xxx]; - // needed to assign an omega - const vector_real_function_3d backup = copy(world, lrcc2_s.get_vecfunction()); - CC_vecfunction test(backup, RESPONSE, parameters.freeze()); - iterate_ccs_singles(test); - lrcc2_s.omega = test.omega; - output("CCS Iteration: Changes are not applied (just omega)!"); - - - Pairs lrcc2_d; - bool found_lrcc2d = initialize_pairs(lrcc2_d, EXCITED_STATE, CT_LRCC2, cc2singles, lrcc2_s, excitation); - - if (found_lrcc2d) iterate_lrcc2_singles(cc2singles, cc2pairs, lrcc2_s, lrcc2_d); - else iterate_ccs_singles(lrcc2_s); - const double omega_cis = lrcc2_s.omega; - - for (size_t iter = 0; iter < parameters.iter_max(); iter++) { - output.section("Macroiteration " + std::to_string(int(iter)) + " of LRCC2"); - bool dconv = iterate_lrcc2_pairs(cc2singles, cc2pairs, lrcc2_s, lrcc2_d); - bool sconv = iterate_lrcc2_singles(cc2singles, cc2pairs, lrcc2_s, lrcc2_d); - if (dconv and sconv) break; - } - const double omega_cc2 = lrcc2_s.omega; - const std::string msg = "Excitation " + std::to_string(int(excitation)); - results_ex.push_back(std::make_pair(msg, std::make_pair(omega_cis, omega_cc2))); - timings.push_back(std::make_pair(msg, time_ex.current_time(true))); - - } - - timings.push_back(std::make_pair("Whole LRCC2", time.current_time(true))); - output.section("LRCC2 Finished"); - output("Ground State Results:"); - for (const auto& res:results) { - if (world.rank() == 0) - std::cout << std::fixed << std::setprecision(10) - << res.first << "=" << res.second << "\n"; - } - output("Response Results:"); - for (const auto& res:results_ex) { - if (world.rank() == 0) - std::cout << std::fixed << std::setprecision(10) - << res.first << ": " << res.second.first << " (CIS)*, " << res.second.second << " (CC2)\n"; - } - if (world.rank() == 0) std::cout << "*only if CIS vectors where given in the beginning (not for CC2 restart)\n"; - output("\nTimings"); - for (const auto& time:timings) { - if (world.rank() == 0) - std::cout << std::scientific << std::setprecision(2) - << std::setfill(' ') << std::setw(15) << time.first - << ": " << time.second.first << " (Wall), " << time.second.second << " (CPU)" << "\n"; - } + auto vccs=solve_ccs(); + + if (world.rank()==0) print_header3("reiterating CCS"); + iterate_ccs_singles(vccs[0], info); + if (world.rank()==0) print_header3("end reiterating CCS"); + + for (size_t iexcitation = 0; iexcitation < vccs.size(); iexcitation++) { + if (world.rank()==0) print_header1("Solving LRCC2 for excitation " + std::to_string(iexcitation) + + " with omega "+std::to_string(vccs[iexcitation].omega)); + solve_lrcc2(cc2pairs,cc2singles,vccs[iexcitation],iexcitation,info); + } + // const size_t excitation = parameters.excitations()[xxx]; + // CCTimer time_ex(world, "LRCC2 Calculation for Excitation " + std::to_string(int(excitation))); + // CC_vecfunction lrcc2_s = vccs[xxx]; + // // needed to assign an omega + // const vector_real_function_3d backup = copy(world, lrcc2_s.get_vecfunction()); + // CC_vecfunction test(backup, RESPONSE, parameters.freeze()); + // iterate_ccs_singles(test, info); + // lrcc2_s.omega = test.omega; + // output("CCS Iteration: Changes are not applied (just omega)!"); + + + // Pairs lrcc2_d; + // bool found_lrcc2d = initialize_pairs(lrcc2_d, EXCITED_STATE, CT_LRCC2, cc2singles, lrcc2_s, excitation, info); + + // if (found_lrcc2d) iterate_lrcc2_singles(world, cc2singles, cc2pairs, lrcc2_s, lrcc2_d, info); + // else iterate_ccs_singles(lrcc2_s, info); + // const double omega_cis = lrcc2_s.omega; + + // for (size_t iter = 0; iter < parameters.iter_max(); iter++) { + // output.section("Macroiteration " + std::to_string(int(iter)) + " of LRCC2"); + // bool dconv = iterate_lrcc2_pairs(world, cc2singles, lrcc2_s, lrcc2_d, info); + // bool sconv = iterate_lrcc2_singles(world, cc2singles, cc2pairs, lrcc2_s, lrcc2_d, info); + // update_reg_residues_ex(world, cc2singles, lrcc2_s, lrcc2_d, info); + // if (dconv and sconv) break; + // } + // const double omega_cc2 = lrcc2_s.omega; + // const std::string msg = "Excitation " + std::to_string(int(excitation)); + // results_ex.push_back(std::make_pair(msg, std::make_pair(omega_cis, omega_cc2))); + // timings.push_back(std::make_pair(msg, time_ex.current_time(true))); + + //} + +// timings.push_back(std::make_pair("Whole LRCC2", time.current_time(true))); +// output.section("LRCC2 Finished"); +// output("Ground State Results:"); +// for (const auto& res:results) { +// if (world.rank() == 0) +// std::cout << std::fixed << std::setprecision(10) +// << res.first << "=" << res.second << "\n"; +// } +// output("Response Results:"); +// for (const auto& res:results_ex) { +// if (world.rank() == 0) +// std::cout << std::fixed << std::setprecision(10) +// << res.first << ": " << res.second.first << " (CIS)*, " << res.second.second << " (CC2)\n"; +// } +// if (world.rank() == 0) std::cout << "*only if CIS vectors where given in the beginning (not for CC2 restart)\n"; +// output("\nTimings"); +// for (const auto& time:timings) { +// if (world.rank() == 0) +// std::cout << std::scientific << std::setprecision(2) +// << std::setfill(' ') << std::setw(15) << time.first +// << ": " << time.second.first << " (Wall), " << time.second.second << " (CPU)" << "\n"; +// } } else MADNESS_EXCEPTION(("Unknown Calculation Type: " + assign_name(ctype)).c_str(), 1); @@ -375,7 +389,8 @@ Tensor CC2::enforce_core_valence_separation(const Tensor& fmat) }; // Solve the CCS equations for the ground state (debug potential and check HF convergence) -std::vector CC2::solve_ccs() { +std::vector CC2::solve_ccs() const +{ // output.section("SOLVE CCS"); // std::vector excitations; // for (size_t k = 0; k < parameters.excitations().size(); k++) { @@ -394,16 +409,15 @@ std::vector CC2::solve_ccs() { 1); result.push_back(excitations[x]); } + print_header3("Solution of the CCS equations"); + tdhf->analyze(result); return result; } -double CC2::solve_mp2_coupled(Pairs& doubles) { +double CC2::solve_mp2_coupled(Pairs& doubles, Info& info) { if (world.rank()==0) print_header2(" computing the MP1 wave function"); double total_energy = 0.0; - const std::size_t nfreeze=parameters.freeze(); - const int nocc=CCOPS.mo_ket().size(); - auto triangular_map=PairVectorMap::triangular_map(nfreeze,nocc); // make vector holding CCPairs for partitioner of MacroTask std::vector pair_vec=Pairs::pairs2vector(doubles,triangular_map); @@ -418,33 +432,31 @@ double CC2::solve_mp2_coupled(Pairs& doubles) { } } else { - if (world.rank()==0) print_header3("Starting MP2 constant part calculation"); - // calc constant part via taskq - auto taskq = std::shared_ptr(new MacroTaskQ(world, world.size())); - taskq->set_printlevel(3); - MacroTaskMp2ConstantPart t; - MacroTask task(world, t, taskq); - task.set_name("MP2_Constant_Part"); - std::vector result_vec = task(pair_vec, CCOPS.mo_ket().get_vecfunction(), - CCOPS.mo_bra().get_vecfunction(), parameters, - nemo->R_square, nemo->ncf->U1vec(),std::vector({"Ue","KffK"})); - taskq->print_taskq(); - taskq->run_all(); - - if (world.rank()==0) std::cout << std::fixed << std::setprecision(1) << "\nFinished constant part at time " << wall_time() << std::endl; - if (world.rank()==0) std::cout << std::fixed << std::setprecision(1) << "\nStarting saving pairs and energy calculation at time " << wall_time() << std::endl; + + if (world.rank()==0) { + std::cout << std::fixed << std::setprecision(1) << "\nStarting constant part at time " << wall_time() << std::endl; + } + MacroTaskConstantPart t; + MacroTask task(world, t); + std::vector> gs_singles, ex_singles; // dummy vectors + std::vector result_vec = task(pair_vec, gs_singles, ex_singles, info) ; + + if (world.rank()==0) { + std::cout << std::fixed << std::setprecision(1) << "\nFinished constant part at time " << wall_time() << std::endl; + std::cout << std::fixed << std::setprecision(1) << "\nStarting saving pairs and energy calculation at time " << wall_time() << std::endl; + } // transform vector back to Pairs structure for (size_t i = 0; i < pair_vec.size(); i++) { pair_vec[i].constant_part = result_vec[i]; - pair_vec[i].functions[0] = CCPairFunction(result_vec[i]); + // pair_vec[i].functions[0] = CCPairFunction(result_vec[i]); pair_vec[i].constant_part.truncate().reduce_rank(); pair_vec[i].constant_part.print_size("constant_part"); pair_vec[i].function().truncate().reduce_rank(); save(pair_vec[i].constant_part, pair_vec[i].name() + "_const"); // save(pair_vec[i].function(), pair_vec[i].name()); if (pair_vec[i].type == GROUND_STATE) { - double energy = CCOPS.compute_pair_correlation_energy(pair_vec[i]); + double energy = CCOPS.compute_pair_correlation_energy(world,info,pair_vec[i]); if (world.rank()==0) printf("pair energy for pair %zu %zu: %12.8f\n", pair_vec[i].i, pair_vec[i].j, energy); total_energy += energy; } @@ -458,9 +470,6 @@ double CC2::solve_mp2_coupled(Pairs& doubles) { if (world.rank()==0) print_header3("Starting updating MP2 pairs"); - // create new pairs structure - Pairs updated_pairs; - for (auto& tmp_pair : pair_vec) updated_pairs.insert(tmp_pair.i, tmp_pair.j, tmp_pair); auto solver= nonlinear_vector_solver(world,pair_vec.size()); solver.set_maxsub(parameters.kain_subspace()); @@ -468,34 +477,36 @@ double CC2::solve_mp2_coupled(Pairs& doubles) { for (size_t iter = 0; iter < parameters.iter_max_6D(); iter++) { + if (world.rank()==0) print_header3("Starting iteration " + std::to_string(int(iter)) + " of MP2"); // compute the coupling between the pair functions - Pairs coupling=compute_local_coupling(updated_pairs); + Pairs coupling=compute_local_coupling(pair_vec, info); auto coupling_vec=Pairs::pairs2vector(coupling,triangular_map); if (parameters.debug()) print_size(world, coupling_vec, "couplingvector"); - double old_energy = total_energy; - total_energy = 0.0; - // calc update for pairs via macrotask - auto taskq = std::shared_ptr(new MacroTaskQ(world, world.size())); - taskq->set_printlevel(3); - //taskq->cloud.set_debug(true); - MacroTaskMp2UpdatePair t; - MacroTask task1(world, t, taskq); - std::vector u_update = task1(pair_vec, coupling_vec, parameters, nemo->get_calc()->molecule.get_all_coords_vec(), - CCOPS.mo_ket().get_vecfunction(), CCOPS.mo_bra().get_vecfunction(), - nemo->ncf->U1vec(), nemo->ncf->U2()); - taskq->print_taskq(); - taskq->run_all(); + if (world.rank()==0) { + std::cout << std::fixed << std::setprecision(1) << "\nStart updating pairs part at time " << wall_time() << std::endl; + } + + MacroTaskIteratePair t; + MacroTask task1(world, t); + CC_vecfunction dummy_singles1(PARTICLE); + const std::size_t maxiter=1; + auto unew = task1(pair_vec, coupling_vec, dummy_singles1, dummy_singles1, info, maxiter); + std::vector u; + for (auto p : pair_vec) u.push_back(p.function()); + auto residual=u-unew; + // some statistics + auto [rmsrnorm, maxrnorm]=CCPotentials::residual_stats(residual); + + // update the pair functions if (parameters.kain()) { if (world.rank()==0) std::cout << "Update with KAIN" << std::endl; - - std::vector u; - for (auto p : pair_vec) u.push_back(p.function()); - std::vector kain_update = copy(world,solver.update(u, u_update)); + // std::vector kain_update = copy(world,solver.update(u, u_update)); + std::vector kain_update = copy(world,solver.update(u, residual)); for (size_t i=0; i& doubles) { } else { if (world.rank()==0) std::cout << "Update without KAIN" << std::endl; for (size_t i=0; i& doubles) { //print pair energies if converged if (converged) { if (world.rank() == 0) std::cout << "\nPairs converged!\n"; - if (world.rank() == 0) std::cout << "\nMP2 Pair Correlation Energies:\n"; - for (auto& pair : updated_pairs.allpairs) { - const double pair_energy = CCOPS.compute_pair_correlation_energy(pair.second); - if (world.rank() == 0) { - std::cout << std::fixed << std::setprecision(10) << "omega_" - << pair.second.i << pair.second.j << "=" << pair_energy << "\n"; - } - } - if (world.rank() == 0) std::cout << "sum =" << total_energy << "\n"; break; } } @@ -571,15 +559,16 @@ double CC2::solve_mp2_coupled(Pairs& doubles) { /// add the coupling terms for local MP2 /// @return \sum_{k\neq i} f_ki |u_kj> + \sum_{l\neq j} f_lj |u_il> -Pairs CC2::compute_local_coupling(const Pairs& pairs) const { +Pairs CC2::compute_local_coupling(const Pairs& pairs, const Info& info) { - const int nmo = nemo->get_calc()->amo.size(); + const int nmo = info.mo_ket.size(); + World& world=pairs.allpairs.begin()->second.world(); // temporarily make all N^2 pair functions typedef std::map, real_function_6d> pairsT; pairsT quadratic; - for (int k = parameters.freeze(); k < nmo; ++k) { - for (int l = parameters.freeze(); l < nmo; ++l) { + for (int k = info.parameters.freeze(); k < nmo; ++k) { + for (int l = info.parameters.freeze(); l < nmo; ++l) { if (l >= k) { quadratic[std::make_pair(k, l)] = pairs(k, l); } else { @@ -592,35 +581,36 @@ Pairs CC2::compute_local_coupling(const Pairs fock1 = nemo->compute_fock_matrix(nemo->get_calc()->amo, nemo->get_calc()->aocc); + // Tensor fock1 = nemo->compute_fock_matrix(nemo->get_calc()->amo, nemo->get_calc()->aocc); + Tensor fock1 = copy(info.fock); for (int k = 0; k < nmo; ++k) { if (fock1(k, k) > 0.0) MADNESS_EXCEPTION("positive orbital energies", 1); fock1(k, k) = 0.0; } Pairs coupling; - for (int i = parameters.freeze(); i < nmo; ++i) { + for (int i = info.parameters.freeze(); i < nmo; ++i) { for (int j = i; j < nmo; ++j) { coupling.insert(i, j, real_factory_6d(world).compressed()); } } - for (int i = parameters.freeze(); i < nmo; ++i) { + for (int i = info.parameters.freeze(); i < nmo; ++i) { for (int j = i; j < nmo; ++j) { - for (int k = parameters.freeze(); k < nmo; ++k) { + for (int k = info.parameters.freeze(); k < nmo; ++k) { if (fock1(k, i) != 0.0) { coupling(i, j).gaxpy(1.0, quadratic[std::make_pair(k, j)], fock1(k, i), false); } } - for (int l = parameters.freeze(); l < nmo; ++l) { + for (int l = info.parameters.freeze(); l < nmo; ++l) { if (fock1(l, j) != 0.0) { coupling(i, j).gaxpy(1.0, quadratic[std::make_pair(i, l)], fock1(l, j), false); } } world.gop.fence(); const double thresh = FunctionDefaults<6>::get_thresh(); - coupling(i, j).truncate(thresh * 0.1).reduce_rank(); + coupling(i, j).truncate(thresh * 0.3).reduce_rank(); } } world.gop.fence(); @@ -683,95 +673,196 @@ CC2::iterate_adc2_pairs(Pairs& cispd, const CC_vecfunction& ccs) { } bool -CC2::iterate_lrcc2_pairs(const CC_vecfunction& cc2_s, const Pairs& cc2_d, const CC_vecfunction lrcc2_s, - Pairs& lrcc2_d) { - output.section("Solve LRCC2 for Excitation energy " + std::to_string(double(lrcc2_s.omega))); +CC2::iterate_lrcc2_pairs(World& world, const CC_vecfunction& cc2_s, + const CC_vecfunction lrcc2_s, Pairs& lrcc2_d, const Info& info) { + // output.section("Solve LRCC2 for Excitation energy " + std::to_string(double(lrcc2_s.omega))); + if (world.rank()==0) { + print_header3("Solving LRCC2 doubles equations"); + print("starting at time ",wall_time()); + print("using macrotasks with redirected output"); + } MADNESS_ASSERT(lrcc2_s.type == RESPONSE); - CCOPS.update_intermediates(lrcc2_s); - bool conv = true; - for (auto& tmp:lrcc2_d.allpairs) { - CCPair& pair = tmp.second; - const size_t i = pair.i; - const size_t j = pair.j; - // check if singles have significantly changed - if (lrcc2_s(i).current_error < 0.1 * parameters.thresh_6D() and - lrcc2_s(j).current_error < 0.1 * parameters.thresh_6D()) - output("Skipping Pair Iteration, No significant Change in Singles"); - else { - pair.bsh_eps = CCOPS.get_epsilon(pair.i, pair.j) + lrcc2_s.omega; - update_constant_part_lrcc2(pair, cc2_s, lrcc2_s); - conv = iterate_pair(pair, lrcc2_s); - } + auto triangular_map=PairVectorMap::triangular_map(info.parameters.freeze(),info.mo_ket.size()); + auto pair_vec=Pairs::pairs2vector(lrcc2_d,triangular_map); + + // make new constant part + MacroTaskConstantPart tc; + MacroTask task(world, tc); + auto cp = task(pair_vec, cc2_s.get_vecfunction(), lrcc2_s.get_vecfunction(), info) ; + print_size(world,cp,"constant part in iter"); + + for (int i=0; i coupling=compute_local_coupling(pair_vec, info); + auto coupling_vec=Pairs::pairs2vector(coupling,triangular_map); + reconstruct(world,coupling_vec); + for (auto& p : pair_vec) { + p.constant_part.reconstruct(); + p.function().reconstruct(); + } + + if (info.parameters.debug()) print_size(world, coupling_vec, "couplingvector"); + + // iterate the pair + MacroTaskIteratePair t1; + MacroTask task1(world, t1); + // temporary fix: create dummy functions to that the cloud is not confused + // real_function_6d tmp=real_factory_6d(world).functor([](const coord_6d& r){return 0.0;}); + // std::vector vdummy_6d(pair_vec.size(),tmp); // dummy vectors + const std::size_t maxiter=10; + auto unew = task1(pair_vec, coupling_vec, cc2_s, lrcc2_s, info, maxiter); + + for (const auto& u : unew) u.print_size("u after iter"); + // get some statistics + std::vector> uold; + for (const auto & p : pair_vec) uold.push_back(p.function()); + auto residual=uold-unew; + double nold=norm2(world,uold); + double nnew=norm2(world,unew); + print("norm(old), norm(new) ",nold,nnew); + auto [rmsrnorm, rmsrmax] = CCPotentials::residual_stats(residual); + if (world.rank()==0) CCPotentials::print_convergence("LRCC2 doubles",rmsrnorm, rmsrmax,0,0); + + // update the pair functions + for (int i=0; i::vector2pairs(pair_vec,triangular_map); + + // save latest iteration + if (world.rank()==0) print("saving latest iteration of LRCC2 to file"); + for (const auto& pair : pair_vec) { + save(pair.constant_part, pair.name() + "_const"); + save(pair.function(), pair.name()); + } + + return (rmsrnorm& doubles) { +CC2::solve_cc2(CC_vecfunction& singles, Pairs& doubles, Info& info) const +{ output.section("Solving CC2 Ground State"); MADNESS_ASSERT(singles.type == PARTICLE); - CCOPS.update_intermediates(singles); - output.section("Solve CC2 Ground State"); CCTimer time(world, "CC2 Ground State"); - double omega = CCOPS.compute_cc2_correlation_energy(singles, doubles); + double omega = CCPotentials::compute_cc2_correlation_energy(world, singles, doubles, info); if (world.rank() == 0) std::cout << std::fixed << std::setprecision(10) << "Current Correlation Energy = " << omega << "\n"; - if (not parameters.no_compute_cc2()) { - // first singles iteration - output.section("Initialize Singles to the Doubles"); - iterate_cc2_singles(singles, doubles); - // update correlation energy - omega = CCOPS.compute_cc2_correlation_energy(singles, doubles); - - for (size_t iter = 0; iter < parameters.iter_max(); iter++) { - CCTimer time_miter(world, "Macroiteration " + std::to_string(int(iter)) + " of CC2"); - output.section("Macroiteration " + std::to_string(int(iter)) + " of CC2"); - - // iterate doubles - bool doubles_converged = true; - for (auto& pairs: doubles.allpairs) { - CCPair& pair = pairs.second; - update_constant_part_cc2_gs(singles, pair); - bool pair_converged = iterate_pair(pair, singles); - save(pair.function(), pair.name()); - if (not pair_converged) doubles_converged = false; - } + if (parameters.no_compute_cc2()) { + if (world.rank()==0) print("found no_compute_cc2 key -- recompute singles for the singles-potentials"); + iterate_cc2_singles(world, singles, doubles, info); + return omega; + } - // new omega - omega = CCOPS.compute_cc2_correlation_energy(singles, doubles); + CC_vecfunction ex_singles_dummy; - // check if singles converged - const bool singles_converged = iterate_cc2_singles(singles, doubles); + // first singles iteration + output.section("Initialize Singles to the Doubles"); - // check if energy converged - const double omega_new = CCOPS.compute_cc2_correlation_energy(singles, doubles); - const double delta = omega_new - omega; - const bool omega_converged(delta < parameters.econv()); - omega = omega_new; - if (world.rank() == 0) - std::cout << std::fixed << std::setprecision(10) << "Current Correlation Energy = " << omega << "\n"; - if (world.rank() == 0) - std::cout << std::fixed << std::setprecision(10) << "Difference = " << delta << "\n"; + // given the doubles, we can solve the singles equations + iterate_cc2_singles(world, singles, doubles, info); + // the doubles ansatz depends on the singles and must be updated: |\tau_ij> = |u_ij> + Q12 f12 |t_i t_j> + update_reg_residues_gs(world, singles, doubles, info); + omega = CCPotentials::compute_cc2_correlation_energy(world, singles, doubles, info); - if (doubles_converged and singles_converged and omega_converged) break; + for (size_t iter = 0; iter < parameters.iter_max(); iter++) { + CCTimer time_miter(world, "Macroiteration " + std::to_string(int(iter)) + " of CC2"); + output.section("Macroiteration " + std::to_string(int(iter)) + " of CC2"); - time_miter.info(); + if (world.rank()==0) print("computing the constant part via macrotasks -- output redirected"); + timer timer1(world); + + std::vector pair_vec=Pairs::pairs2vector(doubles,triangular_map); + MacroTaskConstantPart t; + MacroTask task(world, t); + std::vector constant_part_vec = task(pair_vec, singles.get_vecfunction(), + ex_singles_dummy.get_vecfunction(), info) ; + for (int i=0; i coupling=compute_local_coupling(pair_vec, info); + auto coupling_vec=Pairs::pairs2vector(coupling,triangular_map); + timer1.tag("computing local coupling"); + + if (world.rank()==0) print("update the pair functions via macrotasks -- output redirected"); + MacroTaskIteratePair t1; + MacroTask task1(world, t1); + CC_vecfunction dummy_ex_singles; + std::vector vdummy_3d; // dummy vectors + const std::size_t maxiter=3; + auto unew = task1(pair_vec, coupling_vec, singles, dummy_ex_singles, + info, maxiter); + + + std::vector u_old; + for (auto p : pair_vec) u_old.push_back(p.function()); + + auto residual=u_old-unew; + timer1.tag("computing pair function update via macrotasks"); + + for (int i=0; i::vector2pairs(pair_vec,triangular_map); + + // save latest iteration + if (world.rank()==0) print("saving latest iteration to file"); + for (const auto& pair : pair_vec) { + save(pair.constant_part, pair.name() + "_const"); + save(pair.function(), pair.name()); + singles.save_restartdata(world,madness::name(singles.type)); + } + + auto [rmsrnorm,maxrnorm]=CCPotentials::residual_stats(residual); + bool doubles_converged=rmsrnorm& doubles) { } +/// solve the excited state LR-CC2 equations for a given excitation + +/// @param[in] gs_doubles: the ground state doubles +/// @param[in] gs_singles: the ground state singles +/// @param[in] cis: the CIS singles +/// @param[in] excitation: the excitation number +/// @return a tuple with the excited state doubles, the excited state singles and the excitation energy +std::tuple, CC_vecfunction, double> +CC2::solve_lrcc2(Pairs& gs_doubles, const CC_vecfunction& gs_singles, const CC_vecfunction& cis, + const std::size_t excitation, Info& info) const { + CCTimer time(world, "Whole LRCC2 Calculation"); + + std::vector> results; + std::vector>> timings; + std::vector>> results_ex; + + auto ex_singles = copy(cis); + + Pairs ex_doubles; + bool found_lrcc2d = initialize_pairs(ex_doubles, EXCITED_STATE, CT_LRCC2, gs_singles, ex_singles, excitation, info); + + if (found_lrcc2d) iterate_lrcc2_singles(world, gs_singles, gs_doubles, ex_singles, ex_doubles, info); + else iterate_ccs_singles(ex_singles, info); + const double omega_cis = ex_singles.omega; + + for (size_t iter = 0; iter < parameters.iter_max(); iter++) { + if (world.rank()==0) print_header2("Macroiteration " + std::to_string(int(iter)) + " of LRCC2 for excitation energy "+std::to_string(ex_singles.omega)); + update_reg_residues_ex(world, gs_singles, ex_singles, ex_doubles, info); + bool dconv = iterate_lrcc2_pairs(world, gs_singles, ex_singles, ex_doubles, info); + bool sconv = iterate_lrcc2_singles(world, gs_singles, gs_doubles, ex_singles, ex_doubles, info); + // update_reg_residues_ex(world, gs_singles, ex_singles, ex_doubles, info); + if (sconv and dconv) break; + } + + const double omega_cc2 = ex_singles.omega; + const std::string msg = "Excitation " + std::to_string(int(excitation)); + results_ex.push_back(std::make_pair(msg, std::make_pair(omega_cis, omega_cc2))); + // timings.push_back(std::make_pair(msg, time_ex.current_time(true))); + + + timings.push_back(std::make_pair("Whole LRCC2", time.current_time(true))); + output.section("LRCC2 Finished"); + output("Ground State Results:"); + for (const auto& res : results) + { + if (world.rank() == 0) + std::cout << std::fixed << std::setprecision(10) + << res.first << "=" << res.second << "\n"; + } + output("Response Results:"); + for (const auto& res : results_ex) + { + if (world.rank() == 0) + std::cout << std::fixed << std::setprecision(10) + << res.first << ": " << res.second.first << " (CIS)*, " << res.second.second << " (CC2)\n"; + } + if (world.rank() == 0) std::cout << "*only if CIS vectors where given in the beginning (not for CC2 restart)\n"; + output("\nTimings"); + for (const auto& time : timings) + { + if (world.rank() == 0) + std::cout << std::scientific << std::setprecision(2) + << std::setfill(' ') << std::setw(15) << time.first + << ": " << time.second.first << " (Wall), " << time.second.second << " (CPU)" << "\n"; + } + + return std::make_tuple(ex_doubles, ex_singles, omega_cc2); + +}; + bool CC2::iterate_pair(CCPair& pair, const CC_vecfunction& singles) const { output.section("Iterate Pair " + pair.name()); if (pair.ctype == CT_CC2) MADNESS_ASSERT(singles.type == PARTICLE); @@ -804,8 +965,11 @@ bool CC2::iterate_pair(CCPair& pair, const CC_vecfunction& singles) const { bool converged = false; double omega = 0.0; - if (pair.type == GROUND_STATE) omega = CCOPS.compute_pair_correlation_energy(pair, singles); - if (pair.type == EXCITED_STATE) omega = CCOPS.compute_excited_pair_energy(pair, singles); + Info info; + info.mo_bra=CCOPS.mo_bra_.get_vecfunction(); + info.parameters=parameters; + if (pair.type == GROUND_STATE) omega = CCOPS.compute_pair_correlation_energy(world, info,pair, singles); + if (pair.type == EXCITED_STATE) omega = CCOPS.compute_excited_pair_energy(world, pair, singles, info); if (world.rank() == 0) std::cout << "Correlation Energy of Pair " << pair.name() << " =" << std::fixed << std::setprecision(10) @@ -851,8 +1015,8 @@ bool CC2::iterate_pair(CCPair& pair, const CC_vecfunction& singles) const { double omega_new = 0.0; double delta = 0.0; - if (pair.type == GROUND_STATE) omega_new = CCOPS.compute_pair_correlation_energy(pair, singles); - else if (pair.type == EXCITED_STATE) omega_new = CCOPS.compute_excited_pair_energy(pair, singles); + if (pair.type == GROUND_STATE) omega_new = CCOPS.compute_pair_correlation_energy(world, info, pair, singles); + else if (pair.type == EXCITED_STATE) omega_new = CCOPS.compute_excited_pair_energy(world, pair, singles, info); delta = omega - omega_new; const double current_norm = pair.function().norm2(); @@ -885,42 +1049,44 @@ bool CC2::iterate_pair(CCPair& pair, const CC_vecfunction& singles) const { } -bool -CC2::initialize_singles(CC_vecfunction& singles, const FuncType type, const int ex) const { - MADNESS_ASSERT(singles.size() == 0); - bool restarted = false; +CC_vecfunction +CC2::initialize_singles(const FuncType type, const int ex) const { + + std::string fname=madness::name(type,ex); + if (world.rank()==0) print("initializing singles",fname); + CC_vecfunction singles(type); + try { + singles=CC_vecfunction::load_restartdata(world,fname); + if (world.rank()==0) print(" .. singles found on file"); + return singles; + } catch (...) { + if (world.rank()==0) print(" .. singles not found on file"); + } - std::vector> vs; + if (world.rank()==0) print(" .. initializing singles to zero functions"); for (size_t i = parameters.freeze(); i < CCOPS.mo_ket().size(); i++) { - CCFunction single_i; - single_i.type = type; - single_i.i = i; - std::string name; - if (ex < 0) name = single_i.name(); - else name = std::to_string(ex) + "_" + single_i.name(); real_function_3d tmpi = real_factory_3d(world); - const bool found = CCOPS.load_function(tmpi, name); - if (found) restarted = true; - else output("Initialized " + single_i.name() + " of type " + assign_name(type) + " as zero-function"); - single_i.function = copy(tmpi); - vs.push_back(single_i); + CCFunction single_i(tmpi, i, type); + singles.insert(i,single_i); } - - singles = CC_vecfunction(vs, type); -// if (type == RESPONSE) singles.excitation = ex; - - return restarted; + return singles; } + bool CC2::initialize_pairs(Pairs& pairs, const CCState ftype, const CalcType ctype, const CC_vecfunction& tau, - const CC_vecfunction& x, const size_t excitation) const { + const CC_vecfunction& x, const size_t excitation, const Info& info) const { MADNESS_ASSERT(tau.type == PARTICLE); MADNESS_ASSERT(x.type == RESPONSE); MADNESS_ASSERT(pairs.empty()); - output("Initialize " + assign_name(ctype) + " Pairs for " + assign_name(ftype)); + + std::string fname=assign_name(ftype); + if (world.rank()==0) print("initializing doubles",fname); + // output("Initialize " + assign_name(ctype) + " Pairs for " + assign_name(ftype)); bool restarted = false; + // std::vector vconst_part; + // load_function(world,vconst_part,"constant_part"); for (size_t i = parameters.freeze(); i < CCOPS.mo_ket().size(); i++) { for (size_t j = i; j < CCOPS.mo_ket().size(); j++) { @@ -932,65 +1098,70 @@ CC2::initialize_pairs(Pairs& pairs, const CCState ftype, const CalcType if (found) restarted = true; // if a single pair was found then the calculation is not from scratch real_function_6d const_part; CCOPS.load_function(const_part, name + "_const"); - CCPair tmp = CCOPS.make_pair_gs(utmp, tau, i, j); + CCPair tmp; + if (ctype==CT_MP2) tmp=CCPotentials::make_pair_mp2(utmp, i, j, info); + if (ctype==CT_CC2) tmp=CCPotentials::make_pair_cc2(utmp, tau, i, j, info); tmp.constant_part = const_part; pairs.insert(i, j, tmp); - //const double omega = CCOPS.compute_pair_correlation_energy(tmp); - //if(world.rank()==0) std::cout << "initialized pair " << tmp.name() << " with correlation energy=" << std::fixed << std::setprecision(10) << omega << "\n"; - } else if (ftype == EXCITED_STATE) { - name = std::to_string(int(excitation)) + "_" + name; + // name = std::to_string(int(excitation)) + "_" + name; real_function_6d utmp = real_factory_6d(world); const bool found = CCOPS.load_function(utmp, name); if (found) restarted = true; real_function_6d const_part; CCOPS.load_function(const_part, name + "_const"); CCPair tmp = CCOPS.make_pair_ex(utmp, tau, x, i, j, ctype); -// tmp.excitation = excitation; + + { + CCPair tmp2=CCPotentials::make_pair_lrcc2(world, ctype, utmp, tau, x, i, j, info); + std::swap(tmp,tmp2); + print("going on with Florian's pair"); + // print("going on with Jakob's pair"); + } + tmp.constant_part = const_part; pairs.insert(i, j, tmp); + // CCPotentials::compute_excited_pair_energy(world, pairs(i, j), x, info); } else error("Unknown pairtype"); } } return restarted; } -void CC2::update_reg_residues_gs(const CC_vecfunction& singles, Pairs& doubles) const { +void CC2::update_reg_residues_gs(World& world, const CC_vecfunction& singles, Pairs& doubles, const Info& info) +{ CCTimer time(world, "Updated Regularization Residues of the Ground State"); MADNESS_ASSERT(singles.type == PARTICLE); Pairs updated_pairs; - // output("Correlation energy with old pairs"); - // CCOPS.compute_cc2_correlation_energy(singles,doubles); for (auto& tmp:doubles.allpairs) { MADNESS_ASSERT(tmp.second.type == GROUND_STATE); CCPair& pair = tmp.second; const size_t i = pair.i; const size_t j = pair.j; - const CCPair updated_pair = CCOPS.make_pair_gs(pair.function(), singles, i, j); + // const CCPair updated_pair = CCOPS.make_pair_gs(pair.function(), singles, i, j); + const CCPair updated_pair = CCPotentials::make_pair_cc2(pair.function(), singles, i, j, info); updated_pairs.insert(i, j, updated_pair); } - // output("Correlation energy with updated pairs"); - // CCOPS.compute_cc2_correlation_energy(singles,updated_pairs); doubles.swap(updated_pairs); - // output("Correlation energy with swapped pairs"); - // CCOPS.compute_cc2_correlation_energy(singles,updated_pairs); time.info(); } -void CC2::update_reg_residues_ex(const CC_vecfunction& singles, const CC_vecfunction& response, - Pairs& doubles) const { +void CC2::update_reg_residues_ex(World& world, const CC_vecfunction& singles, + const CC_vecfunction& response, Pairs& doubles, const Info& info) +{ CCTimer time(world, "Updated Regularization Residues of the Excited State"); MADNESS_ASSERT(singles.type == PARTICLE); MADNESS_ASSERT(response.type == RESPONSE); + CalcType ctype = doubles.allpairs.begin()->second.ctype; Pairs updated_pairs; for (auto& tmp:doubles.allpairs) { MADNESS_ASSERT(tmp.second.type == EXCITED_STATE); CCPair& pair = tmp.second; - const size_t i = pair.i; - const size_t j = pair.j; - CCPair updated_pair = CCOPS.make_pair_ex(pair.function(), singles, response, i, j, pair.ctype); - updated_pairs.insert(i, j, updated_pair); + // CCPair updated_pair = CCPotentials::make_pair_ex(pair.function(), singles, response, i, j, pair.ctype); + CCPair updated_pair = + CCPotentials::make_pair_lrcc2(world, ctype, pair.function(), singles, response, pair.i, pair.j, info); + updated_pairs.insert(pair.i, pair.j, updated_pair); } doubles.swap(updated_pairs); time.info(); diff --git a/src/madness/chem/CC2.h b/src/madness/chem/CC2.h index c5d13aba5c0..4720e927e76 100644 --- a/src/madness/chem/CC2.h +++ b/src/madness/chem/CC2.h @@ -21,6 +21,8 @@ #include #include +#include "BSHApply.h" + namespace madness { class CC2 : public OptimizationTargetInterface, public QCPropertyInterface { @@ -128,13 +130,15 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface { CCPotentials CCOPS; /// Formated Output (same as used in CC2Potentials structure) CCMessenger& output; + /// map Pair struct to vector + PairVectorMap triangular_map; /// solve the CC2 ground state equations, returns the correlation energy void solve(); std::vector - solve_ccs(); + solve_ccs() const; double compute_mp3(const Pairs& mp2pairs) const { MP3 mp3(CCOPS); @@ -143,45 +147,62 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface { } double - solve_cc2(CC_vecfunction& tau, Pairs& u); + solve_cc2(CC_vecfunction& tau, Pairs& u, Info& info) const; + + /// solve the excited state LR-CC2 equations for a given excitation + + /// @param[in] gs_doubles: the ground state doubles + /// @param[in] gs_singles: the ground state singles + /// @param[in] cis: the CIS singles + /// @param[in] excitation: the excitation number + /// @return a tuple with the excited state doubles, the excited state singles and the excitation energy + std::tuple, CC_vecfunction, double> + solve_lrcc2(Pairs& gs_doubles, const CC_vecfunction& gs_singles, const CC_vecfunction& cis, + const std::size_t excitation, Info& info) const; double solve_cispd(Pairs& doubles, const Pairs& mp2_pairs, const CC_vecfunction& cis_singles); /// convencience function to iterate the CC2 ground state singles, /// makes the right call on the iterate_singles functions - bool - iterate_cc2_singles(CC_vecfunction& singles, Pairs& doubles) { - CCOPS.clear_potentials(singles); + static bool + iterate_cc2_singles(World& world, CC_vecfunction& singles, Pairs& doubles, Info& info) { + // CCOPS.clear_potentials(singles); + info.intermediate_potentials.clear_all(); Pairs empty; - return iterate_singles(singles, CC_vecfunction(RESPONSE), doubles, empty, CT_CC2, parameters.iter_max_3D()); + return iterate_singles(world, singles, CC_vecfunction(RESPONSE), doubles, + empty, CT_CC2, info.parameters.iter_max_3D(), info); } bool - iterate_adc2_singles(Pairs& mp2, CC_vecfunction& singles, Pairs& x) { + iterate_adc2_singles(Pairs& mp2, CC_vecfunction& singles, Pairs& x, Info& info) { MADNESS_ASSERT(singles.type == RESPONSE); - CCOPS.clear_potentials(singles); - return iterate_singles(singles, CC_vecfunction(UNDEFINED), mp2, x, CT_ADC2, parameters.iter_max_3D()); + // CCOPS.clear_potentials(singles); + info.intermediate_potentials.clear_response(); + return iterate_singles(world, singles, CC_vecfunction(UNDEFINED), mp2, x, CT_ADC2, parameters.iter_max_3D(), info); } - bool - iterate_lrcc2_singles(CC_vecfunction& cc2_s, Pairs& cc2_d, CC_vecfunction& lrcc2_s, Pairs lrcc2_d) { + static bool + iterate_lrcc2_singles(World& world, const CC_vecfunction& cc2_s, Pairs& cc2_d, CC_vecfunction& lrcc2_s, Pairs lrcc2_d, Info& info) { MADNESS_ASSERT(cc2_s.type == PARTICLE); MADNESS_ASSERT(lrcc2_s.type == RESPONSE); - CCOPS.clear_potentials(lrcc2_s); - return iterate_singles(lrcc2_s, cc2_s, cc2_d, lrcc2_d, CT_LRCC2, parameters.iter_max_3D()); + info.intermediate_potentials.clear_response(); + // CCOPS.clear_potentials(lrcc2_s); + return iterate_singles(world, lrcc2_s, cc2_s, cc2_d, lrcc2_d, + CT_LRCC2, info.parameters.iter_max_3D(), info); } /// convencience function to iterate the CCS Response singles, /// makes the right call on the iterate_singles functions bool - iterate_ccs_singles(CC_vecfunction& x) { + iterate_ccs_singles(CC_vecfunction& x, Info& info) const { Pairs empty; - CCOPS.clear_potentials(x); - return iterate_singles(x, CC_vecfunction(PARTICLE), empty, empty, CT_LRCCS, 1); + // CCOPS.clear_potentials(x); + info.intermediate_potentials.clear_response(); + return iterate_singles(world, x, CC_vecfunction(PARTICLE), empty, empty, CT_LRCCS, info.parameters.iter_max_3D(), info); } - bool + static bool /// Iterates the singles equations for CCS, CC2, LRCC2 /// The corresponding regulairzation tails of the doubles are updated in every iteration (therefore doubles are not marked as const) /// @param[in] : singles, the singles that are iterated @@ -191,8 +212,9 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface { /// @param[in] : ctype: the calculation type: CCS, CC2, CC2_response_ /// @param[in] : maxiter: maxmial number of iterations /// @param[out]: true if the overall change of the singles is below 10*donv_6D - iterate_singles(CC_vecfunction& singles, const CC_vecfunction singles2, Pairs& gs_doubles, - Pairs& ex_doubles, const CalcType ctype, const std::size_t maxiter) { + iterate_singles(World& world, CC_vecfunction& singles, const CC_vecfunction singles2, Pairs& gs_doubles, + Pairs& ex_doubles, const CalcType ctype, const std::size_t maxiter, Info& info) { + CCMessenger output(world); output.subsection("Iterate " + assign_name(ctype) + "-Singles"); CCTimer time_all(world, "Overall Iteration of " + assign_name(ctype) + "-Singles"); bool converged = true; @@ -200,77 +222,85 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface { CC_vecfunction old_singles(singles); for (auto& tmp : singles.functions) old_singles(tmp.first).function = copy(tmp.second.function); + double old_omega=0.0; // KAIN solver typedef vector_function_allocator allocT; typedef XNonlinearSolver >, double, allocT> solverT; - allocT alloc(world, singles.size()); solverT solver(allocT(world, singles.size())); solver.do_print = (world.rank() == 0); + print_size(world, singles.get_vecfunction(), "singles before iteration"); + for (size_t iter = 0; iter < maxiter; iter++) { - output.subsection("Microiteration " + std::to_string(iter) + " of " + assign_name(ctype) + "-Singles"); - CCTimer time(world, "Microiteration " + std::to_string(iter) + " of " + assign_name(ctype) + "-Singles"); + // output.subsection("Microiteration " + std::to_string(iter) + " of " + assign_name(ctype) + "-Singles"); + // CCTimer time(world, "Microiteration " + std::to_string(iter) + " of " + assign_name(ctype) + "-Singles"); double omega = 0.0; if (ctype == CT_LRCC2) omega = singles.omega; else if (ctype == CT_LRCCS) omega = singles.omega; else if (ctype == CT_ADC2) omega = singles.omega; + print("omega 1" ,omega); // consistency check switch (ctype) { - case CT_CC2: - if (singles.type != PARTICLE) - output.warning("iterate_singles: CC2 demanded but singles are not of type PARTICLE"); - break; - case CT_MP2: MADNESS_EXCEPTION("Demanded Singles Calculation for MP2 ????", 1); - break; - case CT_LRCC2: - if (singles.type != RESPONSE or singles2.type != PARTICLE) - output.warning("iterate_singles: CC2_response_ singles have wrong types"); - break; - case CT_LRCCS: - if (singles.type != RESPONSE) - output.warning("iterate_singles: CCS_response_ singles have wrong types"); - break; - case CT_CISPD: MADNESS_EXCEPTION("Demanded Singles Calculation for CIS(D)", 1); - break; - case CT_ADC2: - MADNESS_ASSERT(singles.type == RESPONSE); - break; - case CT_TEST: MADNESS_EXCEPTION("Iterate Singles not implemented for Experimental calculation", 1); - break; - default: MADNESS_EXCEPTION( - ("Unknown calculation type in iterate singles: " + assign_name(ctype)).c_str(), 1); + case CT_CC2: + if (singles.type != PARTICLE) + output.warning("iterate_singles: CC2 demanded but singles are not of type PARTICLE"); + break; + case CT_MP2: MADNESS_EXCEPTION("Demanded Singles Calculation for MP2 ????", 1); + break; + case CT_LRCC2: + if (singles.type != RESPONSE or singles2.type != PARTICLE) + output.warning("iterate_singles: CC2_response_ singles have wrong types"); + break; + case CT_LRCCS: + if (singles.type != RESPONSE) + output.warning("iterate_singles: CCS_response_ singles have wrong types"); + break; + case CT_CISPD: MADNESS_EXCEPTION("Demanded Singles Calculation for CIS(D)", 1); + break; + case CT_ADC2: + MADNESS_ASSERT(singles.type == RESPONSE); + break; + case CT_TEST: MADNESS_EXCEPTION("Iterate Singles not implemented for Experimental calculation", 1); + break; + default: MADNESS_EXCEPTION( + ("Unknown calculation type in iterate singles: " + assign_name(ctype)).c_str(), 1); } // get potentials CCTimer time_V(world, assign_name(ctype) + "-Singles Potential"); vector_real_function_3d V; - if (ctype == CT_CC2) V = CCOPS.get_CC2_singles_potential_gs(singles, gs_doubles); + if (ctype == CT_CC2) V = CCPotentials::get_CC2_singles_potential_gs(world, singles, gs_doubles, info); else if (ctype == CT_LRCC2) - V = CCOPS.get_CC2_singles_potential_ex(singles2, gs_doubles, singles, ex_doubles); - else if (ctype == CT_LRCCS) V = CCOPS.get_CCS_potential_ex(singles); - else if (ctype == CT_ADC2) V = CCOPS.get_ADC2_singles_potential(gs_doubles, singles, ex_doubles); + V = CCPotentials::get_CC2_singles_potential_ex(world, singles2, gs_doubles, singles, ex_doubles, info); + else if (ctype == CT_LRCCS) V = CCPotentials::get_CCS_potential_ex(world,singles,false, info); + // else if (ctype == CT_ADC2) V = CCOPS.get_ADC2_singles_potential(world, gs_doubles, singles, ex_doubles, info); else MADNESS_EXCEPTION("iterate singles: unknown type", 1); + + // add local coupling + V-=compute_local_coupling(singles.get_vecfunction(),info); + truncate(world, V); time_V.info(true, norm2(world, V)); - if (ctype == CT_LRCCS or ctype == CT_LRCC2 or ctype == CT_ADC2) { - omega = singles.omega; // computed with the potential + // update excitation energy + if (ctype==CT_LRCC2 or ctype==CT_LRCCS or ctype==CT_ADC2) { + old_omega=omega; + omega = CCPotentials::compute_cis_expectation_value(world, singles, V, true, info); + singles.omega = omega; } - - scale(world, V, -2.0); - truncate(world, V); + if (world.rank()==0 and info.parameters.debug()) + print("omega entering the update in the singles" ,omega); // make bsh operators - CCTimer time_makebsh(world, "Make G-Operators"); + scale(world, V, -2.0); // moved to BSHApply std::vector > > G(singles.size()); for (size_t i = 0; i < G.size(); i++) { - const double bsh_eps = CCOPS.get_orbital_energies()[i + parameters.freeze()] + omega; + const double bsh_eps = info.orbital_energies[i + info.parameters.freeze()] + omega; G[i] = std::shared_ptr >( - BSHOperatorPtr3D(world, sqrt(-2.0 * bsh_eps), parameters.lo(), parameters.thresh_bsh_3D())); + BSHOperatorPtr3D(world, sqrt(-2.0 * bsh_eps), info.parameters.lo(), info.parameters.thresh_bsh_3D())); } world.gop.fence(); - time_makebsh.info(); // apply bsh operators CCTimer time_applyG(world, "Apply G-Operators"); @@ -279,17 +309,13 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface { time_applyG.info(); // apply Q-Projector to result - GV = CCOPS.apply_Qt(GV, CCOPS.mo_ket()); + QProjector Q(info.mo_bra,info.mo_ket); + GV = Q(GV); // Normalize Singles if it is excited state if (ctype == CT_LRCCS or ctype == CT_LRCC2 or ctype == CT_ADC2) { output("Normalizing new singles"); - const vector_real_function_3d x = GV; - const vector_real_function_3d xbra = mul(world, nemo->ncf->square(), GV); - const double norm = sqrt(inner(world, xbra, x).sum()); - if (world.rank() == 0) - std::cout << " Norm was " << std::fixed << std::setprecision(parameters.output_prec()) << norm - << "\n"; + const double norm=inner(GV,info.R_square*GV); scale(world, GV, 1.0 / norm); } else output("Singles not normalized"); @@ -297,24 +323,25 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface { const vector_real_function_3d residual = sub(world, singles.get_vecfunction(), GV); // information - const Tensor R2xinnerx = inner(world, mul(world, nemo->ncf->square(), singles.get_vecfunction()), + const Tensor R2xinnerx = inner(world, info.R_square*singles.get_vecfunction(), singles.get_vecfunction()); - const Tensor R2GVinnerGV = inner(world, mul(world, nemo->ncf->square(), GV), GV); - const Tensor R2rinnerr = inner(world, mul(world, nemo->ncf->square(), residual), residual); + const Tensor R2GVinnerGV = inner(world, info.R_square*GV, GV); + const Tensor R2rinnerr = inner(world, info.R_square*residual, residual); const double R2vector_error = sqrt(R2rinnerr.sum()); + auto [rmsresidual, maxresidual]=CCPotentials::residual_stats(residual); // print information if (world.rank() == 0) std::cout << "\n\n-----Results of current interation:-----\n"; if (world.rank() == 0) std::cout << "\nName: ||" << singles.name(0) << "||, ||GV" << singles.name(0) << ", ||residual||" << "\n"; if (world.rank() == 0) - std::cout << singles.name(0) << ": " << std::scientific << std::setprecision(parameters.output_prec()) + std::cout << singles.name(0) << ": " << std::scientific << std::setprecision(info.parameters.output_prec()) << sqrt(R2xinnerx.sum()) << ", " << sqrt(R2GVinnerGV.sum()) << ", " << sqrt(R2rinnerr.sum()) << "\n----------------------------------------\n"; for (size_t i = 0; i < GV.size(); i++) { if (world.rank() == 0) - std::cout << singles(i + parameters.freeze()).name() << ": " << std::scientific - << std::setprecision(parameters.output_prec()) + std::cout << singles(i + info.parameters.freeze()).name() << ": " << std::scientific + << std::setprecision(info.parameters.output_prec()) << sqrt(R2xinnerx(i)) << ", " << sqrt(R2GVinnerGV(i)) << ", " << sqrt(R2rinnerr(i)) << "\n"; } @@ -322,80 +349,76 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface { // make second order update (only for response) if (ctype == CT_LRCC2 or ctype == CT_LRCCS) { - output("\nMake 2nd order energy update:"); - // include nuclear factors - { - vector_real_function_3d bra_res = mul(world, nemo->ncf->square(), residual); - vector_real_function_3d bra_GV = mul(world, nemo->ncf->square(), GV); - double Rtmp = inner(world, bra_res, V).sum(); - double Rtmp2 = inner(world, bra_GV, GV).sum(); - const double Rdelta = (0.5 * Rtmp / Rtmp2); - double old_omega = omega; - output("Delta-Update is not used"); - if (world.rank() == 0) - std::cout << "omega, old_omega, delta" << std::fixed - << std::setprecision(parameters.output_prec() + 2) << omega << ", " << old_omega << ", " - << Rdelta << "\n\n"; - } - + double Rtmp = inner(world, info.R_square*residual, V).sum(); + double Rtmp2 = inner(world, info.R_square*GV, GV).sum(); + const double Rdelta = (0.5 * Rtmp / Rtmp2); + if (world.rank() == 0) std::cout << "omega, second-order update (FYI): " << std::fixed + << std::setprecision(info.parameters.output_prec() + 2) << omega << ", " << Rdelta << "\n\n"; } // update singles singles.omega = omega; - vector_real_function_3d new_singles = GV; - if (parameters.kain()) new_singles = solver.update(singles.get_vecfunction(), residual); - print_size(world, new_singles, "new_singles"); - truncate(world, new_singles); - print_size(world, new_singles, "new_singles"); + vector_real_function_3d new_singles = truncate(GV); + if (info.parameters.kain()) new_singles = solver.update(singles.get_vecfunction(), residual); + if (info.parameters.debug()) print_size(world, new_singles, "new_singles"); + // if (ctype == CT_LRCCS or ctype == CT_LRCC2 or ctype == CT_ADC2) Nemo::normalize(new_singles, info.R); + // if (info.parameters.debug()) print_size(world, new_singles, "new_singles normalized"); + for (size_t i = 0; i < GV.size(); i++) { - singles(i + parameters.freeze()).function = copy(new_singles[i]); + singles(i + info.parameters.freeze()).function = copy(new_singles[i]); } - // update intermediates - CCOPS.update_intermediates(singles); - // update reg_residues of doubles - //if(ctype==CC2_) update_reg_residues_gs(singles,gs_doubles); - //else if(ctype==LRCC2_) update_reg_residues_ex(singles2,singles,ex_doubles); + if (ctype==CT_CC2) update_reg_residues_gs(world, singles,gs_doubles, info); + else if(ctype==CT_LRCC2) update_reg_residues_ex(world, singles2,singles,ex_doubles, info); - converged = (R2vector_error < parameters.dconv_3D()); + if (world.rank()==0) CCPotentials::print_convergence(singles.name(0),rmsresidual, + rmsresidual,omega-old_omega,iter); + converged = (R2vector_error < info.parameters.dconv_3D()); - time.info(); + // time.info(); if (converged) break; if (ctype == CT_LRCCS) break; // for CCS just one iteration to check convergence } time_all.info(); + print_size(world, singles.get_vecfunction(), "singles after iteration"); // Assign the overall changes bool no_change = true; if (world.rank() == 0) - std::cout << "Change in Singles functions after all the CC2-Single-Microiterations" << std::endl; + std::cout << "Change in Singles functions after all the Microiterations" << std::endl; for (auto& tmp : singles.functions) { const double change = (tmp.second.function - old_singles(tmp.first).function).norm2(); tmp.second.current_error = change; - if (change > parameters.dconv_3D()) no_change = false; + if (change > info.parameters.dconv_3D()) no_change = false; if (world.rank() == 0) std::cout << "Change of " << tmp.second.name() << "=" << tmp.second.current_error << std::endl; } // update reg_residues of doubles - if (ctype == CT_CC2) update_reg_residues_gs(singles, gs_doubles); - else if (ctype == CT_LRCC2) update_reg_residues_ex(singles2, singles, ex_doubles); + if (ctype == CT_CC2) update_reg_residues_gs(world, singles, gs_doubles, info); + else if (ctype == CT_LRCC2) update_reg_residues_ex(world, singles2, singles, ex_doubles, info); //CCOPS.plot(singles); - if (no_change) output("Change of Singles was below = " + std::to_string(parameters.dconv_3D()) + "!"); + if (no_change) output("Change of Singles was below = " + std::to_string(info.parameters.dconv_3D()) + "!"); return no_change; } + /// store singles to file + void store_singles(const CC_vecfunction& singles, const int ex = -1) const; - bool initialize_singles(CC_vecfunction& singles, const FuncType type, const int ex = -1) const; + /// read singles from file or initialize new ones + CC_vecfunction initialize_singles(const FuncType type, const int ex = -1) const; + /// read pairs from file or initialize new ones bool initialize_pairs(Pairs& pairs, const CCState ftype, const CalcType ctype, const CC_vecfunction& tau, - const CC_vecfunction& x, const size_t extitation = 0) const; + const CC_vecfunction& x, const size_t extitation, const Info& info) const; - void update_reg_residues_gs(const CC_vecfunction& singles, Pairs& doubles) const; + static void + update_reg_residues_gs(World& world, const CC_vecfunction& singles, Pairs& doubles, const Info& info); - void - update_reg_residues_ex(const CC_vecfunction& singles, const CC_vecfunction& response, Pairs& doubles) const; + static void + update_reg_residues_ex(World& world, const CC_vecfunction& singles, const CC_vecfunction& response, Pairs& doubles, + const Info& info); /// Iterates a pair of the CC2 doubles equations bool @@ -404,9 +427,9 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface { bool iterate_adc2_pairs(Pairs& cispd, const CC_vecfunction& ccs); - bool - iterate_lrcc2_pairs(const CC_vecfunction& cc2_s, const Pairs& cc2_d, const CC_vecfunction lrcc2_s, - Pairs& lrcc2_d); + static bool + iterate_lrcc2_pairs(World& world, const CC_vecfunction& cc2_s, const CC_vecfunction lrcc2_s, + Pairs& lrcc2_d, const Info& info); bool update_constant_part_cc2_gs(const CC_vecfunction& tau, CCPair& pair) { MADNESS_ASSERT(pair.ctype == CT_CC2); @@ -473,19 +496,36 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface { } /// forward to the other function (converting CCPair to real_function) - Pairs compute_local_coupling(const Pairs &pairs) const { + static Pairs compute_local_coupling(const std::vector &vpairs, const Info& info) { + // create new pairs structure + Pairs pairs; + for (auto& tmp_pair : vpairs) pairs.insert(tmp_pair.i, tmp_pair.j, tmp_pair); auto ccpair2function = [](const CCPair& a) {return a.function();}; - return compute_local_coupling(pairs.convert(pairs,ccpair2function)); - + return compute_local_coupling(pairs.convert(pairs,ccpair2function), info); }; + /// compute the coupling of singles function if orbitals are localized + + /// @return the coupling terms c_i = -\sum_(j\neq i) f_ij |\phi_j> (for whatever phi is) + static std::vector compute_local_coupling(const std::vector& singles, + const Info& info) { + + MADNESS_CHECK_THROW(singles.size()>0,"compute_local_coupling: singles vector is empty"); + World& world=singles.front().world(); + auto active=Slice(info.parameters.freeze(),-1); + Tensor Fact=info.fock(active,active); + for (int i=0; i + \sum_{l\neq j} f_lj |u_il> - Pairs compute_local_coupling(const Pairs& pairs) const; + static Pairs compute_local_coupling(const Pairs& pairs, const Info& info); - double solve_mp2_coupled(Pairs &doubles); + double solve_mp2_coupled(Pairs &doubles, Info& info); bool check_core_valence_separation(const Tensor& fmat) const; diff --git a/src/madness/chem/CCPotentials.cc b/src/madness/chem/CCPotentials.cc index a658f532c30..e524b0ed4bb 100644 --- a/src/madness/chem/CCPotentials.cc +++ b/src/madness/chem/CCPotentials.cc @@ -26,16 +26,16 @@ CCPotentials::CCPotentials(World& world_, std::shared_ptr nemo, const CCP //orbital_energies_(init_orbital_energies(nemo)) // g12(std::shared_ptrget_calc()->molecule), - get_potentials(world, param), + get_potentials(param), output(world) { g12=std::shared_ptr>(new CCConvolutionOperator(world,OpType::OT_G12,param)); f12=std::shared_ptr>(new CCConvolutionOperator(world,OpType::OT_F12,param)); output.debug = parameters.debug(); -// reset_nemo(nemo); -// g12.update_elements(mo_bra_, mo_ket_); -// g12.sanity(); -// f12.update_elements(mo_bra_, mo_ket_); -// f12.sanity(); + // reset_nemo(nemo); + // g12.update_elements(mo_bra_, mo_ket_); + // g12.sanity(); + // f12.update_elements(mo_bra_, mo_ket_); + // f12.sanity(); } madness::CC_vecfunction @@ -72,13 +72,108 @@ CCPotentials::init_orbital_energies(const Nemo& nemo) const { return eps; } +CCPair CCPotentials::make_pair_mp2(const real_function_6d& u, const size_t i, const size_t j, const Info& info) { + World& world=u.world(); + + // construct Q12 f12 |ij> + auto phi=info.mo_ket; + auto phi_bra=info.mo_bra; + StrongOrthogonalityProjector Q12(world); + Q12.set_spaces(phi_bra,phi,phi_bra,phi); + + auto f12=CCConvolutionOperatorPtr(world,OT_F12,info.parameters); + CCPairFunction fij(f12, phi[i], phi[j]); + std::vector> tmp=Q12(std::vector>(1,fij)); + + // first term is the 6d function u, then follows Q12 f12 |ij> + std::vector> functions; + functions+=CCPairFunction(u); + functions+=tmp; + + auto pair=CCPair(i,j,GROUND_STATE,CT_MP2,functions); + pair.bsh_eps=get_epsilon(i,j,info); + return pair; +} + +CCPair CCPotentials::make_pair_cc2(const real_function_6d& u, const CC_vecfunction& gs_singles, const size_t i, const size_t j, + const Info& info) { + World& world=u.world(); + + // construct Q12 f12 |ij> + auto phi=info.mo_ket; + auto phi_bra=info.mo_bra; + auto t=make_full_t_intermediate(gs_singles,info).get_vecfunction(); + StrongOrthogonalityProjector Q12(world); + Q12.set_spaces(phi_bra,t,phi_bra,t); + + auto f12=CCConvolutionOperatorPtr(world,OT_F12,info.parameters); + CCPairFunction fij(f12, t[i], t[j]); + std::vector> tmp=Q12(std::vector>(1,fij)); + + // first term is the 6d function u, then follows Q12 f12 |ij> + std::vector> functions; + functions+=CCPairFunction(u); + functions+=tmp; + + auto pair=CCPair(i,j,GROUND_STATE,CT_CC2,functions); + pair.bsh_eps=get_epsilon(i,j,info); + return pair; +} + +/// follow eq. (23) of Kottmann, JCTC 13, 5956 (2017) +CCPair CCPotentials::make_pair_lrcc2(World& world, const CalcType& ctype, const real_function_6d& u, + const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles, const size_t i, const size_t j, const Info& info) { + MADNESS_ASSERT(gs_singles.type == PARTICLE || gs_singles.type == HOLE); + MADNESS_ASSERT(ex_singles.type == RESPONSE); + MADNESS_ASSERT(ctype == CT_CISPD || ctype == CT_LRCC2 || ctype == CT_ADC2); + MADNESS_ASSERT(!(i < info.parameters.freeze())); + MADNESS_ASSERT(!(j < info.parameters.freeze())); + + // compute the t intermediates for active orbitals only -- they go into the ansatz + const auto t = CC_vecfunction(info.get_active_mo_ket()+gs_singles.get_vecfunction(),MIXED,info.parameters.freeze()); + MADNESS_ASSERT(t.size() == (info.mo_ket.size()-info.parameters.freeze())); + + // compute the t intermediates for all orbitals -- they go into the projector + const CC_vecfunction pt = copy(make_full_t_intermediate(gs_singles,info)); + MADNESS_ASSERT(pt.size() == info.mo_ket.size()); + + auto f12=CCConvolutionOperatorPtr(world,OT_F12,info.parameters); + + // set up projectors -- they project out the occupied space from the response pair function + + // dQ12t = -(Qt(1) Ox(2) + Ox(1) Qt(2)) eq. (22) of the excited state paper + QProjector Qt(info.mo_bra,pt.get_vecfunction()); + Projector Ox(info.get_active_mo_bra(),ex_singles.get_vecfunction()); // this works on active orbitals only + auto dQt_1 = outer(Qt,Ox); + auto dQt_2 = outer(Ox,Qt); + + StrongOrthogonalityProjector Q12t(world); // eq. (21) of the ground state paper + Q12t.set_spaces(info.mo_bra,pt.get_vecfunction(),info.mo_bra,pt.get_vecfunction()); + + typedef CCPairFunction cpT; + auto functions=std::vector(1,cpT(u)); + + auto f_xt=std::vector(1,cpT(f12, ex_singles(i), t(j))); + auto f_tx=std::vector(1,cpT(f12, t(i), ex_singles(j))); + auto f_tt=std::vector(1,cpT(f12, t(i), t(j))); + + functions+=(Q12t(f_xt) + Q12t(f_tx) - dQt_1(f_tt) -dQt_2(f_tt)); // note the sign change in the last two terms + functions=consolidate(functions); + + CCPair pair(i, j, EXCITED_STATE, ctype, functions); + MADNESS_ASSERT(ex_singles.omega != 0.0); + const double bsh_eps = get_epsilon(i, j, info) + ex_singles.omega; + pair.bsh_eps = bsh_eps; + return pair; +} + madness::CCPair CCPotentials::make_pair_gs(const real_function_6d& u, const CC_vecfunction& tau, const size_t i, const size_t j) const { CCTimer time(world, "make pair u" + std::to_string(int(i)) + std::to_string(int(j))); MADNESS_ASSERT(tau.type == PARTICLE || tau.type == HOLE); // for MP2: tau is empty or Hole states, the function will give back mo_ket_ // for freeze!=0 the function will give back (mo0,mo1,...,t_freeze,t_freeze+1,...) - const CC_vecfunction t = make_t_intermediate(tau); + const CC_vecfunction t = make_t_intermediate(tau,parameters); // functions for the projector CC_vecfunction pt; if (!parameters.QtAnsatz()) pt = mo_ket_; @@ -222,12 +317,12 @@ CCPotentials::make_pair_ex(const real_function_6d& u, const CC_vecfunction& tau, MADNESS_ASSERT(!(j < parameters.freeze())); // for CIS(D): tau is empty or Hole states, the function will give back mo_ket_ // for freeze!=0 the function will give back (mo0,mo1,...,t_freeze,t_freeze+1,...) - const CC_vecfunction t = make_t_intermediate(tau).copy(); + const CC_vecfunction t = copy(make_t_intermediate(tau,parameters)); // functions for the projector CC_vecfunction pt; - if (!parameters.QtAnsatz()) pt = mo_ket_.copy(); + if (!parameters.QtAnsatz()) pt = copy(mo_ket_); else { - pt = make_full_t_intermediate(tau).copy(); + pt = copy(make_full_t_intermediate(tau)); } MADNESS_ASSERT(pt.size() == mo_ket_.size()); std::vector> functions; @@ -302,6 +397,8 @@ CCPotentials::make_pair_ex(const real_function_6d& u, const CC_vecfunction& tau, MADNESS_ASSERT(functions.size() == 7); } else MADNESS_ASSERT(functions.size() == 2); + functions=consolidate(functions); + MADNESS_ASSERT(functions.size() == 3); MADNESS_ASSERT(x.omega != 0.0); const double bsh_eps = get_epsilon(i, j) + x.omega; @@ -310,26 +407,31 @@ CCPotentials::make_pair_ex(const real_function_6d& u, const CC_vecfunction& tau, } double -CCPotentials::compute_pair_correlation_energy(const CCPair& u, const CC_vecfunction& singles) const { +CCPotentials::compute_pair_correlation_energy(World& world, const Info& info, + const CCPair& u, const CC_vecfunction& singles) { + CCTimer timer(world, "Compute Correlation Energy"); MADNESS_ASSERT(u.type == GROUND_STATE); if (singles.functions.empty()) MADNESS_ASSERT(u.ctype == CT_MP2); - const bool print_details=(world.rank()==0 and parameters.debug()); - if (parameters.debug()) output("Compute pair-correlation energy of pair " + u.name()); + const bool print_details=(world.rank()==0 and info.parameters.debug()); double result = 0.0; - const CCFunction& mobi = mo_bra_(u.i); - const CCFunction& mobj = mo_bra_(u.j); + const CCFunction& mobi = info.mo_bra[u.i]; + const CCFunction& mobj = info.mo_bra[u.j]; const bool symmetric = (u.i == u.j); + auto g12=CCConvolutionOperatorPtr(world,OpType::OT_G12,info.parameters); + CCPairFunction ij(mobi.f(),mobj.f()); + CCPairFunction ji(mobj.f(),mobi.f()); for (size_t mm = 0; mm < u.functions.size(); mm++) { double tmp = 0.0; - const double part1 = make_xy_op_u(mobi, mobj, *g12, u.functions[mm]); + // const double part1 = make_xy_op_u(mobi, mobj, *g12, u.functions[mm]); + const double part1 = inner(ij,g12*u.functions[mm]); if (symmetric) tmp = part1; - else //if(world.rank()==0) std::cout << std::fixed << std::setprecision(10) << part1 << "\n"; - { - const double part2 = make_xy_op_u(mobj, mobi, *g12, u.functions[mm]); + else { + // const double part2 = make_xy_op_u(mobj, mobi, *g12, u.functions[mm]); + const double part2 = inner(ji,g12*u.functions[mm]); tmp = 2.0 * (2.0 * part1 - part2); // non symmetric pairs -> offdiagonal -> count twice } result += tmp; @@ -349,20 +451,21 @@ CCPotentials::compute_pair_correlation_energy(const CCPair& u, const CC_vecfunct } // if (world.rank() == 0) std::cout << "------------\n" << std::fixed << std::setprecision(10) << result << "\n\n"; - timer.info(parameters.debug()); + timer.info(info.parameters.debug()); return result; } double -CCPotentials::compute_cc2_correlation_energy(const CC_vecfunction& singles, const Pairs& doubles) const { +CCPotentials::compute_cc2_correlation_energy(World& world, const CC_vecfunction& singles, const Pairs& doubles, const Info& info) +{ MADNESS_ASSERT(singles.type == PARTICLE); CCTimer time(world, "Computing CC2 Correlation Energy"); - output.section("Computing CC2 Correlation Energy"); + // output.section("Computing CC2 Correlation Energy"); double result = 0.0; for (const auto& tmp : doubles.allpairs) { const size_t i = tmp.second.i; const size_t j = tmp.second.j; - const double omega = compute_pair_correlation_energy(tmp.second, singles); + const double omega = compute_pair_correlation_energy(world, info, tmp.second, singles); result += omega; if (world.rank() == 0) std::cout << std::fixed << "omega " << i << j << " =" << std::setprecision(10) << omega << "\n"; @@ -374,7 +477,8 @@ CCPotentials::compute_cc2_correlation_energy(const CC_vecfunction& singles, cons } double -CCPotentials::compute_kinetic_energy(const vector_real_function_3d& xbra, const vector_real_function_3d& xket) const { +CCPotentials::compute_kinetic_energy(World& world, const vector_real_function_3d& xbra, const vector_real_function_3d& xket) +{ Kinetic T(world); double kinetic = 0.0; for (size_t k = 0; k < xket.size(); k++) @@ -382,16 +486,23 @@ CCPotentials::compute_kinetic_energy(const vector_real_function_3d& xbra, const return kinetic; } + double -CCPotentials::compute_cis_expectation_value(const CC_vecfunction& x, const vector_real_function_3d& V, - const bool print) const { - const vector_real_function_3d xbra = make_bra(x); +CCPotentials::compute_cis_expectation_value(World& world, const CC_vecfunction& x, + const vector_real_function_3d& V, const bool print, const Info& info) +{ + // following eq. (34) of the CIS paper Kottmann et al, PCCP, 17, 31453, (2015) + // doi: https://doi.org/10.1039/C5CP00345H + // the expectation value of the CIS wave function is computed by projecting the + // CIS wave function onto eq. (22) + // the potential V must contain the coupling term when using localized orbitals + const vector_real_function_3d xbra = info.R_square*(x.get_vecfunction()); const vector_real_function_3d xket = x.get_vecfunction(); - const double kinetic = compute_kinetic_energy(xbra, xket); + const double kinetic = compute_kinetic_energy(world, xbra, xket); const double norm = sqrt(inner(world, xbra, xket).sum()); double eps = 0.0; for (size_t k = 0; k < xket.size(); k++) - eps -= get_orbital_energies()[k + parameters.freeze()] * xbra[k].inner(xket[k]); + eps -= info.orbital_energies[k + info.parameters.freeze()] * xbra[k].inner(xket[k]); double potential = inner(world, xbra, V).sum(); const double result = 1.0 / (norm * norm) * (potential + kinetic + eps); if (world.rank() == 0 && print) { @@ -406,16 +517,22 @@ CCPotentials::compute_cis_expectation_value(const CC_vecfunction& x, const vecto } double -CCPotentials::compute_excited_pair_energy(const CCPair& d, const CC_vecfunction& x) const { - const CC_vecfunction xbra(make_bra(x), RESPONSE, parameters.freeze()); +CCPotentials::compute_excited_pair_energy(World& world, const CCPair& d, const CC_vecfunction& x, const Info& info) { + // const CC_vecfunction xbra(make_bra(x), RESPONSE, info.parameters.freeze()); + // for (const auto& f: d.functions) f.print_size("doubles functions in ex pair energy"); + MADNESS_CHECK_THROW(x.type == RESPONSE, "x must be of type RESPONSE"); + MADNESS_CHECK_THROW(x.size()==info.get_active_mo_bra().size(), "x must have the same size as the active space"); + const CC_vecfunction xbra(info.R_square*x.get_vecfunction(), RESPONSE, info.parameters.freeze()); const CCFunction& xbi = xbra(d.i); - const CCFunction& mobj = mo_bra_(d.j); + const CCFunction& mobj = info.mo_bra[d.j]; + auto g12=CCConvolutionOperatorPtr(world,OT_G12,info.parameters); double result = 0.0; double s2b = 2.0 * make_xy_op_u(xbi, mobj, *g12, d.functions) - make_xy_op_u(mobj, xbi, *g12, d.functions); double s2c = 0.0; for (const auto& ktmp : x.functions) { const size_t k = ktmp.first; - const real_function_3d j_igk = (*g12)(mo_bra_(d.i), mo_ket_(k)) * mo_bra_(d.j).function; + // const real_function_3d j_igk = (*g12)(info.mo_bra[d.i], info.mo_ket[k]) * info.mo_bra[d.j].function; + const real_function_3d j_igk = (*g12)(info.mo_bra[d.i]* info.mo_ket[k]) * info.mo_bra[d.j]; s2c -= 2.0 * make_xy_u(xbra(k), j_igk, d.functions) - make_xy_u(j_igk, xbra(k), d.functions); } result = s2b + s2c; @@ -483,17 +600,17 @@ CCPotentials::compute_cc2_excitation_energy(const CC_vecfunction& stau, const CC truncate(world, tmp); CC_vecfunction xbra(tmp, RESPONSE, parameters.freeze()); const double xbrax = inner(world, xbra.get_vecfunction(), sx.get_vecfunction()).sum(); - double result = potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s3a_); - result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s3b_); - result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s3c_); - result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s5b_); - result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s5c_); - result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s6_); - result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s2b_); - result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s2c_); - result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s4a_); - result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s4b_); - result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s4c_); + double result = potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s3a_); + result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s3b_); + result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s3c_); + result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s5b_); + result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s5c_); + result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s6_); + result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s2b_); + result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s2c_); + result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s4a_); + result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s4b_); + result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s4c_); return 1.0 / xbrax * result; } @@ -661,7 +778,7 @@ CCPotentials::fock_residue_6d_macrotask(World& world, const CCPair& u, const CCP double tight_thresh = parameters.thresh_6D(); real_function_6d x = CompositeFactory(world).ket(copy(Du)).V_for_particle2( copy(U1_axis)).thresh(tight_thresh).special_points(sp6d); - x.fill_nuclear_cuspy_tree(op_mod, 2); + x.fill_nuclear_cuspy_tree(op_mod, 2); if (parameters.debug()) x.print_size("Un_axis_" + stringify(axis)); Un2 += x; } @@ -679,6 +796,180 @@ CCPotentials::fock_residue_6d_macrotask(World& world, const CCPair& u, const CCP return vphi; } +/// the constant part is the contribution to the doubles that are independent of the doubles + +/// CC-equations from Kottmann et al., JCTC 13, 5956 (2017) +/// MP2: +/// cp = G Q g~ |ij> +/// g~ = Ue - KffK +/// GS-CC2: eqs. (6,7) +/// cp = G Qt g~ |t_i t_j> +/// g~ = Ue - KffK - Fock_commutator - reduced_Fock +/// LRCC2: eqs. (24-29) +/// cp = G d(Qt g~ d|t_i t_j>) +/// = G (Qt g~ d|t_i t_j> + Qt dg~ |t_i t_j> + dQt g~ |t_i t_j>) +madness::real_function_6d +CCPotentials::make_constant_part_macrotask(World& world, const CCPair& pair, + const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles, + const Info& info) { + const CalcType targetstate=pair.ctype; + const auto& parameters=info.parameters; + std::string msg="compute constant part of pair "+std::to_string(pair.i) + " " + std::to_string(pair.j); + print_header3(msg); + timer t1(world); + // construct the projectors + // Q12 = (1-|i> Q12(world); + Q12.set_spaces(info.mo_bra,info.mo_ket,info.mo_bra,info.mo_ket); + + // Q12t = (1-|t_i> Q12t(world); + + // t1-transformed orbitals + CC_vecfunction t(MIXED); + if (targetstate==CT_CC2 or targetstate==CT_LRCC2) { + t=CCPotentials::make_full_t_intermediate(gs_singles,info); + Q12t.set_spaces(info.mo_bra,t.get_vecfunction(),info.mo_bra,t.get_vecfunction()); + } + + + // dQ12t = -(Qt(1) Ox(2) + Ox(1) Qt(2)) eq. (22) + QProjector Qt; + Projector Ox; + if (targetstate==CT_LRCC2) { + Qt.set_spaces(info.mo_bra,t.get_vecfunction()); + Ox.set_spaces(info.get_active_mo_bra(),ex_singles.get_vecfunction()); + } + auto dQt_1 = outer(Qt,Ox); + auto dQt_2 = outer(Ox,Qt); + + std::size_t i=pair.i; + std::size_t j=pair.j; + auto phi = [&](size_t i) { return CCFunction(info.mo_ket[i],i,HOLE); }; + // auto t = [&](size_t i) { return CCFunction(info.mo_ket[i]+gs_singles(i).function); }; + auto x = [&](size_t i) { return ex_singles(i); }; + + // save memory: + // split application of the BSH operator into high-rank, local part U|ij>, and + // low-rank, delocalized part (-O1 -O2 +O1O2) U|ij> by splitting the SO operator + auto apply_in_separated_form = [](const StrongOrthogonalityProjector& Q, + const std::vector>& ccp) { + + std::vector> result; + for (const auto& cc : ccp) { + if (cc.is_pure()) { + auto [left,right]=Q.get_vectors_for_outer_product(cc.get_function()); + result.push_back(cc); + result.push_back(CCPairFunction(left,right)); + } else if (cc.is_decomposed()) { + result.push_back(Q(cc)); + } + } + return result; + }; + + auto GG = BSHOperator<6>(world, sqrt(-2.0 * pair.bsh_eps), parameters.lo(), parameters.thresh_bsh_6D()); + GG.destructive() = true; + GG.print_timings=false; + auto apply_G_and_print = [&](const std::vector>& cc, std::string name) { + std::vector> tmp1; + print("cc in apply_G_and_print:",name,cc.size()); + for (const auto& tt : cc) { + print(tt.name()); + tt.print_size(); + } + for (const auto& tt : cc) tmp1 += GG(copy(tt)); + print("tmp1 after apply G"); + for (const auto& tt : tmp1) { + print(tt.name()); + tt.print_size(); + } + tmp1=consolidate(tmp1); + tmp1=-2.0*tmp1; + MADNESS_CHECK(tmp1.size()==1); + tmp1[0].get_function().print_size(name); + }; + + // compute all 6d potentials without applying the SO projector + std::vector> V; + if (targetstate==CT_MP2) { + std::vector argument={"Ue","KffK"}; + auto Vreg=apply_Vreg(world,phi(i),phi(j),gs_singles,ex_singles,info,argument,pair.bsh_eps); + V=consolidate(apply_in_separated_form(Q12,Vreg)); + } else if (targetstate==CT_CC2) { // Eq. (42) of Kottmann, JCTC 13, 5945 (2017) + std::vector argument={"Ue","KffK","comm_F_Qt_f12","reduced_Fock"}; + auto Vreg=apply_Vreg(world,t(i),t(j),gs_singles,ex_singles,info,argument,pair.bsh_eps); + V=consolidate(Q12t(Vreg)); + } else if (targetstate==CT_LRCC2) { + // Eq. (25) of Kottmann, JCTC 13, 5956 (2017) + // eq. (25) Q12t (g~ - omega f12) (|x_i t_j> + |t_i x_j> ) + // note the term omega f12 is included in the reduced_Fock term, see eq. (34) + if (1) + { + print_header3("Q12t g~ |x_i t_j + t_i x_j>"); + std::vector argument={"Ue","KffK","comm_F_Qt_f12","reduced_Fock"}; + auto Vreg=apply_Vreg(world,x(i),t(j),gs_singles,ex_singles,info,argument,pair.bsh_eps); + Vreg+=apply_Vreg(world,t(i),x(j),gs_singles,ex_singles,info,argument,pair.bsh_eps); + V=consolidate(apply_in_separated_form(Q12t,Vreg)); + // apply_G_and_print(V,"functional response"); + } + + if (0) { + print_header3("[F12,Qt] f12 |x_i t_j + t_i x_j>"); + std::vector argument={"comm_F_Qt_f12"}; + auto Vreg=apply_Vreg(world,x(i),t(j),gs_singles,ex_singles,info,argument,pair.bsh_eps); + Vreg+=apply_Vreg(world,t(i),x(j),gs_singles,ex_singles,info,argument,pair.bsh_eps); + // auto Q12V=Q12t(Vreg); + // apply_G_and_print(Q12V,"commutator response in old terminology: Q12V direct"); + } + + // eq. (29) first term: dQt g~ |t_i t_j> + if (1) { + print_header3("dQt g~ |t_i t_j> "); + const std::vector argument={"Ue","KffK","comm_F_Qt_f12","reduced_Fock"}; + // const std::vector argument={"Ue","KffK","reduced_Fock"}; + auto Vreg1=apply_Vreg(world,t(i),t(j),gs_singles,ex_singles,info,argument,pair.bsh_eps); + + auto tmp=consolidate(dQt_1(Vreg1) + dQt_2(Vreg1)); + V-=tmp; + + // MADNESS_CHECK_THROW(tmp.size()==1,"tmp size is incorrect"); + // for (auto& t : tmp) t.print_size("dQt g~ |t_i t_j>"); + // apply_G_and_print(tmp,"projector response"); + } + + + // eq. (29) second term = eq. (31): [F12, dQt] f12 |t_i t_j> + omega dQ12t f12 |t_i t_j> + if (1) { + print_header3("[F12, dQt] f12 |t_i t_j>"); + const std::vector argument={"comm_F_dQt_f12"}; + auto tmp=apply_Vreg(world,t(i),t(j),gs_singles,ex_singles,info,argument,pair.bsh_eps); + tmp=consolidate(tmp); + V+=tmp; + // apply_G_and_print(tmp,"commutator projector response"); + } + } + + V=consolidate(V); + MADNESS_CHECK(V.size()==2); // term 1: 6d, hi-rank, local; term 2: 3d, low-rank, delocalized + t1.end("finished computing potential for constant part"); + + // the Green's function + auto G = BSHOperator<6>(world, sqrt(-2.0 * pair.bsh_eps), parameters.lo(), parameters.thresh_bsh_6D()); + G.destructive() = true; + + real_function_6d GV=real_factory_6d(world).empty(); + for (const auto& vv : V) GV+= (G(vv)).get_function(); // note V is destroyed here + GV=-2.0*Q12(GV).truncate().reduce_rank(); + + GV.print_size("GVreg"); + t1.end("finished applying G on potential for constant part"); + return GV; +} + + + + madness::real_function_6d CCPotentials::make_constant_part_mp2_macrotask(World& world, const CCPair& pair, const std::vector& mo_ket, @@ -719,13 +1010,13 @@ CCPotentials::make_constant_part_mp2_macrotask(World& world, const CCPair& pair, StrongOrthogonalityProjector Q(world); Q.set_spaces(mo_bra, mo_ket, mo_bra, mo_ket); -// V = Q(V); -// -// V.print_size("QVreg"); + // V = Q(V); + // + // V.print_size("QVreg"); real_convolution_6d G = BSHOperator<6>(world, sqrt(-2.0 * epsilon), parameters.lo(), parameters.thresh_bsh_6D()); G.destructive() = true; -// real_function_6d GV = -2.0 * G(V); + // real_function_6d GV = -2.0 * G(V); // save memory: // split application of the BSH operator into high-rank, local part U|ij>, and @@ -783,6 +1074,7 @@ CCPotentials::update_pair_mp2_macrotask(World& world, const CCPair& pair, const CCTimer timer_G(world, "Apply Greens Operator on MP2-Potential of pair " + pair.name()); const real_function_6d GVmp2 = G(mp2_potential); + if (parameters.debug()) GVmp2.print_size("GVmp2"); timer_G.info(true, GVmp2.norm2()); //CCTimer timer_addup(world, "Add constant parts and update pair " + pair.name()); @@ -793,7 +1085,7 @@ CCPotentials::update_pair_mp2_macrotask(World& world, const CCPair& pair, const Q.set_spaces(mo_bra, mo_ket, mo_bra, mo_ket); unew = Q(unew); - if (parameters.debug())unew.print_size("truncated-unew"); + if (parameters.debug())unew.print_size("Q12(unew)"); timer_mp2.info(); real_function_6d residue = (pair.function() - unew); @@ -801,9 +1093,111 @@ CCPotentials::update_pair_mp2_macrotask(World& world, const CCPair& pair, const residue.truncate(FunctionDefaults<6>::get_thresh()*0.1); if (parameters.debug()) residue.print_size("bsh residual, truncated"); - return residue; + // return residue; + return unew; } + +CCPair CCPotentials::iterate_pair_macrotask(World& world, + const CCPair& pair, + const CC_vecfunction& gs_singles, + const CC_vecfunction& ex_singles, + const real_function_6d& coupling, + const Info& info, + const long maxiter) { + if (world.rank()==0) print_header2("Iterate Pair " + pair.name()); + if (pair.ctype == CT_CC2) MADNESS_ASSERT(gs_singles.type == PARTICLE); + if (pair.ctype == CT_CISPD) MADNESS_ASSERT(ex_singles.type == RESPONSE); + if (pair.ctype == CT_MP2) MADNESS_ASSERT(gs_singles.get_vecfunction().empty()); + if (pair.ctype == CT_MP2) MADNESS_ASSERT(ex_singles.get_vecfunction().empty()); + if (pair.ctype == CT_ADC2)MADNESS_ASSERT(ex_singles.type == RESPONSE); + + real_function_6d constant_part = pair.constant_part; + constant_part.truncate().reduce_rank(); + pair.function().truncate().reduce_rank(); + + StrongOrthogonalityProjector Q12(world); + Q12.set_spaces(info.mo_bra,info.mo_ket,info.mo_bra,info.mo_ket); + + double bsh_eps = pair.bsh_eps; //CCOPS.get_epsilon(pair.i,pair.j)+omega; + real_convolution_6d G = BSHOperator<6>(world, sqrt(-2.0 * bsh_eps), info.parameters.lo(), info.parameters.thresh_bsh_6D()); + G.destructive() = true; + + NonlinearSolverND<6> solver(info.parameters.kain_subspace()); + solver.do_print = (world.rank() == 0); + + CCPair result=pair; + + // only the u-part of omega + double omega_partial=0.0; + if (result.ctype == CT_MP2) omega_partial = CCPotentials::compute_pair_correlation_energy(world, info, result); + else if (result.type == EXCITED_STATE) omega_partial = CCPotentials::compute_excited_pair_energy(world, result, ex_singles, info); + + for (size_t iter = 0; iter < maxiter; iter++) { + if (world.rank()==0) print_header3(assign_name(result.ctype) + "-Microiteration"); + CCTimer timer_mp2(world, "MP2-Microiteration of pair " + result.name()); + + + CCTimer timer_mp2_potential(world, "MP2-Potential of pair " + result.name()); + // real_function_6d mp2_potential = -2.0 * CCOPS.fock_residue_6d(result); + real_function_6d mp2_potential = -2.0 * fock_residue_6d_macrotask(world,result,info.parameters, + info.molecular_coordinates,info.mo_ket,info.mo_bra, + info.U1,info.U2); + mp2_potential += 2.0 * coupling; + + if (info.parameters.debug()) mp2_potential.print_size(assign_name(result.ctype) + " Potential"); + mp2_potential.truncate().reduce_rank(); + timer_mp2_potential.info(true, mp2_potential.norm2()); + + CCTimer timer_G(world, "Apply Greens Operator on MP2-Potential of pair " + result.name()); + const real_function_6d GVmp2 = G(mp2_potential); + if (info.parameters.debug()) GVmp2.print_size("GVmp2"); + timer_G.info(true, GVmp2.norm2()); + + CCTimer timer_addup(world, "Add constant parts and update pair " + result.name()); + real_function_6d unew = Q12(GVmp2 + constant_part); + if (info.parameters.debug()) unew.print_size("Q12(unew)"); + + const real_function_6d residual = result.function() - unew; + double rmsresidual=residual.norm2(); + + if (info.parameters.kain()) { + + real_function_6d kain_update = copy(solver.update(result.function(), residual)); + // kain_update = CCOPS.apply_Q12t(kain_update, CCOPS.mo_ket()); + kain_update = Q12(kain_update); + if (info.parameters.debug()) kain_update.print_size("Kain-Update-Function"); + result.update_u(copy(kain_update)); + } else { + result.update_u(unew); + } + + timer_addup.info(true, result.function().norm2()); + + double omega_new = 0.0; + if (result.ctype == CT_MP2) omega_new = CCPotentials::compute_pair_correlation_energy(world, info, result); + else if (result.type == EXCITED_STATE) omega_new = CCPotentials::compute_excited_pair_energy(world, result, ex_singles, info); + double delta = omega_partial - omega_new; + omega_partial = omega_new; + + if (world.rank()==0) + print_convergence(pair.name(),rmsresidual,rmsresidual,delta,iter); + + // output("\n--Iteration " + stringify(iter) + " ended--"); + // save(result.function(), result.name()); + // timer_mp2.info(); + bool converged=(rmsresidual < info.parameters.dconv_6D()) and (fabs(delta) < info.parameters.econv_pairs()); + if (converged) { + if (world.rank()==0) print("Iteration converged after",iter,"iterations"); + break; + } else { + if (world.rank()==0) print("Iteration not converged after",iter,"iterations"); + } + } + return result; +} + + madness::real_function_6d CCPotentials::make_constant_part_cc2_gs(const CCPair& u, const CC_vecfunction& tau, const real_convolution_6d *Gscreen) const { @@ -904,6 +1298,8 @@ CCPotentials::make_constant_part_cc2_Qt_gs(const CCPair& u, const CC_vecfunction real_convolution_6d G = BSHOperator<6>(world, sqrt(-2.0 * get_epsilon(ti.i, tj.i)), parameters.lo(), parameters.thresh_bsh_6D()); G.destructive() = true; + G.particle_=1; + // G.particle_=-1; // calculate [F,Qt] commutator which is [F1,Q1t]Q2t + Q1t [F2,Q2t] // and [F1,Q1t] = - [F1,O1t] = - (F-e_k) |tk> @@ -1294,12 +1690,14 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction CCTimer time_cpr(world, "Commutator-Projector Response"); // Make functional response part: G(QtVreg|xitj + tixj>) real_function_6d functional_response; - { + if (1) { time_fr.start(); const real_function_6d Vxt = (apply_Vreg(xi, tj, Gscreen)).truncate().reduce_rank(); if (symmetric) { real_function_6d V = apply_Q12t(Vxt, t); + V.print_size("Q12tVreg"); const real_function_6d tmp = -2.0 * G(V); + tmp.print_size("G(Q12tVreg)"); functional_response = tmp + swap_particles(tmp); } else { const real_function_6d Vtx = apply_Vreg(ti, xj, Gscreen); @@ -1308,22 +1706,28 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction functional_response = -2.0 * G(V); } time_fr.stop(); - } // make Projector Response: -G(OxQt+QtOx)Vreg|titj> + } + functional_response.print_size("G functional response"); + + // make Projector Response: -G(OxQt+QtOx)Vreg|titj> real_function_6d projector_response; - { + if (1) { time_pr.start(); // here is an inconsistency: The Vreg potential will apply (F12-eij) to the |titj> state but we have here (F12-eij-omega) // in the future this part here is supposed to be entirely 3D and not use the 6D apply_Vreg function, so right now this is a workaround // however, we have to add the missing -omega|titj> real_function_6d Vtt_tmp = apply_Vreg(ti, tj, Gscreen); real_function_6d titj = make_f_xy(ti, tj); - Vtt_tmp = Vtt_tmp - x.omega * titj; + print("skipping omega term 1"); + // Vtt_tmp = Vtt_tmp - x.omega * titj; CCPairFunction Vtt(Vtt_tmp); real_function_6d tmp1; real_function_6d tmp2; { CCPairFunction Ox = apply_Ot(Vtt, x, 1); CCPairFunction OxQt = apply_Qt(Ox, t, 2); + OxQt.convert_to_pure_no_op_inplace(); + OxQt.get_function().print_size("Q12t_FQtQtF_f12"); tmp1 = -2.0 * apply_G(OxQt, G); } if (symmetric) tmp2 = swap_particles(tmp1); @@ -1335,9 +1739,12 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction projector_response = tmp1 + tmp2; time_pr.stop(); } + projector_response.print_size("G projector response"); + // make commutator response: [F12,Qt12]f12|xitj+tixj> = (O1VQ2t + Q1tO2V)f12|xitj+tixj> real_function_6d commutator_response; { + print_header3("[F12,Qt12]f12|xitj+tixj> = (Ov Qt + Qt Ov) f12 |xitj+tixj>"); time_cr.start(); real_function_6d part1; // the xt parts const vector_real_function_3d Vtmp = get_potentials(tau, POT_singles_); @@ -1367,6 +1774,8 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction commutator_response = part1 + part2; time_cr.stop(); } + commutator_response.print_size("G commutator response"); + // make Commutator Projector response Response: [F,d/dtau(Qt)] part of d/dtau{([F,Qt])f12|xitj + tixj>} // {-O1x[F,Q2t] - Q1t[F,O2x] - [F,O1x]Q2t - [F,Q1t]O2x , used d/dtau(Qt) = -Ox // O1x[F,O2t] - Q1t[F,O2x] - [F,O1x]Q2t + [F,O1t]O2x , used [F,Qt] = -[F,Ot] @@ -1377,10 +1786,14 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction // }f12|titj> real_function_6d commutator_projector_response; { + print_header3("[F12,dQt] f12 |t_i t_j> = (Ox OVt + Qt OVx) f12 |t_i t_j>"); time_cpr.start(); - const vector_real_function_3d Vxtmp = sub(world, get_potentials(x, POT_singles_), - x.omega * x.get_vecfunction()); + print("skipping omega term 2"); + // const vector_real_function_3d Vxtmp = sub(world, get_potentials(x, POT_singles_), + // x.omega * x.get_vecfunction()); + const vector_real_function_3d Vxtmp = get_potentials(x, POT_singles_); const vector_real_function_3d Vttmp = get_potentials(tau, POT_singles_); + const CC_vecfunction Vx(Vxtmp, UNDEFINED, parameters.freeze()); const CC_vecfunction Vt(Vttmp, UNDEFINED, parameters.freeze()); CCPairFunction ftt(f12, ti, tj); @@ -1408,9 +1821,13 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction commutator_projector_response = tmp1 + tmp2; time_cpr.stop(); } + commutator_projector_response.print_size("G commutator projector response"); + print_header3("add all up"); real_function_6d result = functional_response - projector_response + commutator_response + commutator_projector_response; + result.print_size("result"); result = apply_Q12t(result, mo_ket_); + result.print_size("Q12t result"); output.section("Constant Term for Pair " + u.name() + " ended"); time_fr.info(true, functional_response.norm2()); time_pr.info(true, projector_response.norm2()); @@ -1425,7 +1842,7 @@ CCPotentials::apply_Vreg(const CCFunction& ti, const CCFunction"); CCTimer timer(world, "Vreg|" + ti.name() + tj.name() + ">"); CCTimer time_f(world, "F-Part"); - const real_function_6d F_part = apply_reduced_F(ti, tj, Gscreen); + const real_function_6d F_part = apply_reduced_F1(ti, tj, Gscreen); time_f.stop(); CCTimer time_u(world, "U-Part"); const real_function_6d U_part = apply_transformed_Ue(ti, tj, Gscreen); @@ -1449,6 +1866,69 @@ CCPotentials::apply_Vreg(const CCFunction& ti, const CCFunction \f$ +/// - Ue = [T,f12] +/// - [K,f12] +/// - [F12,Q12t] f12 or [F12,dQ12t] f12 +/// - f12 (F - e_ij - omega) or f12 (F - e_ij) +/// the last terms are computed using the converged singles potential, i.e. we assume that the following equation holds +/// (see Kottmann et al., JCTC 13, 5945 (2017) eqs (30), (31), (44) +/// (see Kottmann et al., JCTC 13, 5956 (2017) eqs (17), (19), (32) +/// CC2: (F - e_i ) |t_i t_j> = | Vtau > +/// LRCC2: (F - e_i - omega) |x_i> = | Vx > +/// @param[in] ti first function in the ket, for MP2 it is the Orbital, for CC2 the relaxed Orbital t_i=\phi_i + \tau_i +/// @param[in] tj second function in the ket ... +/// @param[in] gs_singles the converged ground state singles: with (F - e_i ) |t_i t_j> = | Vtau > +/// @param[in] ex_singles the converged excited state singles: with (F - e_i - omega) |x_i> = | Vx > +/// @param[in] info Info structure holding the applied singles potentials Vtau and Vx and reference orbitals +/// @param[out] the regularization potential (unprojected), see equation above +std::vector> + CCPotentials::apply_Vreg(World& world, const CCFunction& ti, const CCFunction& tj, + const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles, + const Info& info, const std::vector& argument, const double bsh_eps) { + + const auto parameters=info.parameters; + if (parameters.debug() and (world.rank()==0)) { + print("computing the following terms in constant_part for pair: (",ti.name(),",", tj.name(),"):" , argument); + } + + real_convolution_6d Gscreen = BSHOperator<6>(world, sqrt(-2.0 * bsh_eps), + parameters.lo(), parameters.thresh_bsh_6D()); + Gscreen.modified() = true; + + auto exists=[&](const std::string term) { + return std::find(argument.begin(), argument.end(), term) != argument.end(); + }; + + // calculate the regularized potential + real_function_6d V=real_factory_6d(world); + std::vector> V_lowrank; + if (exists("Ue")) V += apply_Ue(world,ti,tj,info,&Gscreen); + if (exists("KffK")) V -= apply_KffK(world,ti,tj,info,&Gscreen); + if (exists("reduced_Fock")) V += apply_reduced_F(world,ti,tj,info,&Gscreen); + if (exists("comm_F_Qt_f12")) { + V_lowrank += apply_commutator_F_Qt_f12(world,ti,tj,gs_singles,ex_singles,info,&Gscreen); + } + if (exists("comm_F_dQt_f12")) { + V_lowrank += apply_commutator_F_dQt_f12(world,ti,tj,gs_singles,ex_singles,info,&Gscreen); + } + V.truncate().reduce_rank(); + if (parameters.debug()) { + V.print_size("Vreg -- pure component"); + print("V_lowrank.size()",V_lowrank.size()); + } + + std::vector> result; + if (V.tree_size()>0) result+=CCPairFunction(V); + result+=V_lowrank; + return result; + +} + madness::real_function_6d CCPotentials::apply_Vreg_macrotask(World& world, const std::vector& mo_ket, const std::vector& mo_bra, @@ -1509,7 +1989,7 @@ CCPotentials::apply_Vreg_macrotask(World& world, const std::vector& ti, const CCFunction& tj, const real_convolution_6d *Gscreen) const { +CCPotentials::apply_reduced_F1(const CCFunction& ti, const CCFunction& tj, const real_convolution_6d *Gscreen) const { //CC_Timer time(world,"(F-eij)|"+ti.name()+tj.name()+">"); // get singles potential const bool symmetric = (ti.type == tj.type && ti.i == tj.i); @@ -1526,6 +2006,26 @@ CCPotentials::apply_reduced_F(const CCFunction& ti, const CCFunction +/// f12 (F12 - e_ij - omega) |ti xj> +madness::real_function_6d +CCPotentials::apply_reduced_F(World& world, const CCFunction& ti, const CCFunction& tj, + const Info& info, const real_convolution_6d *Gscreen) { + //CC_Timer time(world,"(F-eij)|"+ti.name()+tj.name()+">"); + // get singles potential + const bool symmetric = (ti == tj); + const real_function_3d Vti = info.intermediate_potentials(ti, POT_singles_); + const real_function_3d Vtj = info.intermediate_potentials(tj, POT_singles_); + const real_function_6d Vt = make_f_xy(world, Vti, tj, info, Gscreen); + real_function_6d tV; + if (symmetric) tV = madness::swap_particles(Vt); + else tV = make_f_xy(world, ti, Vtj, info, Gscreen); + + const real_function_6d result = -1.0 * (Vt + tV); + return result; +} madness::real_function_6d CCPotentials::apply_transformed_Ue(const CCFunction& x, const CCFunction& y, const real_convolution_6d *Gscreen) const { @@ -1610,6 +2110,101 @@ CCPotentials::apply_transformed_Ue(const CCFunction& x, const CCFuncti } +madness::real_function_6d +CCPotentials::apply_Ue(World& world, const CCFunction& phi_i, const CCFunction& phi_j, + const Info& info, const real_convolution_6d *Gscreen) { + + const std::string x_name = phi_i.name(); + const std::string y_name = phi_j.name(); + const auto& parameters=info.parameters; + + if (parameters.debug()) print("Computing Ue|" + x_name + y_name + ">"); + + real_function_3d x_function=phi_i.function; + real_function_3d y_function=phi_j.function; + CorrelationFactor corrfac(world, parameters.gamma(), 1.e-7, parameters.lo()); + + const bool symmetric = (phi_i.type == phi_j.type && phi_i.i == phi_j.i); + CCTimer time_Ue(world, "Ue|" + x_name + y_name + ">"); + double tight_thresh = parameters.thresh_6D(); // right now this is the std. thresh + // check if screening operator is in modified NS Form + if (Gscreen != NULL) { + if (!Gscreen->modified()) error("Demanded Screening for Ue but given BSH Operator is not in modified NS form"); + } + if (parameters.debug()) print("Applying transformed Ue to \n" + x_name + y_name); + + if (parameters.debug() && symmetric) print("Exploiting Pair Symmetry\n"); + + real_function_6d Uxy = real_factory_6d(world); + Uxy.set_thresh(tight_thresh); + // Apply the untransformed U Potential + Uxy = corrfac.apply_U(x_function, y_function, *Gscreen, symmetric); + Uxy.set_thresh(tight_thresh); + // Apply the double commutator R^{-1}[[T,f,R] + for (size_t axis = 0; axis < 3; axis++) { + // Make the local parts of the Nuclear and electronic U potentials + const real_function_3d Un_local = info.U1[axis]; + const real_function_3d Un_local_x = (Un_local * x_function).truncate(); + real_function_3d Un_local_y; + if (symmetric) Un_local_y = copy(Un_local_x); + else Un_local_y = (Un_local * y_function).truncate(); + + const real_function_6d Ue_local = corrfac.U1(axis); + // Now add the Un_local_x part to the first particle of the Ue_local potential + real_function_6d UeUnx = CompositeFactory(world).g12(Ue_local).particle1(Un_local_x).particle2( + copy(y_function)).thresh(tight_thresh); + // Fill the Tree where it will be necessary + UeUnx.fill_cuspy_tree(*Gscreen); + // Set back the thresh + UeUnx.set_thresh(FunctionDefaults<6>::get_thresh()); +// print_size(UeUnx, "UeUnx", parameters.debug()); + // Now add the Un_local_y part to the second particle of the Ue_local potential + real_function_6d UeUny; + if (symmetric) UeUny = -1.0 * madness::swap_particles(UeUnx); // Ue_local is antisymmetric + else { + UeUny = CompositeFactory(world).g12(Ue_local).particle1(copy(x_function)).particle2( + Un_local_y).thresh(tight_thresh); + // Fill the Tree were it will be necessary + UeUny.fill_cuspy_tree(*Gscreen); + // Set back the thresh + UeUny.set_thresh(FunctionDefaults<6>::get_thresh()); + } +// print_size(UeUny, "UeUny", parameters.debug()); + // Construct the double commutator part and add it to the Ue part + real_function_6d diff = (UeUnx - UeUny).scale(-1.0); + diff.truncate(); + Uxy = (Uxy + diff).truncate(); + } + if (parameters.debug()) time_Ue.info(); + + // sanity check: = - = 0 + CCTimer time_sane(world, "Ue-Sanity-Check"); + real_function_6d tmp = CompositeFactory(world).particle1( + copy(x_function * info.R_square)).particle2(copy(y_function * info.R_square)); + const double a = inner(Uxy, tmp); + const real_function_3d xx = (x_function * x_function * info.R_square); + const real_function_3d yy = (y_function * y_function * info.R_square); +// const real_function_3d gxx = g12(xx); + real_convolution_3d poisson= CoulombOperator(world,parameters.lo(),parameters.thresh_3D()); + const real_function_3d gxx= poisson(xx); + + const double aa = inner(yy, gxx); + const double error = std::fabs(a - aa); + const double diff = a - aa; + time_sane.info(parameters.debug(), error); + if (world.rank() == 0) { + std::cout << std::fixed << std::setprecision(10) << "<" << x_name + y_name << "|U_R|" << x_name + y_name + << "> =" << a << ", <" << x_name + y_name << "|g12|" << x_name + y_name + << "> =" << aa << ", diff=" << error << "\n"; + //printf(" %12.8f\n",a); + //printf(" %12.8f\n",aa); + if (error > FunctionDefaults<6>::get_thresh() * 10.0) std::cout << ("Ue Potential plain wrong!\n"); + else if (error > FunctionDefaults<6>::get_thresh()) std::cout << ("Ue Potential wrong!!!!\n"); + else std::cout << ("Ue seems to be sane, diff=" + std::to_string(diff)) << std::endl; + } + return Uxy; +} + madness::real_function_6d CCPotentials::apply_transformed_Ue_macrotask(World& world, const std::vector& mo_ket, const CCParameters& parameters, const real_function_3d& Rsquare, @@ -1705,6 +2300,161 @@ CCPotentials::apply_transformed_Ue_macrotask(World& world, const std::vector + +/// From Eqs. (42) - (44) of Kottmann et al. JCTC 13, 5945 (2017) +/// and eq. (30) of Kottmann et al. JCTC 13, 5956 (2017) +/// [F,Qt] = [F1,Q1t]Q2t + Q1t [F2,Q2t] +/// and [F1,Q1t] = - [F1,O1t] = - (F-e_k) |tk> +/// @return the commutator [F,Qt] f12 |phi_i phi_j> +madness::CCPairFunction +CCPotentials::apply_commutator_F_Qt_f12(World& world, const CCFunction& phi_i, const CCFunction& phi_j, + const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles, + const Info& info, const real_convolution_6d *Gscreen) { + const auto& parameters=info.parameters; + + // if ground-state use Eqs (43)-(44) of Kottmann et al. JCTC 13, 5945 (2017) + auto f12=CCConvolutionOperatorPtr(world,OT_F12,parameters); + auto ftt=std::vector>({CCPairFunction(f12, phi_i.function, phi_j.function)}); + + const vector_real_function_3d Vtau=info.intermediate_potentials(gs_singles, POT_singles_); + Projector OVtau(info.get_active_mo_bra(),Vtau); + QProjector Qt(info.get_active_mo_bra(),gs_singles.get_vecfunction()); + + auto p1=outer(OVtau,Qt); + auto p2=outer(Qt,OVtau); + + // result=Qt2(Ov1(ftt)) + Qt1(Ov2(ftt)); + auto result=p1(ftt) + p2(ftt); + + result=consolidate(result,{}); // will collect similar terms only + MADNESS_CHECK_THROW(result.size()==1 and result[0].is_decomposed(),"apply_Fock_commutator should return a single CCPairFunction"); + return result[0]; +} + +/// calculate [F,dQt] f12 |rhs> + +/// Using eq. (31) of Kottmann et al. JCTC 13, 5956 (2017) +/// note that we leave the omega dQ12t term out, as it cancels with eq. (29) +/// @return [F,Qt] f12 |rhs> - omega dQ12 f12 |phi_i phi_j> +madness::CCPairFunction +CCPotentials::apply_commutator_F_dQt_f12(World& world, const CCFunction& phi_i, const CCFunction& phi_j, + const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles, + const Info& info, const real_convolution_6d *Gscreen) { + const auto& parameters=info.parameters; + + auto f12=CCConvolutionOperatorPtr(world,OT_F12,parameters); + auto ftt=std::vector>({CCPairFunction(f12, phi_i.function, phi_j.function)}); + + auto t=CCPotentials::make_active_t_intermediate(gs_singles,info); + const vector_real_function_3d Vtau=info.intermediate_potentials(gs_singles, POT_singles_); + const vector_real_function_3d Vx=info.intermediate_potentials(ex_singles, POT_singles_); + auto bra=info.get_active_mo_bra(); + + Projector OVtau(bra,Vtau); + Projector Ox(bra,ex_singles.get_vecfunction()); + Projector OVx(bra,Vx); + QProjector Qt(bra,t.get_vecfunction()); + + auto OvxQt=outer(OVx,Qt); + auto QtOvx=outer(Qt,OVx); + auto OxOvt=outer(Ox,OVtau); + auto OvtOx=outer(OVtau,Ox); + + auto result=OvxQt(ftt) + QtOvx(ftt) - OxOvt(ftt) - OvtOx(ftt); + result=consolidate(result); // will collect similar terms only + MADNESS_CHECK_THROW(result.size()==1 and result[0].is_decomposed(),"apply_Fock_commutator should return a single CCPairFunction"); + return result[0]; +} + + +madness::real_function_6d +CCPotentials::apply_KffK(World& world, const CCFunction& phi_i, const CCFunction& phi_j, + const Info& info, const real_convolution_6d *Gscreen) { + real_function_3d x_ket = phi_i.function; + real_function_3d y_ket = phi_j.function; + real_function_3d x_bra = (info.R_square*phi_i.function).truncate(); + real_function_3d y_bra = (info.R_square*phi_j.function).truncate(); + const std::string x_name = phi_i.name(); + const std::string y_name = phi_j.name(); + + const auto& parameters=info.parameters; + + //apply Kf + if (parameters.debug()) print("\nComputing [K,f]|" + x_name + y_name + ">\n"); + + CCTimer time(world, "[K,f]|" + x_name + y_name + ">"); + CCTimer part1_time(world, "Kf" + x_name + y_name + ">"); + + bool symmetric_kf = false; + if ((phi_i.type == phi_j.type) && (phi_i.i == phi_j.i)) symmetric_kf = true; + + // First make the 6D function f12|x,y> + real_function_6d f12xy = make_f_xy_macrotask(world, x_ket, y_ket, x_bra, y_bra, phi_i.i, phi_j.i, + parameters, phi_i.type, phi_j.type, Gscreen); + f12xy.truncate().reduce_rank(); + // Apply the Exchange Operator + real_function_6d Kfxy = K_macrotask(world, info.mo_ket, info.mo_bra, f12xy, symmetric_kf, parameters); + + if (parameters.debug()) part1_time.info(); + + //apply fk + CCTimer part2_time(world, "fK" + x_name + y_name + ">"); + + const bool symmetric_fk = (phi_i==phi_j); + const real_function_3d Kx = K_macrotask(world, info.mo_ket, info.mo_bra, x_ket, parameters); + const FuncType Kx_type = UNDEFINED; + const real_function_6d fKphi0b = make_f_xy_macrotask(world, Kx, y_ket, x_bra, y_bra, phi_i.i, phi_j.i, + parameters, Kx_type, phi_j.type, Gscreen); + real_function_6d fKphi0a; + if (symmetric_fk) fKphi0a = madness::swap_particles(fKphi0b); + else { + real_function_3d Ky = K_macrotask(world, info.mo_ket, info.mo_bra, y_ket, parameters); + const FuncType Ky_type = UNDEFINED; + fKphi0a = make_f_xy_macrotask(world, x_ket, Ky, x_bra, y_bra, phi_i.i, phi_j.i, + parameters, phi_i.type, Ky_type, Gscreen); + } + const real_function_6d fKxy = (fKphi0a + fKphi0b); + + if (parameters.debug()) part2_time.info(); + + //final result + Kfxy.print_size("Kf" + x_name + y_name); + Kfxy.set_thresh(parameters.thresh_6D()); + Kfxy.truncate().reduce_rank(); + Kfxy.print_size("Kf after truncation" + x_name + y_name); + fKxy.print_size("fK" + x_name + y_name); + real_function_6d result = (Kfxy - fKxy); + result.set_thresh(parameters.thresh_6D()); + result.print_size("[K,f]" + x_name + y_name); + result.truncate().reduce_rank(); + result.print_size("[K,f]" + x_name + y_name); + + //sanity check + CCTimer sanity(world, "[K,f] sanity check"); + // make the =" << test << "\n"; + } + if (world.rank() == 0 && fabs(diff) > parameters.thresh_6D()) print("Exchange Commutator Plain Wrong"); + else print("Exchange Commutator seems to be sane, diff=" + std::to_string(diff)); + + if (parameters.debug()) sanity.info(diff); + + if (parameters.debug()) print("\n"); + + return result; +} + + madness::real_function_6d CCPotentials::apply_exchange_commutator_macrotask(World& world, const std::vector& mo_ket, const std::vector& mo_bra, const real_function_3d& Rsquare, @@ -1845,22 +2595,25 @@ CCPotentials::apply_exchange_commutator1(const CCFunction& x, const CC double CCPotentials::make_xy_gf_ab(const CCFunction& x, const CCFunction& y, const CCFunction& a, const CCFunction& b) const { const real_function_3d xa = (x.function * a.function).truncate(); - const real_function_3d x_gf_a = apply_gf(xa); + const real_function_3d x_gf_a = apply_gf(world, xa, info); const double result = y.function.inner(x_gf_a * b.function); return result; } madness::real_function_3d -CCPotentials::apply_gf(const real_function_3d& f) const { - std::shared_ptr fBSH = std::shared_ptr( - BSHOperatorPtr3D(world, parameters.gamma(), parameters.lo(), parameters.thresh_poisson())); - double bsh_prefactor = 4.0 * constants::pi; - double prefactor = 1.0 / (2.0 * parameters.gamma()); - return prefactor * ((*g12)(f) - bsh_prefactor * (*fBSH)(f)).truncate(); +CCPotentials::apply_gf(World& world, const real_function_3d& f, const Info& info) { + // std::shared_ptr fBSH = std::shared_ptr( + // BSHOperatorPtr3D(world, info.parameters.gamma(), info.parameters.lo(), info.parameters.thresh_poisson())); + auto fg=CCConvolutionOperator(world,OpType::OT_FG12,info.parameters); + + // double bsh_prefactor = 4.0 * constants::pi; + // double prefactor = 1.0 / (2.0 * info.parameters.gamma()); + return fg(f).truncate(); + // return prefactor * ((*g12)(f) - bsh_prefactor * (*fBSH)(f)).truncate(); } double -CCPotentials::make_xy_u(const CCFunction& x, const CCFunction& y, const std::vector>& u) const { +CCPotentials::make_xy_u(const CCFunction& x, const CCFunction& y, const std::vector>& u) { double result = 0.0; for (size_t mm = 0; mm < u.size(); mm++) { result += u[mm].make_xy_u(x, y); @@ -1870,33 +2623,36 @@ CCPotentials::make_xy_u(const CCFunction& x, const CCFunction& x, const CCFunction& y, const CCConvolutionOperator& op, - const CCPairFunction& u) const { - double result = 0.0; - if (u.component->is_pure()) { - real_function_6d xy_op = CompositeFactory(world).particle1(copy(x.function)).particle2( - copy(y.function)).g12(op.get_kernel()); - result = inner(u.get_function(), xy_op); - } else if (u.component->is_decomposed()) { - if (u.component->has_operator()) { - if (op.type() == OpType::OT_G12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_F12) - result = make_xy_gf_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]); - else if (op.type() == OpType::OT_F12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_G12) - result = make_xy_gf_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]); - else if (op.type() == OpType::OT_F12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_F12) - result = make_xy_ff_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]); - else MADNESS_EXCEPTION(("xy_" + op.name() + u.name() + " not implemented").c_str(), 1); - } else { - for (size_t i = 0; i < u.decomposed().get_a().size(); i++) - result += (x.function * u.decomposed().get_a()[i]).inner(op(y, u.decomposed().get_b()[i])); - } - } else error("Unknown CCPairFunction type in make_xy_op_u"); - - return result; + const CCPairFunction& u) { + auto ket=CCPairFunction(x.f(),y.f()); + auto bra=std::make_shared>(op)*u; + return inner(bra,ket); +// double result = 0.0; +// if (u.component->is_pure()) { +// real_function_6d xy_op = CompositeFactory(world).particle1(copy(x.function)).particle2( +// copy(y.function)).g12(op.get_kernel()); +// result = inner(u.get_function(), xy_op); +// } else if (u.component->is_decomposed()) { +// if (u.component->has_operator()) { +// if (op.type() == OpType::OT_G12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_F12) +// result = make_xy_gf_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]); +// else if (op.type() == OpType::OT_F12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_G12) +// result = make_xy_gf_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]); +// else if (op.type() == OpType::OT_F12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_F12) +// result = make_xy_ff_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]); +// else MADNESS_EXCEPTION(("xy_" + op.name() + u.name() + " not implemented").c_str(), 1); +// } else { +// for (size_t i = 0; i < u.decomposed().get_a().size(); i++) +// result += (x.function * u.decomposed().get_a()[i]).inner(op(y, u.decomposed().get_b()[i])); +// } +// } else error("Unknown CCPairFunction type in make_xy_op_u"); +// +// return result; } double CCPotentials::make_xy_op_u(const CCFunction& x, const CCFunction& y, const CCConvolutionOperator& op, - const std::vector>& u) const { + const std::vector>& u) { double result = 0.0; for (size_t mm = 0; mm < u.size(); mm++) { const double tmp = make_xy_op_u(x, y, op, u[mm]); @@ -1920,7 +2676,7 @@ CCPotentials::make_xy_op_ab(const CCFunction& x, const CCFunction> -CCPotentials::get_pair_function(const Pairs& pairs, const size_t i, const size_t j) const { +CCPotentials::get_pair_function(const Pairs& pairs, const size_t i, const size_t j) { if (i > j) { return swap_particles(pairs(j, i).functions); } else { @@ -1929,8 +2685,11 @@ CCPotentials::get_pair_function(const Pairs& pairs, const size_t i, cons } madness::real_function_3d -CCPotentials::apply_s2b_operation(const CCFunction& bra, const CCPairFunction& u, const size_t particle) const { +CCPotentials::apply_s2b_operation(World& world, const CCFunction& bra, const CCPairFunction& u, + const size_t particle, const Info& info) { real_function_3d result; + auto g12=std::shared_ptr>(new CCConvolutionOperator(world,OpType::OT_G12,info.parameters)); + MADNESS_ASSERT(particle == 1 || particle == 2); if (u.is_pure()) { result = u.dirac_convolution(bra, *g12, particle); @@ -1938,20 +2697,24 @@ CCPotentials::apply_s2b_operation(const CCFunction& bra, const CCPairF result = u.dirac_convolution(bra, *g12, particle); } else if (u.is_op_decomposed()) { // retunrns _particle - CCFunction a; - CCFunction b; - if (particle == 1) { - a = u.get_a()[0]; - b = u.get_b()[0]; - } else { - a = u.get_b()[0]; - b = u.get_a()[0]; - } - const real_function_3d tmp = (bra.function * a.function).truncate(); - const real_function_3d tmp2 = apply_gf(tmp); - real_function_3d tmp3 = tmp2 * b.function; - tmp3.truncate(); - result = tmp3; + std::array p1={0,1,2}; + std::array p2={3,4,5}; + auto p = (particle == 1) ? p1 : p2; + result=inner(g12*u,bra.f(),p,p1); +// CCFunction a; +// CCFunction b; +// if (particle == 1) { +// a = u.get_a()[0]; +// b = u.get_b()[0]; +// } else { +// a = u.get_b()[0]; +// b = u.get_a()[0]; +// } +// const real_function_3d tmp = (bra.function * a.function).truncate(); +// const real_function_3d tmp2 = apply_gf(world, tmp, info); +// real_function_3d tmp3 = tmp2 * b.function; +// tmp3.truncate(); +// result = tmp3; } else MADNESS_EXCEPTION("apply_s2b_operation: unknown type", 1) ; @@ -2032,6 +2795,9 @@ CCPotentials::apply_Ot(const CCPairFunction& f, const CC_vecfunction& CC_vecfunction mbra; if (t.size() == mo_bra_.size()) mbra = CC_vecfunction(copy(world, mo_bra_.get_vecfunction()), HOLE); else mbra = CC_vecfunction(copy(world, get_active_mo_bra()), HOLE, parameters.freeze()); + Projector O(mbra.get_vecfunction(), t.get_vecfunction()); + O.set_particle(particle-1); // shift particle index + return O(f); MADNESS_ASSERT(mbra.size() == t.size()); if (f.is_pure()) { @@ -2091,107 +2857,118 @@ CCPotentials::apply_G(const CCPairFunction& u, const real_convolution_ } madness::vector_real_function_3d -CCPotentials::get_CC2_singles_potential_gs(const CC_vecfunction& singles, const Pairs& doubles) const { +CCPotentials::get_CC2_singles_potential_gs(World& world, const CC_vecfunction& singles, + const Pairs& doubles, Info& info) +{ CCTimer time(world, "CC2 Singles potential"); - vector_real_function_3d fock_residue = potential_singles_gs(singles, doubles, POT_F3D_); + vector_real_function_3d fock_residue = potential_singles_gs(world, singles, doubles, POT_F3D_, info); + Projector Otau(info.get_active_mo_bra(), singles.get_vecfunction()); + QProjector Q(info.mo_bra, info.mo_ket); // CC2 Singles potential: Q(S4c) + Qt(ccs+s2b+s2c) - vector_real_function_3d Vccs = potential_singles_gs(singles, doubles, POT_ccs_); - vector_real_function_3d Vs2b = potential_singles_gs(singles, doubles, POT_s2b_); - vector_real_function_3d Vs2c = potential_singles_gs(singles, doubles, POT_s2c_); - vector_real_function_3d Vs4b = potential_singles_gs(singles, doubles, POT_s4b_); - vector_real_function_3d Vs4c = potential_singles_gs(singles, doubles, POT_s4c_); - vector_real_function_3d Vs4a = apply_projector(Vs2b, singles); // need to subtract + vector_real_function_3d Vccs = potential_singles_gs(world, singles, doubles, POT_ccs_, info); + vector_real_function_3d Vs2b = potential_singles_gs(world, singles, doubles, POT_s2b_, info); + vector_real_function_3d Vs2c = potential_singles_gs(world, singles, doubles, POT_s2c_, info); + vector_real_function_3d Vs4b = potential_singles_gs(world, singles, doubles, POT_s4b_, info); + vector_real_function_3d Vs4c = potential_singles_gs(world, singles, doubles, POT_s4c_, info); + // vector_real_function_3d Vs4a = apply_projector(Vs2b, singles); // need to subtract + vector_real_function_3d Vs4a = Otau(Vs2b); // need to subtract vector_real_function_3d unprojected = add(world, Vccs, add(world, Vs2b, add(world, Vs2c, add(world, Vs4b, sub(world, Vs4c, Vs4a))))); - vector_real_function_3d potential = apply_Qt(unprojected, mo_ket_); + // vector_real_function_3d potential = apply_Qt(unprojected, mo_ket_); + vector_real_function_3d potential = Q(unprojected); truncate(world, potential); - get_potentials.insert(copy(world, potential), singles, POT_singles_); + info.intermediate_potentials.insert(copy(world, potential), singles, POT_singles_); time.info(true, norm2(world, potential)); const vector_real_function_3d result = add(world, potential, fock_residue); return result; } madness::vector_real_function_3d -CCPotentials::get_CCS_potential_ex(CC_vecfunction& x, const bool print) const { +CCPotentials::get_CCS_potential_ex(World& world, const CC_vecfunction& x, const bool print, Info& info) { if (x.type != RESPONSE) error("get_CCS_response_potential: Wrong type of input singles"); Pairs empty_doubles; CC_vecfunction empty_singles(PARTICLE); - const vector_real_function_3d fock_residue = potential_singles_ex(empty_singles, empty_doubles, x, empty_doubles, - POT_F3D_); - vector_real_function_3d potential = potential_singles_ex(empty_singles, empty_doubles, x, empty_doubles, POT_cis_); + const vector_real_function_3d fock_residue = potential_singles_ex(world, empty_singles, empty_doubles, x, + empty_doubles, POT_F3D_, info); + vector_real_function_3d potential = potential_singles_ex(world, empty_singles, empty_doubles, x, empty_doubles, POT_cis_, info); // the fock residue does not get projected, but all the rest - potential = apply_Qt(potential, mo_ket_); + QProjector Q(info.mo_bra, info.mo_ket); + // potential = apply_Qt(potential, mo_ket_); + potential=Q(potential); truncate(world, potential); - get_potentials.insert(copy(world, potential), x, POT_singles_); + info.intermediate_potentials.insert(copy(world, potential), x, POT_singles_); vector_real_function_3d result = add(world, fock_residue, potential); truncate(world, result); - const double omega = compute_cis_expectation_value(x, result, print); - x.omega = omega; return result; } madness::vector_real_function_3d -CCPotentials::get_CC2_singles_potential_ex(const CC_vecfunction& gs_singles, const Pairs& gs_doubles, - CC_vecfunction& ex_singles, const Pairs& response_doubles) const { +CCPotentials::get_CC2_singles_potential_ex(World& world, const CC_vecfunction& gs_singles, + const Pairs& gs_doubles, const CC_vecfunction& ex_singles, + const Pairs& response_doubles, Info& info) +{ MADNESS_ASSERT(gs_singles.type == PARTICLE); MADNESS_ASSERT(ex_singles.type == RESPONSE); - const vector_real_function_3d fock_residue = potential_singles_ex(gs_singles, gs_doubles, ex_singles, - response_doubles, POT_F3D_); - vector_real_function_3d Vccs = potential_singles_ex(gs_singles, gs_doubles, ex_singles, response_doubles, POT_ccs_); - vector_real_function_3d Vs2b = potential_singles_ex(gs_singles, gs_doubles, ex_singles, response_doubles, POT_s2b_); - vector_real_function_3d Vs2c = potential_singles_ex(gs_singles, gs_doubles, ex_singles, response_doubles, POT_s2c_); - vector_real_function_3d Vs4b = potential_singles_ex(gs_singles, gs_doubles, ex_singles, response_doubles, POT_s4b_); - vector_real_function_3d Vs4c = potential_singles_ex(gs_singles, gs_doubles, ex_singles, response_doubles, POT_s4c_); + Projector Ox(info.get_active_mo_bra(),ex_singles.get_vecfunction()); + Projector Ot(info.get_active_mo_bra(),gs_singles.get_vecfunction()); + const vector_real_function_3d fock_residue = potential_singles_ex(world, gs_singles, gs_doubles, + ex_singles, response_doubles, POT_F3D_, info); + vector_real_function_3d Vccs = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, response_doubles,POT_ccs_, info); + vector_real_function_3d Vs2b = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, response_doubles,POT_s2b_, info); + vector_real_function_3d Vs2c = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, response_doubles,POT_s2c_, info); + vector_real_function_3d Vs4b = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, response_doubles,POT_s4b_, info); + vector_real_function_3d Vs4c = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, response_doubles,POT_s4c_, info); // make low scaling s4a potential // -Otau(s2b_response) + -Ox(s2b_gs) // maybe store full s2b potential of gs // both need to be subtracted - vector_real_function_3d s2b_gs = potential_singles_gs(gs_singles, gs_doubles, POT_s2b_); - vector_real_function_3d Vs4a = - -1.0 * add(world, apply_projector(s2b_gs, ex_singles), apply_projector(Vs2b, gs_singles)); + vector_real_function_3d s2b_gs = potential_singles_gs(world, gs_singles, gs_doubles, POT_s2b_, info); + // vector_real_function_3d Vs4a = + // -1.0 * add(world, apply_projector(s2b_gs, ex_singles), apply_projector(Vs2b, gs_singles)); + vector_real_function_3d Vs4a = -1.0 * (Ox(s2b_gs)+ Ot(Vs2b)); //add up vector_real_function_3d unprojected = add(world, Vccs, add(world, Vs2b, add(world, Vs2c, add(world, Vs4a, add(world, Vs4b, Vs4c))))); - vector_real_function_3d potential = apply_Qt(unprojected, mo_ket_); - if (parameters.debug()) { + QProjector Q(info.mo_bra, info.mo_ket); + // vector_real_function_3d potential = apply_Qt(unprojected, mo_ket_); + vector_real_function_3d potential = Q(unprojected); + if (info.parameters.debug()) { // debug - vector_real_function_3d xbra = mul(world, nemo_->ncf->square(), ex_singles.get_vecfunction()); + vector_real_function_3d xbra = info.R_square* ex_singles.get_vecfunction(); const double ccs = inner(world, xbra, Vccs).sum(); const double s2b = inner(world, xbra, Vs2b).sum(); const double s2c = inner(world, xbra, Vs2c).sum(); const double s4a = inner(world, xbra, Vs4a).sum(); const double s4b = inner(world, xbra, Vs4b).sum(); const double s4c = inner(world, xbra, Vs4c).sum(); - std::cout << std::fixed << std::setprecision(10) << "functional response energies:" << "\n=" << ccs + if (world.rank()==0) std::cout << std::fixed << std::setprecision(10) << "functional response energies:" << "\n=" << ccs << "\n=" << s2b << "\n=" << s2c << "\n=" << s4a << "\n=" << s4b << "\n=" << s4c << "\n"; // debug end } // storing potential - get_potentials.insert(copy(world, potential), ex_singles, POT_singles_); + info.intermediate_potentials.insert(copy(world, potential), ex_singles, POT_singles_); vector_real_function_3d result = add(world, fock_residue, potential); truncate(world, result); - const double omega = compute_cis_expectation_value(ex_singles, result); - ex_singles.omega = omega; return result; } madness::vector_real_function_3d -CCPotentials::get_ADC2_singles_potential(const Pairs& gs_doubles, CC_vecfunction& ex_singles, - const Pairs& response_doubles) const { +CCPotentials::get_ADC2_singles_potential(World& world, const Pairs& gs_doubles, + CC_vecfunction& ex_singles, const Pairs& response_doubles, Info& info) const { MADNESS_ASSERT(ex_singles.type == RESPONSE); vector_real_function_3d zero = zero_functions(world, get_active_mo_ket().size()); CC_vecfunction tau(zero, PARTICLE, parameters.freeze()); - const vector_real_function_3d result = get_CC2_singles_potential_ex(tau, gs_doubles, ex_singles, response_doubles); + const vector_real_function_3d result = get_CC2_singles_potential_ex(world, tau, gs_doubles, ex_singles, response_doubles, info); return result; } double -CCPotentials::potential_energy_gs(const CC_vecfunction& bra, const CC_vecfunction& singles, - const Pairs& doubles, const PotentialType& name) const { +CCPotentials::potential_energy_gs(World& world, const CC_vecfunction& bra, + const CC_vecfunction& singles, const Pairs& doubles, const PotentialType& name) const { // sanity check MADNESS_ASSERT(singles.type == PARTICLE); CCTimer timer(world, "potential energy of " + assign_name(name)); @@ -2209,7 +2986,7 @@ CCPotentials::potential_energy_gs(const CC_vecfunction& bra, const CC_vecfunctio } else if (name == POT_s6_) { result = x_s6(bra, singles, singles, singles); } else if (name == POT_F3D_) { - result = x_s3a(bra, singles) - compute_kinetic_energy(bra.get_vecfunction(), singles.get_vecfunction()); + result = x_s3a(bra, singles) - compute_kinetic_energy(world, bra.get_vecfunction(), singles.get_vecfunction()); } else if (name == POT_ccs_) { result = x_s3c(bra, singles) + x_s5b(bra, singles, singles) + x_s5c(bra, singles, singles) + x_s6(bra, singles, singles, singles); @@ -2236,17 +3013,20 @@ CCPotentials::potential_energy_gs(const CC_vecfunction& bra, const CC_vecfunctio } madness::vector_real_function_3d -CCPotentials::potential_singles_gs(const CC_vecfunction& singles, const Pairs& doubles, - const PotentialType& name) const { +CCPotentials::potential_singles_gs(World& world, const CC_vecfunction& singles, + const Pairs& doubles, const PotentialType& name, Info& info) +{ MADNESS_ASSERT(singles.type == PARTICLE); vector_real_function_3d result; CCTimer timer(world, "Singles-Potential:" + assign_name(name)); if (name == POT_F3D_) { - result = fock_residue_closed_shell(singles); + result = fock_residue_closed_shell(world, singles, info); } else if (name == POT_ccs_) { - const CC_vecfunction t = make_t_intermediate(singles); - result = apply_Qt(ccs_unprojected(t, singles), - t); // this is not the full t projector, but the potential will be projeted afterwards and this will unclude th frozen mos + const CC_vecfunction t = make_active_t_intermediate(singles,info); + QProjector Qt(info.get_active_mo_bra(),t.get_vecfunction()); + result = Qt(ccs_unprojected(world, t, singles, info)); + // result = apply_Qt(ccs_unprojected(world, t, singles, info), t); + // this is not the full t projector, but the potential will be projeted afterwards and this will unclude th frozen mos } else if (name == POT_s2b_) { // // calculate the s2b potential and afterwards the s4a potential from the s2b potential // // because: Qt(S2b) = S2b + S4a @@ -2261,15 +3041,15 @@ CCPotentials::potential_singles_gs(const CC_vecfunction& singles, const Pairs this is calculated along with the s2b potential"); } else if (name == POT_s4b_) { - result = s4b(singles, doubles); + result = s4b(world, singles, doubles, info); } else if (name == POT_s4c_) { - result = s4c(singles, doubles); + result = s4c(world, singles, doubles, info); } else MADNESS_EXCEPTION(("potential_singles: Unknown potential " + assign_name(name)).c_str(), 1) ; @@ -2286,10 +3066,10 @@ CCPotentials::potential_singles_gs(const CC_vecfunction& singles, const Pairs& doubles_gs, const CC_vecfunction& singles_ex, - const Pairs& doubles_ex, - const PotentialType& name) const { +CCPotentials::potential_energy_ex(World& world, const CC_vecfunction& bra, + const CC_vecfunction& singles_gs, const Pairs& doubles_gs, + const CC_vecfunction& singles_ex, + const Pairs& doubles_ex, const PotentialType& name) const { // sanity check MADNESS_ASSERT(singles_gs.type == PARTICLE); MADNESS_ASSERT(singles_ex.type == RESPONSE); @@ -2309,7 +3089,7 @@ CCPotentials::potential_energy_ex(const CC_vecfunction& bra, const CC_vecfunctio result = x_s6(bra, singles_ex, singles_gs, singles_gs) + x_s6(bra, singles_gs, singles_ex, singles_gs) + x_s6(bra, singles_gs, singles_gs, singles_ex); } else if (name == POT_F3D_) { - result = x_s3a(bra, singles_ex) - compute_kinetic_energy(bra.get_vecfunction(), singles_ex.get_vecfunction()); + result = x_s3a(bra, singles_ex) - compute_kinetic_energy(world, bra.get_vecfunction(), singles_ex.get_vecfunction()); } else if (name == POT_ccs_) { result = x_s3c(bra, singles_ex) + x_s5b(bra, singles_ex, singles_gs) + x_s5c(bra, singles_ex, singles_gs) + x_s6(bra, singles_ex, singles_gs, singles_gs) + x_s5b(bra, singles_gs, singles_ex) @@ -2338,40 +3118,49 @@ CCPotentials::potential_energy_ex(const CC_vecfunction& bra, const CC_vecfunctio } madness::vector_real_function_3d -CCPotentials::potential_singles_ex(const CC_vecfunction& singles_gs, const Pairs& doubles_gs, - const CC_vecfunction& singles_ex, const Pairs& doubles_ex, - const PotentialType& name) const { +CCPotentials::potential_singles_ex(World& world, const CC_vecfunction& singles_gs, + const Pairs& doubles_gs, const CC_vecfunction& singles_ex, + const Pairs& doubles_ex, const PotentialType& name, Info& info) +{ //if(mo_ket_.size()>1) output.warning("Potential for ExSingles is not ready for more than one orbital"); // sanity check MADNESS_ASSERT(singles_gs.type == PARTICLE); MADNESS_ASSERT(singles_ex.type == RESPONSE); + + Projector Ox(info.get_active_mo_bra(),singles_ex.get_vecfunction()); + vector_real_function_3d result; CCTimer timer(world, "timer-ex-potential"); if (name == POT_F3D_) { - result = fock_residue_closed_shell(singles_ex); + result = fock_residue_closed_shell(world, singles_ex, info); } else if (name == POT_ccs_) { - const CC_vecfunction t = make_t_intermediate(singles_gs); - vector_real_function_3d part1 = apply_Qt(ccs_unprojected(t, singles_ex), t); - vector_real_function_3d part2 = apply_Qt(ccs_unprojected(singles_ex, singles_gs), t); - vector_real_function_3d part3 = apply_projector(ccs_unprojected(t, singles_gs), singles_ex); + // const CC_vecfunction t = make_t_intermediate(singles_gs,info.parameters); + const CC_vecfunction t = make_active_t_intermediate(singles_gs,info); + QProjector Qt(info.get_active_mo_bra(),t.get_vecfunction()); + // vector_real_function_3d part1 = apply_Qt(ccs_unprojected(world, t, singles_ex, info), t); + // vector_real_function_3d part2 = apply_Qt(ccs_unprojected(world, singles_ex, singles_gs, info), t); + vector_real_function_3d part1 = Qt(ccs_unprojected(world, t, singles_ex, info)); + vector_real_function_3d part2 = Qt(ccs_unprojected(world, singles_ex, singles_gs, info)); + // vector_real_function_3d part3 = apply_projector(ccs_unprojected(world, t, singles_gs, info), singles_ex); + vector_real_function_3d part3 = Ox(ccs_unprojected(world, t, singles_gs, info)); vector_real_function_3d tmp = add(world, part1, part2); result = sub(world, tmp, part3); } else if (name == POT_s2b_) { - result = s2b(singles_ex, doubles_ex); + result = s2b(world, singles_ex, doubles_ex, info); } else if (name == POT_s2c_) { - result = s2c(singles_ex, doubles_ex); + result = s2c(world, singles_ex, doubles_ex, info); } else if (name == POT_s4a_) { error("potential_singles: Demanded s4a potential -> this is calculated from the s2b potential"); } else if (name == POT_s4b_) { - vector_real_function_3d s4b_part1 = s4b(singles_gs, doubles_ex); - vector_real_function_3d s4b_part2 = s4b(singles_ex, doubles_gs); + vector_real_function_3d s4b_part1 = s4b(world, singles_gs, doubles_ex, info); + vector_real_function_3d s4b_part2 = s4b(world, singles_ex, doubles_gs, info); result = add(world, s4b_part1, s4b_part2); } else if (name == POT_s4c_) { - vector_real_function_3d s4c_part1 = s4c(singles_gs, doubles_ex); - vector_real_function_3d s4c_part2 = s4c(singles_ex, doubles_gs); + vector_real_function_3d s4c_part1 = s4c(world, singles_gs, doubles_ex, info); + vector_real_function_3d s4c_part2 = s4c(world, singles_ex, doubles_gs, info); result = add(world, s4c_part1, s4c_part2); } else if (name == POT_cis_) { - result = ccs_unprojected(CC_vecfunction(get_active_mo_ket(), HOLE, parameters.freeze()), singles_ex); + result = ccs_unprojected(world, CC_vecfunction(info.get_active_mo_ket(), HOLE, info.parameters.freeze()), singles_ex, info); } else MADNESS_EXCEPTION(("potential_singles: Unknown potential " + assign_name(name)).c_str(), 1) ; @@ -2389,19 +3178,24 @@ CCPotentials::potential_singles_ex(const CC_vecfunction& singles_gs, const Pairs } madness::vector_real_function_3d -CCPotentials::fock_residue_closed_shell(const CC_vecfunction& singles) const { +CCPotentials::fock_residue_closed_shell(World& world, const CC_vecfunction& singles, const Info& info) +{ // vecfuncT tau = singles.get_vecfunction(); + auto g12=CCConvolutionOperator(world,OT_G12,info.parameters); CCTimer timer_J(world, "J"); // vecfuncT J = mul(world, intermediates_.get_hartree_potential(), tau); - vector_real_function_3d J; - for (const auto& tmpi : singles.functions) { - const CCFunction& taui = tmpi.second; - real_function_3d hartree_potential = real_function_3d(world); - for (const auto& tmpk : mo_ket_.functions) - hartree_potential += (*g12)(mo_bra_(tmpk.first), tmpk.second); - const real_function_3d Ji = hartree_potential * taui.function; - J.push_back(Ji); - } + // vector_real_function_3d J; + real_function_3d density=dot(world, info.mo_bra,info.mo_ket); + real_function_3d hartree_potential=g12(density); + // for (const auto& tmpi : singles.functions) { + // const CCFunction& taui = tmpi.second; + // real_function_3d hartree_potential = real_function_3d(world); + // for (const auto& tmpk : mo_ket_.functions) + // hartree_potential += (g12)(info.mo_bra[tmpk.first], tmpk.second); + // const real_function_3d Ji = hartree_potential * taui.function; + // J.push_back(Ji); + // } + vector_real_function_3d J = hartree_potential* singles.get_vecfunction(); truncate(world, J); scale(world, J, 2.0); timer_J.info(true, norm2(world, J)); @@ -2409,13 +3203,14 @@ CCPotentials::fock_residue_closed_shell(const CC_vecfunction& singles) const { vector_real_function_3d vK; for (const auto& tmpi : singles.functions) { const CCFunction& taui = tmpi.second; - const real_function_3d Ki = K(taui); + const real_function_3d Ki = K(world, taui, info); vK.push_back(Ki); } scale(world, vK, -1.0); timer_K.info(true, norm2(world, vK)); // apply nuclear potential - Nuclear Uop(world, nemo_.get()); + auto ncf=std::shared_ptr(new AdhocNuclearCorrelationFactor(world, info.U2, info.U1)); + Nuclear Uop(world, ncf); vector_real_function_3d Upot = Uop(singles.get_vecfunction()); vector_real_function_3d KU = add(world, vK, Upot); return add(world, J, KU); @@ -2452,10 +3247,11 @@ CCPotentials::K_macrotask(World& world, const std::vector& mo_ } madness::real_function_3d -CCPotentials::K(const CCFunction& f) const { +CCPotentials::K(World& world, const CCFunction& f, const Info& info) { + auto g12=CCConvolutionOperator(world,OT_G12,info.parameters); real_function_3d result = real_factory_3d(world); - for (const auto& k_iterator : mo_ket_.functions) { - result += (*g12)(mo_bra_(k_iterator.first), f) * mo_ket_(k_iterator.first).function; + for (size_t k = 0; k < info.mo_ket.size(); k++) { + result += ((g12)(info.mo_bra[k] * f.f()).truncate()) *info.mo_ket[k]; } return result; } @@ -2526,12 +3322,12 @@ CCPotentials::apply_Kf(const CCFunction& x, const CCFunction madness::real_function_6d CCPotentials::apply_fK(const CCFunction& x, const CCFunction& y, const real_convolution_6d *Gscreen) const { const bool symmetric = (x.type == y.type && x.i == y.i); - const real_function_3d Kx = K(x); + const real_function_3d Kx = K(world, x, info); const real_function_6d fKphi0b = make_f_xy(CCFunction(Kx, x.i, UNDEFINED), y, Gscreen); real_function_6d fKphi0a; if (symmetric) fKphi0a = swap_particles(fKphi0b); else { - real_function_3d Ky = K(y); + real_function_3d Ky = K(world, y, info); fKphi0a = make_f_xy(x, CCFunction(Ky, y.i, UNDEFINED), Gscreen); } const real_function_6d fKphi0 = (fKphi0a + fKphi0b); @@ -2567,6 +3363,20 @@ CCPotentials::make_f_xy(const CCFunction& x, const CCFunction& phi_i, const CCFunction& phi_j, + const Info& info, const real_convolution_6d *Gscreen) { + const auto& parameters=info.parameters; + CorrelationFactor corrfac(world, parameters.gamma(), 1.e-7, parameters.lo()); + + real_function_6d fxy = CompositeFactory(world).g12(corrfac.f()). + particle1(copy(phi_i.function)).particle2(copy(phi_j.function)); + if (Gscreen == NULL) fxy.fill_tree().truncate().reduce_rank(); + else fxy.fill_cuspy_tree(*Gscreen).truncate().reduce_rank(); + return fxy; +} + + madness::real_function_6d CCPotentials::make_f_xy_macrotask(World& world, const real_function_3d& x_ket, const real_function_3d& y_ket, const real_function_3d& x_bra, const real_function_3d& y_bra, @@ -2609,33 +3419,23 @@ CCPotentials::make_f_xy_macrotask(World& world, const real_function_3d& x_ket, c } madness::vector_real_function_3d -CCPotentials::ccs_unprojected(const CC_vecfunction& ti, const CC_vecfunction& tk) const { +CCPotentials::ccs_unprojected(World& world, const CC_vecfunction& ti, const CC_vecfunction& tk, const Info& info) { + auto g12=CCConvolutionOperator(world,OT_G12,info.parameters); vector_real_function_3d result; for (const auto& itmp : ti.functions) { real_function_3d kgtk = real_factory_3d(world); for (const auto& ktmp : tk.functions) - kgtk += (*g12)(mo_bra_(ktmp.first), ktmp.second); + kgtk += (g12)(info.mo_bra[ktmp.first], ktmp.second); const real_function_3d kgtk_ti = kgtk * ti(itmp.first).function; real_function_3d kgti_tk = real_factory_3d(world); for (const auto& ktmp : tk.functions) - kgti_tk += (*g12)(mo_bra_(ktmp.first), ti(itmp.first)) * tk(ktmp.first).function; + kgti_tk += (g12)(info.mo_bra[ktmp.first], ti(itmp.first)) * tk(ktmp.first).function; const real_function_3d resulti = 2.0 * kgtk_ti - kgti_tk; result.push_back(resulti); } return result; } -madness::real_function_3d -CCPotentials::make_density(const CC_vecfunction& x) const { - real_function_3d result = real_factory_3d(world); - for (const auto& ktmp : x.functions) { - const size_t k = ktmp.first; - result += 2.0 * mo_bra_(k).function * (x(k).function); - } - result.truncate(); - return result; -} - double CCPotentials::x_s3a(const CC_vecfunction& x, const CC_vecfunction& t) const { MADNESS_ASSERT(x.size() == t.size()); @@ -2653,7 +3453,7 @@ CCPotentials::x_s3a(const CC_vecfunction& x, const CC_vecfunction& t) const { pot += (2.0 * gpart - xpart); } } - double kinetic = compute_kinetic_energy(x.get_vecfunction(), t.get_vecfunction()); + double kinetic = compute_kinetic_energy(world, x.get_vecfunction(), t.get_vecfunction()); return kinetic + pot + nuc; } @@ -2838,10 +3638,14 @@ CCPotentials::x_s4c(const CC_vecfunction& x, const CC_vecfunction& t, const Pair } madness::vector_real_function_3d -CCPotentials::s2b(const CC_vecfunction& singles, const Pairs& doubles) const { +CCPotentials::s2b(World& world, const CC_vecfunction& singles, const Pairs& doubles, Info& info) +{ vector_real_function_3d result; + // madness::print_size(world,singles.get_vecfunction(),"singles upon entry"); + // auto functions=doubles.allpairs.begin()->second.functions; + // for (const auto& f : functions) f.print_size("functions"); // see if we can skip the recalculation of the pure 6D part since this does not change during the singles iteration - vector_real_function_3d result_u = get_potentials(singles, POT_s2b_); + vector_real_function_3d result_u = info.intermediate_potentials(singles, POT_s2b_); bool recalc_u_part = false; if (result_u.empty()) recalc_u_part = true; @@ -2855,35 +3659,32 @@ CCPotentials::s2b(const CC_vecfunction& singles, const Pairs& doubles) c // check if the first function in the vector is really the pure 6D part MADNESS_ASSERT(uik[0].is_pure()); if (recalc_u_part) { - resulti_u += 2.0 * apply_s2b_operation(mo_bra_(k), uik[0], - 2); //2.0*uik[0].dirac_convolution(mo_bra_(k),g12,2); - resulti_u -= apply_s2b_operation(mo_bra_(k), uik[0], - 1); //uik[0].dirac_convolution(mo_bra_(k),g12,1); + resulti_u += 2.0 * apply_s2b_operation(world, info.mo_bra[k], uik[0], 2, info); //2.0*uik[0].dirac_convolution(mo_bra_(k),g12,2); + resulti_u -= apply_s2b_operation(world, info.mo_bra[k], uik[0], 1, info); //uik[0].dirac_convolution(mo_bra_(k),g12,1); } else { - resulti_u = result_u[i - parameters.freeze()]; + resulti_u = result_u[i - info.parameters.freeze()]; } for (size_t mm = 1; mm < uik.size(); mm++) { - resulti_r += 2.0 * apply_s2b_operation(mo_bra_(k), uik[mm], - 2); //2.0*uik[mm].dirac_convolution(mo_bra_(k),g12,2); - resulti_r -= apply_s2b_operation(mo_bra_(k), uik[mm], - 1); //uik[mm].dirac_convolution(mo_bra_(k),g12,1); + resulti_r += 2.0 * apply_s2b_operation(world, info.mo_bra[k], uik[mm], 2, info); //2.0*uik[mm].dirac_convolution(mo_bra_(k),g12,2); + resulti_r -= apply_s2b_operation(world, info.mo_bra[k], uik[mm], 1, info); //uik[mm].dirac_convolution(mo_bra_(k),g12,1); } } result.push_back(resulti_r + resulti_u); if (recalc_u_part) result_u.push_back(resulti_u); } - if (recalc_u_part) get_potentials.insert(result_u, singles, POT_s2b_); + if (recalc_u_part) info.intermediate_potentials.insert(result_u, singles, POT_s2b_); return result; } madness::vector_real_function_3d -CCPotentials::s2c(const CC_vecfunction& singles, const Pairs& doubles) const { +CCPotentials::s2c(World& world, const CC_vecfunction& singles, const Pairs& doubles, Info& info) { vector_real_function_3d result; // see if we can skip the recalculation of the pure 6D part since this does not change during the singles iteration - vector_real_function_3d result_u = get_potentials(singles, POT_s2c_); + vector_real_function_3d result_u = info.intermediate_potentials(singles, POT_s2c_); bool recalc_u_part = false; if (result_u.empty()) recalc_u_part = true; + auto g12=CCConvolutionOperator(world,OT_G12,info.parameters); for (const auto& itmp : singles.functions) { const size_t i = itmp.first; @@ -2891,10 +3692,10 @@ CCPotentials::s2c(const CC_vecfunction& singles, const Pairs& doubles) c real_function_3d resulti_r = real_factory_3d(world); for (const auto& ktmp : singles.functions) { const size_t k = ktmp.first; - const real_function_3d kgi = (*g12)(mo_bra_(k), mo_ket_(i)); + const real_function_3d kgi = (g12)(info.mo_bra[k], info.mo_ket[i]); for (const auto& ltmp : singles.functions) { const size_t l = ltmp.first; - const real_function_3d l_kgi = mo_bra_(l).function * kgi; + const real_function_3d l_kgi = info.mo_bra[l] * kgi; std::vector> ukl = get_pair_function(doubles, k, l); // check if the first function in the vector is really the pure 6D part MADNESS_ASSERT(ukl[0].is_pure()); @@ -2902,7 +3703,7 @@ CCPotentials::s2c(const CC_vecfunction& singles, const Pairs& doubles) c resulti_u += -2.0 * ukl[0].project_out(l_kgi, 2); resulti_u += ukl[0].project_out(l_kgi, 1); } else { - resulti_u = result_u[i - parameters.freeze()]; + resulti_u = result_u[i - info.parameters.freeze()]; } for (size_t mm = 1; mm < ukl.size(); mm++) { resulti_r += -2.0 * ukl[mm].project_out(l_kgi, 2); @@ -2913,7 +3714,7 @@ CCPotentials::s2c(const CC_vecfunction& singles, const Pairs& doubles) c result.push_back(resulti_r + resulti_u); if (recalc_u_part) result_u.push_back(resulti_u); } - if (recalc_u_part) get_potentials.insert(result_u, singles, POT_s2c_); + if (recalc_u_part) info.intermediate_potentials.insert(result_u, singles, POT_s2c_); return result; } @@ -2947,23 +3748,25 @@ CCPotentials::s4a_from_s2b(const vector_real_function_3d& s2b, const CC_vecfunct } madness::vector_real_function_3d -CCPotentials::s4b(const CC_vecfunction& singles, const Pairs& doubles) const { +CCPotentials::s4b(World& world, const CC_vecfunction& singles, const Pairs& doubles, const Info& info) +{ + auto g12=CCConvolutionOperator(world,OT_G12,info.parameters); vector_real_function_3d result; - const vector_real_function_3d active_mo_bra = get_active_mo_bra(); + const vector_real_function_3d active_mo_bra = info.get_active_mo_bra(); for (const auto& itmp : singles.functions) { const size_t i = itmp.first; real_function_3d resulti = real_factory_3d(world); for (const auto& ktmp : singles.functions) { const size_t k = ktmp.first; - const real_function_3d kgi = (*g12)(mo_bra_(k), singles(i)); - vector_real_function_3d l_kgi = mul_sparse(world, kgi, active_mo_bra, parameters.thresh_3D()); + const real_function_3d kgi = (g12)(info.mo_bra[k], singles(i)); + vector_real_function_3d l_kgi = mul_sparse(world, kgi, active_mo_bra, info.parameters.thresh_3D()); truncate(world, l_kgi); for (const auto& ltmp : singles.functions) { const size_t l = ltmp.first; const std::vector> ukl = get_pair_function(doubles, k, l); for (size_t mm = 0; mm < ukl.size(); mm++) { - resulti += -2.0 * ukl[mm].project_out(l_kgi[l - parameters.freeze()], 2); - resulti += ukl[mm].project_out(l_kgi[l - parameters.freeze()], 1); + resulti += -2.0 * ukl[mm].project_out(l_kgi[l - info.parameters.freeze()], 2); + resulti += ukl[mm].project_out(l_kgi[l - info.parameters.freeze()], 1); } } } @@ -2973,9 +3776,11 @@ CCPotentials::s4b(const CC_vecfunction& singles, const Pairs& doubles) c } madness::vector_real_function_3d -CCPotentials::s4c(const CC_vecfunction& singles, const Pairs& doubles) const { +CCPotentials::s4c(World& world, const CC_vecfunction& singles, const Pairs& doubles, const Info& info) +{ vector_real_function_3d result; - const vector_real_function_3d active_mo_bra = get_active_mo_bra(); + auto g12=CCConvolutionOperator(world,OT_G12,info.parameters); + const vector_real_function_3d active_mo_bra = info.get_active_mo_bra(); for (const auto& itmp : singles.functions) { const size_t i = itmp.first; real_function_3d resulti = real_factory_3d(world); @@ -2986,7 +3791,7 @@ CCPotentials::s4c(const CC_vecfunction& singles, const Pairs& doubles) c real_function_3d kgtauk = real_factory_3d(world); for (const auto& ktmp : singles.functions) { const size_t k = ktmp.first; - kgtauk += (*g12)(mo_bra_(k), singles(k)); + kgtauk += (g12)(info.mo_bra[k], singles(k)); } vector_real_function_3d l_kgtauk = mul(world, kgtauk, active_mo_bra); truncate(world, l_kgtauk); @@ -2994,12 +3799,12 @@ CCPotentials::s4c(const CC_vecfunction& singles, const Pairs& doubles) c const size_t l = ltmp.first; const std::vector> uil = get_pair_function(doubles, i, l); for (size_t mm = 0; mm < uil.size(); mm++) { - part1 += uil[mm].project_out(l_kgtauk[l - parameters.freeze()], 2); - part2 += uil[mm].project_out(l_kgtauk[l - parameters.freeze()], 1); + part1 += uil[mm].project_out(l_kgtauk[l - info.parameters.freeze()], 2); + part2 += uil[mm].project_out(l_kgtauk[l - info.parameters.freeze()], 1); } for (const auto& ktmp : singles.functions) { const size_t k = ktmp.first; - const real_function_3d k_lgtauk = (mo_bra_(k).function * (*g12)(mo_bra_(l), singles(k))).truncate(); + const real_function_3d k_lgtauk = (info.mo_bra[k] * (g12)(info.mo_bra[l], singles(k))).truncate(); for (size_t mm = 0; mm < uil.size(); mm++) { part3 += uil[mm].project_out(k_lgtauk, 2); part4 += uil[mm].project_out(k_lgtauk, 1); @@ -3029,12 +3834,12 @@ void CCPotentials::plot(const real_function_3d& f, const std::string& msg, const /// makes the t intermediates /// t_i = mo_ket_(i) + factor*tau(i) /// if factor!=1 then we can not use intermediates and set the type to UNDEFINED -CC_vecfunction CCPotentials::make_t_intermediate(const CC_vecfunction& tau, const double factor) const { +CC_vecfunction CCPotentials::make_t_intermediate(const CC_vecfunction& tau, const CCParameters& parameters) const { + FuncType returntype = MIXED; - if (factor != 1.0) returntype = UNDEFINED; if (tau.type == HOLE) { - output("make_t_intermediate: returning hole states"); + // output("make_t_intermediate: returning hole states"); return CC_vecfunction(get_active_mo_ket(), HOLE, parameters.freeze()); } if (tau.size() == 0) { @@ -3045,7 +3850,7 @@ CC_vecfunction CCPotentials::make_t_intermediate(const CC_vecfunction& tau, cons CC_vecfunction result(returntype); for (const auto& itmp:tau.functions) { const size_t i = itmp.first; - CCFunction t(mo_ket_(i).function + factor * tau(i).function, i, MIXED); + CCFunction t(mo_ket_(i).function + tau(i).function, i, MIXED); result.insert(i, t); } @@ -3079,6 +3884,43 @@ CC_vecfunction CCPotentials::make_full_t_intermediate(const CC_vecfunction& tau) return result; } +/// makes the t intermediates + +/// t_i = mo_ket_(i) + tau(i) +/// if the core is frozen the core ti will just be mo_ket_ +CC_vecfunction CCPotentials::make_full_t_intermediate(const CC_vecfunction& tau, const Info& info) { + + if (tau.type == HOLE or tau.size()==0) return CC_vecfunction(info.mo_ket,HOLE); + + CC_vecfunction result(MIXED); + for (size_t i = 0; i < info.mo_ket.size(); i++) { + if (int(i) < info.parameters.freeze()) { + result.insert(i, CCFunction(info.mo_ket[i],i,MIXED)); + } else { + CCFunction t(info.mo_ket[i] + tau(i).function, i, MIXED); + result.insert(i, t); + } + } + return result; +} + +/// makes the t intermediates + +/// t_i = mo_ket_(i) + tau(i) +/// skip frozen core orbitals +CC_vecfunction CCPotentials::make_active_t_intermediate(const CC_vecfunction& tau, const Info& info) { + + if (tau.type == HOLE or tau.size()==0) return CC_vecfunction(info.mo_ket,HOLE); + + CC_vecfunction result(MIXED); + for (size_t i = info.parameters.freeze(); i < info.mo_ket.size(); i++) { + CCFunction t(info.mo_ket[i] + tau(i).function, i, MIXED); + result.insert(i, t); + } + return result; +} + + /// makes the t intermediates /// t_i = mo_ket_(i) + tau /// i = tau.i @@ -3287,7 +4129,7 @@ void CCPotentials::test_pairs() { } -void CCPotentials::test_singles_potential() const { +void CCPotentials::test_singles_potential(Info& info) const { output("Test LRCC2 Singles Potential with empty doubles and compare to CIS"); { @@ -3298,10 +4140,10 @@ void CCPotentials::test_singles_potential() const { Pairs gs_doubles; Pairs ex_doubles; - vector_real_function_3d cis_potential = potential_singles_ex(gs_singles, gs_doubles, ex_singles, ex_doubles, - POT_cis_); - vector_real_function_3d ccs_potential = potential_singles_ex(gs_singles, gs_doubles, ex_singles, ex_doubles, - POT_ccs_); + vector_real_function_3d cis_potential = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, + ex_doubles, POT_cis_, info); + vector_real_function_3d ccs_potential = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, + ex_doubles, POT_ccs_, info); vector_real_function_3d diff = sub(world, cis_potential, ccs_potential); const double d = norm2(world, diff); madness::print_size(world, diff, "difference in potentials"); @@ -3338,9 +4180,9 @@ void CCPotentials::test_singles_potential() const { const CC_vecfunction xbra(tmp, RESPONSE, parameters.freeze()); for (const auto pot:pots) { - const vector_real_function_3d potential = potential_singles_gs(gs_singles, gs_doubles, pot); + const vector_real_function_3d potential = potential_singles_gs(world, gs_singles, gs_doubles, pot, info); const double xpot1 = inner(world, xbra.get_vecfunction(), potential).sum(); - const double xpot2 = potential_energy_gs(xbra, gs_singles, gs_doubles, pot); + const double xpot2 = potential_energy_gs(world, xbra, gs_singles, gs_doubles, pot); const double diff = xpot1 - xpot2; if (world.rank() == 0) std::cout << std::fixed << std::setprecision(10) << @@ -3353,7 +4195,7 @@ void CCPotentials::test_singles_potential() const { if (pot == POT_s2b_) { const vector_real_function_3d pot_s4a = -1.0 * apply_projector(potential, gs_singles); const double xxpot1 = inner(world, xbra.get_vecfunction(), pot_s4a).sum(); - const double xxpot2 = potential_energy_gs(xbra, gs_singles, gs_doubles, POT_s4a_); + const double xxpot2 = potential_energy_gs(world, xbra, gs_singles, gs_doubles, POT_s4a_); const double xdiff = xxpot1 - xxpot2; if (world.rank() == 0) std::cout << @@ -3371,10 +4213,10 @@ void CCPotentials::test_singles_potential() const { CCTimer time_ex(world, "CC2 Singles Response Test"); for (const auto pot:pots) { - const vector_real_function_3d potential = potential_singles_ex(gs_singles, gs_doubles, ex_singles, ex_doubles, - pot); + const vector_real_function_3d potential = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, + ex_doubles, pot, info); const double xpot1 = inner(world, xbra.get_vecfunction(), potential).sum(); - const double xpot2 = potential_energy_ex(xbra, gs_singles, gs_doubles, ex_singles, ex_doubles, pot); + const double xpot2 = potential_energy_ex(world, xbra, gs_singles, gs_doubles, ex_singles, ex_doubles, pot); const double diff = xpot1 - xpot2; if (world.rank() == 0) std::cout << std::fixed << std::setprecision(10) << @@ -3385,11 +4227,11 @@ void CCPotentials::test_singles_potential() const { if (fabs(diff) > parameters.thresh_6D()) output.warning("Test Failed"); else output("Test Passed"); if (pot == POT_s2b_) { - const vector_real_function_3d potential_gs = potential_singles_gs(gs_singles, gs_doubles, pot); + const vector_real_function_3d potential_gs = potential_singles_gs(world, gs_singles, gs_doubles, pot, info); const vector_real_function_3d pot_s4a = -1.0 * add(world, apply_projector(potential, gs_singles), apply_projector(potential_gs, ex_singles)); const double xxpot1 = inner(world, xbra.get_vecfunction(), pot_s4a).sum(); - const double xxpot2 = potential_energy_ex(xbra, gs_singles, gs_doubles, ex_singles, ex_doubles, POT_s4a_); + const double xxpot2 = potential_energy_ex(world, xbra, gs_singles, gs_doubles, ex_singles, ex_doubles, POT_s4a_); const double xdiff = xxpot1 - xxpot2; if (world.rank() == 0) std::cout << @@ -3415,7 +4257,7 @@ void CCPotentials::test() { assign_name(test5); assign_name(test6); - test_singles_potential(); + test_singles_potential(info); output.section("Testing Scalar Multiplication"); { CC_vecfunction test = mo_ket_ * 2.0; diff --git a/src/madness/chem/CCPotentials.h b/src/madness/chem/CCPotentials.h index 05a1fe202d7..5abf3043af1 100644 --- a/src/madness/chem/CCPotentials.h +++ b/src/madness/chem/CCPotentials.h @@ -16,30 +16,46 @@ #include namespace madness { - /// Class which calculates all types of CC2 Potentials class CCPotentials { public: CCPotentials(World& world_, const std::shared_ptr nemo, const CCParameters& param); - void reset_nemo(const std::shared_ptr nemo){ - nemo_=nemo; - mo_ket_=(make_mo_ket(*nemo)); - mo_bra_=(make_mo_bra(*nemo)); - orbital_energies_=init_orbital_energies(*nemo); + void reset_nemo(const std::shared_ptr nemo) { + nemo_ = nemo; + mo_ket_ = (make_mo_ket(*nemo)); + mo_bra_ = (make_mo_bra(*nemo)); + orbital_energies_ = init_orbital_energies(*nemo); }; + Info update_info(const CCParameters& parameters, const std::shared_ptr nemo) const { + Info info; + info.mo_bra = mo_bra().get_vecfunction(); + info.mo_ket = mo_ket().get_vecfunction(); + info.molecular_coordinates = nemo->get_calc()->molecule.get_all_coords_vec(); + info.parameters = parameters; + info.R_square = nemo->R_square; + info.R = nemo->R; + info.U1 = nemo->ncf->U1vec(); + info.U2 = nemo->ncf->U2(); + info.intermediate_potentials = get_potentials; + info.orbital_energies = orbital_energies_; + info.fock=nemo->compute_fock_matrix(nemo->get_calc()->amo, nemo->get_calc()->aocc); + return info; + } + virtual ~CCPotentials() {}; /// forms the regularized functions from Q and Qt Ansatz for CIS(D) where tau=0 and t=mo so that Qt=Q - void test_pair_consistency(const CCPairFunction& u, const size_t i, const size_t j, const CC_vecfunction& x) const; + void test_pair_consistency(const CCPairFunction& u, const size_t i, const size_t j, + const CC_vecfunction& x) const; bool test_compare_pairs(const CCPair& pair1, const CCPair& pair2) const; void test_pairs(); - void test_singles_potential() const; + void test_singles_potential(Info& info) const; void test(); @@ -47,12 +63,13 @@ class CCPotentials { real_function_6d make_6D_pair(const CCPair& pair) const; /// Function to load a function from disc - /// @param[in] the function which will be loaded + /// @param[in] f the function which will be loaded /// @param[in] name of the file in which the function was stored /// @return true or false depending on if the data was found on disc - template + template bool load_function(Function& f, const std::string name) const { - bool exists = archive::ParallelInputArchive::exists(world, name.c_str()); + bool exists = archive::ParallelInputArchive< + archive::BinaryFstreamInputArchive>::exists(world, name.c_str()); if (exists) { if (world.rank() == 0) print("loading function", name); archive::ParallelInputArchive ar(world, name.c_str()); @@ -62,7 +79,10 @@ class CCPotentials { f.truncate(); f.print_size(name); return true; - } else return false; + } else { + if (world.rank()==0) print("could not find function",name); + } + return false; } /// Plotting (convenience) @@ -72,7 +92,7 @@ class CCPotentials { void plot(const real_function_3d& f, const std::string& msg, const bool doprint = true) const; /// print size of a function - template + template void print_size(const Function& f, const std::string& msg, const bool print = true) const { if (print) f.print_size(msg); } @@ -85,6 +105,11 @@ class CCPotentials { return orbital_energies_[i] + orbital_energies_[j]; } + /// returns epsilon_i + epsilon_j (needed for bsh operator of pairs) + static double get_epsilon(const size_t i, const size_t j, const Info& info) { + return info.orbital_energies[i] + info.orbital_energies[j]; + } + /// returns a vector of all active mos without nuclear correlation factor (nemos) vector_real_function_3d get_active_mo_ket() const { vector_real_function_3d result; @@ -102,14 +127,14 @@ class CCPotentials { /// get the corresponding mo bra vectors to a ket vector vector_real_function_3d get_mo_bra(const CC_vecfunction& ket) const { vector_real_function_3d result; - for (const auto& ktmp:ket.functions) { + for (const auto& ktmp : ket.functions) { result.push_back(mo_bra_(ktmp.first).function); } return result; } /// returns a specific mo - CCFunction mo_ket(const size_t& i) const { + CCFunction mo_ket(const size_t& i) const { return mo_ket_(i); } @@ -119,7 +144,7 @@ class CCPotentials { } /// returns a specific mo multiplied with the squared nuclear correlation factor - CCFunction mo_bra(const size_t& i) const { + CCFunction mo_bra(const size_t& i) const { return mo_bra_(i); } @@ -142,17 +167,28 @@ class CCPotentials { /// makes the t intermediates /// t_i = mo_ket_(i) + factor*tau(i) /// if factor!=1 then we can not use intermediates and set the type to UNDEFINED - CC_vecfunction make_t_intermediate(const CC_vecfunction& tau, const double factor = 1.0) const; + CC_vecfunction make_t_intermediate(const CC_vecfunction& tau, const CCParameters& parameters) const; /// makes the t intermediates /// t_i = mo_ket_(i) + factor*tau(i) /// if the core is frozen the core ti will just be mo_ket_ CC_vecfunction make_full_t_intermediate(const CC_vecfunction& tau) const; + /// makes the t intermediates + /// t_i = mo_ket_(i) + tau(i) + /// if the core is frozen the core ti will just be mo_ket_ + static CC_vecfunction make_full_t_intermediate(const CC_vecfunction& tau, const Info& info); + + /// makes the t intermediates + + /// t_i = mo_ket_(i) + tau(i) + /// skip frozen orbitals + static CC_vecfunction make_active_t_intermediate(const CC_vecfunction& tau, const Info& info); + /// makes the t intermediates /// t_i = mo_ket_(i) + tau /// i = tau.i - CCFunction make_t_intermediate(const CCFunction& tau) const; + CCFunction make_t_intermediate(const CCFunction& tau) const; private: /// Helper function to initialize the const mo_bra and ket elements adn orbital energies @@ -168,6 +204,17 @@ class CCPotentials { init_orbital_energies(const Nemo& nemo) const; public: + /// return the regularized MP2 ansatz: |\tau_ij> = |u_ij> + Q12 f12 |ij> + static CCPair make_pair_mp2(const real_function_6d& u, const size_t i, const size_t j, const Info& info); + + /// return the regularized CC2 ansatz: |\tau_ij> = |u_ij> + Q12t f12 |t_i t_j> + static CCPair make_pair_cc2(const real_function_6d& u, const CC_vecfunction& gs_singles, + const size_t i, const size_t j, const Info& info); + + /// return the regularized CC2 ansatz: |x_ij> = |u_ij> + Q12t f12 |t_i t_j> + ????? + static CCPair make_pair_lrcc2(World& world, const CalcType& ctype, const real_function_6d& u, + const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles, + const size_t i, const size_t j, const Info& info); // Pair functions @@ -207,33 +254,39 @@ class CCPotentials { /// Compute pair correlation energies of MP2 or CC2 Ground State // Off diagonal pair energies are multiplied with 2.0 to acount for their permuted partners - /// @param[in] The Pair_function - /// @param[in] The Singles (for MP2 give empty function) for the energy contribution over disconnected doubles + /// @param[in] u the Pair_function + /// @param[in] singles the Singles (for MP2 give empty function) for the energy contribution over disconnected doubles /// @param[out] 2* - , where i and j are determined by u (see CC_Pair class) - double - compute_pair_correlation_energy(const CCPair& u, const CC_vecfunction& singles = CC_vecfunction(PARTICLE)) const; + static double + compute_pair_correlation_energy(World& world, + const Info& info, + const CCPair& u, + const CC_vecfunction& singles = CC_vecfunction(PARTICLE)); /// Compute CC2 correlation energy /// @param[in] The Pair_function /// @param[out] \sum_{ij} 2* - + 2* - , where i and j are determined by u (see CC_Pair class) /// since we do not compute all pairs (symmetry reasons) the off diagonal pair energies are conted twice /// the cc2 pair functions are dependent on the doubles (see CC_Pair structure, and make_pair function) so make shure they are updated - double - compute_cc2_correlation_energy(const CC_vecfunction& singles, const Pairs& doubles) const; + /// @param world + /// @param info + static double + compute_cc2_correlation_energy(World& world, const CC_vecfunction& singles, const Pairs& doubles, + const Info& info); - double - compute_kinetic_energy(const vector_real_function_3d& xbra, const vector_real_function_3d& xket) const; + static double + compute_kinetic_energy(World& world, const vector_real_function_3d& xbra, const vector_real_function_3d& xket); - /// returns \f$ + \f$ - double - compute_cis_expectation_value(const CC_vecfunction& x, const vector_real_function_3d& V, - const bool print = true) const; + /// compute the expectation value excitation energy using the CIS/CCS/CC2 singles + static double + compute_cis_expectation_value(World& world, const CC_vecfunction& x, + const vector_real_function_3d& V, const bool print, const Info& info); /// Something like a pair energy for CIS(D)/LRCC2 to estimate energy convergence /// calculates the response part of s2b and s2c which are independent of the mp2 amplitudes - double - compute_excited_pair_energy(const CCPair& d, const CC_vecfunction& x) const; + static double + compute_excited_pair_energy(World& world, const CCPair& d, const CC_vecfunction& x, const Info& info); /// Compute the CIS(D) Energy Correction to CIS double @@ -253,7 +306,7 @@ class CCPotentials { /// Static function for the 6D Fock residue for use in macrotask static madness::real_function_6d fock_residue_6d_macrotask(World& world, const CCPair& u, const CCParameters& parameters, - const std::vector< madness::Vector >& all_coords_vec, + const std::vector>& all_coords_vec, const std::vector& mo_ket, const std::vector& mo_bra, const std::vector& U1, @@ -262,19 +315,40 @@ class CCPotentials { /// Static version of make_constant_part_mp2 to be called from macrotask. static madness::real_function_6d make_constant_part_mp2_macrotask(World& world, const CCPair& pair, const std::vector& mo_ket, - const std::vector& mo_bra, - const CCParameters& parameters, const real_function_3d& Rsquare, - const std::vector& U1, - const std::vector argument); + const std::vector& mo_bra, + const CCParameters& parameters, const real_function_3d& Rsquare, + const std::vector& U1, + const std::vector argument); + + /// Compute the constant part of MP2, CC2 or LR-CC2 + /// + /// depending on pair.calc_type different terms are included in the constant part. + /// @param[in] pair the (empty) pair function, determines the terms in the constant part, contains some bookkeeping information (bsh_eps, i, j) + /// @param[in] gs_singles the ground-state singles for CC2 (used for the T1-transformed SO projector), may be left empty for MP2 + /// @param[in] ex_singles the excited-state singles for CC2 (used for the T1-transformed SO projector), may be left empty for MP2 and GS-CC2 + /// @param[in] info the Info object, containing the some basic quantities (MOs, parameters, etc) + /// @return the constant part of the MP2, CC2 or LR-CC2: G(Q12(g~|titj>)) + static madness::real_function_6d + make_constant_part_macrotask(World& world, const CCPair& pair, + const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles, + const Info& info); + /// Static function to iterate the mp2 pairs from macrotask static madness::real_function_6d update_pair_mp2_macrotask(World& world, const CCPair& pair, const CCParameters& parameters, - const std::vector< madness::Vector >& all_coords_vec, - const std::vector& mo_ket, - const std::vector& mo_bra, - const std::vector& U1, - const real_function_3d& U2, const real_function_6d& mp2_coupling); + const std::vector>& all_coords_vec, + const std::vector& mo_ket, + const std::vector& mo_bra, + const std::vector& U1, + const real_function_3d& U2, const real_function_6d& mp2_coupling); + + + /// iterate a pair for MP2, CC2, LRCC2 on constant singles + static CCPair iterate_pair_macrotask(World& world, + const CCPair& pair, const CC_vecfunction& gs_singles, + const CC_vecfunction& ex_singles, + const real_function_6d& coupling, const Info& info, const long maxiter); /// Function evaluates the consant part of the ground state for CC2 @@ -288,7 +362,7 @@ class CCPotentials { /// where t(1/2) = |i> + 1/2|tau_i> , t(1/2) = th real_function_6d make_constant_part_cc2_gs(const CCPair& u, const CC_vecfunction& tau, - const real_convolution_6d *Gscreen = NULL) const; + const real_convolution_6d* Gscreen = NULL) const; /// Function evaluates the consant part of the ground state for CC2 if the Qt Ansatz is used /// @param[out]The result is \f$ Q12(G(Qt12((Vreg+V_{coupling})|titj> + [F,Qt]f12|titj>))) \f$ with \f$ |t_k> = |tau_k> + |k> and Qt = Q - \sum_k |tau_k> + 1/2|tau_i> , t(1/2) = th real_function_6d make_constant_part_cc2_Qt_gs(const CCPair& u, const CC_vecfunction& tau, - const real_convolution_6d *Gscreen = NULL) const; + const real_convolution_6d* Gscreen = NULL) const; /// Function evaluates the consant part of the Excited state for CIS(D) if the Q Ansatz is used real_function_6d - make_constant_part_cispd(const CCPair& u, const CC_vecfunction& x, const real_convolution_6d *Gscreen = NULL) const; + make_constant_part_cispd(const CCPair& u, const CC_vecfunction& x, + const real_convolution_6d* Gscreen = NULL) const; /// Function evaluates the consant part of the Excited state for CIS(D) if the Qt Ansatz is used real_function_6d make_constant_part_cispd_Qt(const CCPair& u, const CC_vecfunction& x, - const real_convolution_6d *Gscreen = NULL) const; + const real_convolution_6d* Gscreen = NULL) const; /// Function evaluates the consant part of the Excited state for CC2 if the Q Ansatz is used real_function_6d make_constant_part_cc2_ex(const CCPair& u, const CC_vecfunction& tau, const CC_vecfunction& x, - const real_convolution_6d *Gscreen = NULL); + const real_convolution_6d* Gscreen = NULL); /// Function evaluates the consant part of the Excited state for CC2 if the Qt Ansatz is used real_function_6d make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction& tau, const CC_vecfunction& x, - const real_convolution_6d *Gscreen = NULL); + const real_convolution_6d* Gscreen = NULL); /// Apply the Regularization potential /// \f$ V_{reg} = [ U_e - [K,f12] + f12(F12-eij) ]|titj> \f$ @@ -329,18 +404,30 @@ class CCPotentials { /// @param[in] pointer to bsh operator (in order to screen) /// @param[out] the regularization potential (unprojected), see equation above real_function_6d - apply_Vreg(const CCFunction& ti, const CCFunction& tj, const real_convolution_6d *Gscreen = NULL) const; + apply_Vreg(const CCFunction& ti, const CCFunction& tj, + const real_convolution_6d* Gscreen = NULL) const; + + /// Apply the Regularization potential + /// \f$ V_{reg} = [ U_e - [K,f12] + f12(F12-eij) + [F,Qt] ]|titj> \f$ + /// @param[in] ti, first function in the ket, for MP2 it is the Orbital, for CC2 the relaxed Orbital t_i=\phi_i + \tau_i + /// @param[in] tj, second function in the ket ... + /// @param[in] pointer to bsh operator (in order to screen) + /// @param[out] the regularization potential (unprojected), see equation above + std::vector> + static apply_Vreg(World& world, const CCFunction& ti, const CCFunction& tj, + const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles, + const Info& info, const std::vector& argument, const double bsh_eps); /// Static version of apply_Vreg to be used from a macrotask. Will eventually replace former. madness::real_function_6d static apply_Vreg_macrotask(World& world, const std::vector& mo_ket, - const std::vector& mo_bra, - const CCParameters& parameters, const real_function_3d& Rsquare, - const std::vector& U1, const size_t& i, const size_t& j, - const FuncType& x_type, const FuncType& y_type, - const std::vector argument, - const real_convolution_6d *Gscreen = NULL); + const std::vector& mo_bra, + const CCParameters& parameters, const real_function_3d& Rsquare, + const std::vector& U1, const size_t& i, const size_t& j, + const FuncType& x_type, const FuncType& y_type, + const std::vector argument, + const real_convolution_6d* Gscreen = NULL); /// evaluates: \f$ (F(1)-ei)|ti> (x) |tj> + |ti> (x) (F(2)-ej)|tj> \f$ with the help of the singles potential /// singles equation is: (F-ei)|ti> = - V(ti) @@ -351,8 +438,20 @@ class CCPotentials { /// @param[in] tj, second function in the ket ... /// @param[in] pointer to bsh operator (in order to screen) real_function_6d - apply_reduced_F(const CCFunction& ti, const CCFunction& tj, const real_convolution_6d *Gscreen = NULL) const; + apply_reduced_F1(const CCFunction& ti, const CCFunction& tj, + const real_convolution_6d* Gscreen = NULL) const; + /// evaluates: \f$ (F(1)-ei)|ti> (x) |tj> + |ti> (x) (F(2)-ej)|tj> \f$ with the help of the singles potential + /// singles equation is: (F-ei)|ti> = - V(ti) + /// response singles equation: (F-ei-omega)|xi> = - V(xi) + /// response: \f$ (F12-ei-ej-omega)|xitj> = (F1 - ei - omega)|xi> (x) |tj> + |xi> (x) (F2-ej)|tj> \f$ + /// so in both cases the result will be: |V(ti),tj> + |ti,V(tj)> + /// @param[in] ti, first function in the ket, for MP2 it is the Orbital, for CC2 the relaxed Orbital t_i=\phi_i + \tau_i + /// @param[in] tj, second function in the ket ... + /// @param[in] pointer to bsh operator (in order to screen) + real_function_6d + static apply_reduced_F(World& world, const CCFunction& ti, const CCFunction& tj, + const Info& info, const real_convolution_6d* Gscreen = NULL); /// Apply Ue on a tensor product of two 3d functions: Ue(1,2) |x(1)y(2)> (will be either |ij> or |\tau_i\tau_j> or mixed forms) /// The Transformed electronic regularization potential (Kutzelnigg) is R_{12}^{-1} U_e R_{12} with R_{12} = R_1*R_2 @@ -365,7 +464,8 @@ class CCPotentials { /// @param[in] The BSH operator to screen: Has to be in NS form, Gscreen->modified == true /// @return R^-1U_eR|x,y> the transformed electronic smoothing potential applied on |x,y> : real_function_6d - apply_transformed_Ue(const CCFunction& x, const CCFunction& y, const real_convolution_6d *Gscreen = NULL) const; + apply_transformed_Ue(const CCFunction& x, const CCFunction& y, + const real_convolution_6d* Gscreen = NULL) const; /// Static version of apply_transformed_Ue for the use in a macrotask. /// Will eventually replace the former. @@ -374,7 +474,25 @@ class CCPotentials { const CCParameters& parameters, const real_function_3d& Rsquare, const std::vector& U1, const size_t& i, const size_t& j, const FuncType& x_type, const FuncType& y_type, - const real_convolution_6d *Gscreen = NULL); + const real_convolution_6d* Gscreen = NULL); + + real_function_6d + static apply_Ue(World& world, const CCFunction& phi_i, const CCFunction& phi_j, + const Info& info, const real_convolution_6d* Gscreen); + + + static real_function_6d + apply_KffK(World& world, const CCFunction& phi_i, const CCFunction& phi_j, + const Info& info, const real_convolution_6d* Gscreen); + static CCPairFunction + apply_commutator_F_Qt_f12(World& world, const CCFunction& phi_i, const CCFunction& phi_j, + const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles, + const Info& info, const real_convolution_6d* Gscreen); + + static CCPairFunction + apply_commutator_F_dQt_f12(World& world, const CCFunction& phi_i, const CCFunction& phi_j, + const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles, + const Info& info, const real_convolution_6d* Gscreen); /// Apply Ue on a tensor product of two 3d functions: Ue(1,2) |x(1)y(2)> (will be either |ij> or |\tau_i\tau_j> or mixed forms) /// The Transformed electronic regularization potential (Kutzelnigg) is R_{12}^{-1} U_e R_{12} with R_{12} = R_1*R_2 @@ -388,20 +506,21 @@ class CCPotentials { /// the f12K|xy> part will be screened with the BSH while the Kf12|xy> can not be screened with the BSH operator but maybe with the coulomb /// @return R^-1U_eR|x,y> the transformed electronic smoothing potential applied on |x,y> : real_function_6d - apply_exchange_commutator(const CCFunction& x, const CCFunction& y, - const real_convolution_6d *Gscreen = NULL) const; + apply_exchange_commutator(const CCFunction& x, const CCFunction& y, + const real_convolution_6d* Gscreen = NULL) const; - real_function_6d - static apply_exchange_commutator_macrotask(World& world, const std::vector& mo_ket, - const std::vector& mo_bra, const real_function_3d& Rsquare, - const size_t& i, const size_t& j, const CCParameters& parameters, - const FuncType& x_type, const FuncType& y_type, - const real_convolution_6d *Gscreen = NULL); + real_function_6d + static apply_exchange_commutator_macrotask(World& world, const std::vector& mo_ket, + const std::vector& mo_bra, + const real_function_3d& Rsquare, + const size_t& i, const size_t& j, const CCParameters& parameters, + const FuncType& x_type, const FuncType& y_type, + const real_convolution_6d* Gscreen = NULL); /// This applies the exchange commutator, see apply_exchange_commutator function for information real_function_6d - apply_exchange_commutator1(const CCFunction& x, const CCFunction& y, - const real_convolution_6d *Gscreen = NULL) const; + apply_exchange_commutator1(const CCFunction& x, const CCFunction& y, + const real_convolution_6d* Gscreen = NULL) const; /// Helper Function which performs the operation \f$ \f$ /// @param[in] function x, if nuclear correlation is used make sure this is the correct bra function @@ -409,38 +528,43 @@ class CCPotentials { /// @param[in] function a, /// @param[in] function b, double - make_xy_gf_ab(const CCFunction& x, const CCFunction& y, const CCFunction& a, const CCFunction& b) const; + make_xy_gf_ab(const CCFunction& x, const CCFunction& y, const CCFunction& a, + const CCFunction& b) const; - double make_xy_ff_ab(const CCFunction& x, const CCFunction& y, const CCFunction& a, const CCFunction& b) const { + double make_xy_ff_ab(const CCFunction& x, const CCFunction& y, + const CCFunction& a, const CCFunction& b) const { error("xy_ff_ab not yet implemented"); return 0.0; } /// apply the operator gf = 1/(2\gamma)*(Coulomb - 4\pi*BSH_\gamma) /// works only if f = (1-exp(-\gamma*r12))/(2\gamma) - real_function_3d - apply_gf(const real_function_3d& f) const; + static real_function_3d + apply_gf(World& world, const real_function_3d& f, const Info& info); /// returns /// loops over every entry in the vector and accumulates results /// helper function for CIS(D) energy - double - make_xy_op_u(const CCFunction& x, const CCFunction& y, const CCConvolutionOperator& op, - const std::vector>& u) const; + static double + make_xy_op_u(const CCFunction& x, const CCFunction& y, + const CCConvolutionOperator& op, + const std::vector>& u); /// returns for a vector of CCPairFunction /// the result is accumulated for every vercotr /// helper functions for CIS(D) energy - double - make_xy_u(const CCFunction& x, const CCFunction& y, const std::vector>& u) const; + static double + make_xy_u(const CCFunction& x, const CCFunction& y, + const std::vector>& u); /// Functions which operate with the CCPairFunction structure /// @param[in] function x, if nuclear correlation is used make sure this is the correct bra function /// @param[in] function y, if nuclear correlation is used make sure this is the correct bra function /// @param[in] CCPairFunction u, - double - make_xy_op_u(const CCFunction& x, const CCFunction& y, const CCConvolutionOperator& op, - const CCPairFunction& u) const; + static double + make_xy_op_u(const CCFunction& x, const CCFunction& y, + const CCConvolutionOperator& op, + const CCPairFunction& u); /// Helper Function which returns /// @return @@ -449,19 +573,21 @@ class CCPotentials { /// @param[in] function a, /// @param[in] function b, double - make_xy_op_ab(const CCFunction& x, const CCFunction& y, const CCConvolutionOperator& op, const CCFunction& a, - const CCFunction& b) const; + make_xy_op_ab(const CCFunction& x, const CCFunction& y, + const CCConvolutionOperator& op, const CCFunction& a, + const CCFunction& b) const; /// get the correct pair function as vector of CCPairFunction functions /// @param[in] The pair functions /// @param[out] The demanded pair function as vector of CCPairFunction functions (includes regularization tails) - std::vector> - get_pair_function(const Pairs& pairs, const size_t i, const size_t j) const; + static std::vector> + get_pair_function(const Pairs& pairs, const size_t i, const size_t j); /// returns _2 - real_function_3d - apply_s2b_operation(const CCFunction& bra, const CCPairFunction& u, const size_t particle) const; + static real_function_3d + apply_s2b_operation(World& world, const CCFunction& bra, const CCPairFunction& u, + const size_t particle, const Info& info); /// dummy to avoid confusion and for convenience real_function_6d swap_particles(const real_function_6d& f) const { @@ -469,8 +595,8 @@ class CCPotentials { } /// swap the particles of the CCPairFunction and return a new vector of swapped functions - std::vector> swap_particles(const std::vector>& f) const { - std::vector> swapped; + static std::vector> swap_particles(const std::vector>& f) { + std::vector> swapped; for (size_t i = 0; i < f.size(); i++) swapped.push_back(f[i].swap_particles()); return swapped; } @@ -479,8 +605,8 @@ class CCPotentials { /// @param[in] 6D function 1 /// @param[in] 6D function 2 double - overlap(const CCPairFunction& f1, const CCPairFunction& f2) const { - return inner(f1,f2,nemo_->ncf->square()); + overlap(const CCPairFunction& f1, const CCPairFunction& f2) const { + return inner(f1, f2, nemo_->ncf->square()); }; /// Computes the squared norm of the pair function @@ -513,8 +639,9 @@ class CCPotentials { /// Apply the Qt projector on a CCPairFunction /// works in principle like apply_Ot - CCPairFunction - apply_Qt(const CCPairFunction& f, const CC_vecfunction& t, const size_t particle, const double c = 1.0) const; + CCPairFunction + apply_Qt(const CCPairFunction& f, const CC_vecfunction& t, const size_t particle, + const double c = 1.0) const; /// Apply Ot projector on decomposed or op_decomposed 6D function /// The function does not work with type==pure right now (not needed) @@ -525,14 +652,14 @@ class CCPotentials { /// for CCPairFunction type == op_decomposd the function si f=op|xy> and we have for particle==1 /// \f$ a_k = t_k \f$ /// \f$ b_k = *y \f$ - CCPairFunction - apply_Ot(const CCPairFunction& f, const CC_vecfunction& t, const size_t particle) const; + CCPairFunction + apply_Ot(const CCPairFunction& f, const CC_vecfunction& t, const size_t particle) const; /// Apply the Greens Operator to a CCPairFunction /// For CCPairFunction only type pure and type decomposed is supported /// for the op_decomposed type a pure function can be constructed (not needed therefore not implemented yet) real_function_6d - apply_G(const CCPairFunction& u, const real_convolution_6d& G) const; + apply_G(const CCPairFunction& u, const real_convolution_6d& G) const; /// Apply BSH Operator and count time real_function_6d apply_G(const real_function_6d& f, const real_convolution_6d& G) const { @@ -548,26 +675,28 @@ class CCPotentials { /// Calculates the CC2 singles potential for the ground state: result = Fock_residue + V /// the V part is stored in the intermediate_potentials structure - vector_real_function_3d - get_CC2_singles_potential_gs(const CC_vecfunction& singles, const Pairs& doubles) const; + static vector_real_function_3d + get_CC2_singles_potential_gs(World& world, const CC_vecfunction& singles, const Pairs& doubles, + Info& info); /// Calculates the CCS/CIS singles potential for the excited state: result = Fock_residue + V /// the V part is stored in the intermediate_potentials structure /// the expectation value is calculated and updated - vector_real_function_3d - get_CCS_potential_ex(CC_vecfunction& x, const bool print = false) const; + static vector_real_function_3d + get_CCS_potential_ex(World& world, const CC_vecfunction& x, const bool print, Info& info); /// Calculates the CC2 singles potential for the Excited state: result = Fock_residue + V /// the V part is stored in the intermediate_potentials structure - vector_real_function_3d - get_CC2_singles_potential_ex(const CC_vecfunction& gs_singles, const Pairs& gs_doubles, - CC_vecfunction& ex_singles, const Pairs& response_doubles) const; + static vector_real_function_3d + get_CC2_singles_potential_ex(World& world, const CC_vecfunction& gs_singles, + const Pairs& gs_doubles, const CC_vecfunction& ex_singles, + const Pairs& response_doubles, Info& info); /// Calculates the CC2 singles potential for the Excited state: result = Fock_residue + V /// the V part is stored in the intermediate_potentials structure vector_real_function_3d - get_ADC2_singles_potential(const Pairs& gs_doubles, CC_vecfunction& ex_singles, - const Pairs& response_doubles) const; + get_ADC2_singles_potential(World& world, const Pairs& gs_doubles, + CC_vecfunction& ex_singles, const Pairs& response_doubles, Info& info) const; /// The potential manager for the ground state potential /// CC2 singles potential parts of the ground state @@ -577,9 +706,10 @@ class CCPotentials { /// @param[in] Doubles of the Ground State /// @param[in] Name of the potential /// @param[out] the potential (without Q application) + /// @param world double - potential_energy_gs(const CC_vecfunction& bra, const CC_vecfunction& singles, const Pairs& doubles, - const PotentialType& name) const; + potential_energy_gs(World& world, const CC_vecfunction& bra, const CC_vecfunction& singles, + const Pairs& doubles, const PotentialType& name) const; /// The potential manager for the ground state potential /// CC2 singles potential parts of the ground state @@ -588,8 +718,10 @@ class CCPotentials { /// @param[in] Doubles of the Ground State /// @param[in] Name of the potential /// @param[out] the potential (without Q application) - vector_real_function_3d - potential_singles_gs(const CC_vecfunction& singles, const Pairs& doubles, const PotentialType& name) const; + /// @param world + static vector_real_function_3d + potential_singles_gs(World& world, const CC_vecfunction& singles, const Pairs& doubles, + const PotentialType& name, Info& info); /// The integra manager for the excited state potential /// CC2 singles potential parts of the ground state @@ -601,10 +733,11 @@ class CCPotentials { /// @param[in] Doubles of the Excited State /// @param[in] Name of the potential /// @param[out] the potential (without Q application) + /// @param world double - potential_energy_ex(const CC_vecfunction& bra, const CC_vecfunction& singles_gs, const Pairs& doubles_gs, - const CC_vecfunction& singles_ex, const Pairs& doubles_ex, - const PotentialType& name) const; + potential_energy_ex(World& world, const CC_vecfunction& bra, const CC_vecfunction& singles_gs, + const Pairs& doubles_gs, const CC_vecfunction& singles_ex, + const Pairs& doubles_ex, const PotentialType& name) const; /// The potential manager for the excited state potential /// CC2 singles potential parts of the ground state @@ -615,21 +748,22 @@ class CCPotentials { /// @param[in] Doubles of the Excited State /// @param[in] Name of the potential /// @param[out] the potential (without Q application) - vector_real_function_3d - potential_singles_ex(const CC_vecfunction& singles_gs, const Pairs& doubles_gs, - const CC_vecfunction& singles_ex, const Pairs& doubles_ex, - const PotentialType& name) const; + /// @param world + static vector_real_function_3d + potential_singles_ex(World& world, const CC_vecfunction& singles_gs, + const Pairs& doubles_gs, const CC_vecfunction& singles_ex, + const Pairs& doubles_ex, const PotentialType& name, Info& info); /// The Fock operator is partitioned into F = T + Vn + R /// the fock residue R= 2J-K+Un for closed shell is computed here /// J_i = \sum_k |tau_i> /// K_i = \sum_k |k> - vector_real_function_3d - fock_residue_closed_shell(const CC_vecfunction& singles) const; + static vector_real_function_3d + fock_residue_closed_shell(World& world, const CC_vecfunction& singles, const Info& info); /// the K operator runs over ALL orbitals (also the frozen ones) - real_function_3d - K(const CCFunction& f) const; + static real_function_3d + K(World& world, const CCFunction& f, const Info& info); /// static version of k above for access from macrotask. will eventually replace former. real_function_3d @@ -661,13 +795,13 @@ class CCPotentials { /// Static version of apply_K above for access from macrotask. Will eventually replace former. real_function_6d static apply_K_macrotask(World& world, const std::vector& mo_ket, - const std::vector& mo_bra, - const real_function_6d& u, const size_t& particle, const CCParameters& parameters); + const std::vector& mo_bra, + const real_function_6d& u, const size_t& particle, const CCParameters& parameters); /// Apply the Exchange operator on a tensor product multiplied with f12 /// !!! Prefactor of (-1) is not inclued in K here !!!! real_function_6d - apply_Kf(const CCFunction& x, const CCFunction& y) const; + apply_Kf(const CCFunction& x, const CCFunction& y) const; /// Apply fK on a tensor product of two 3D functions /// fK|xy> = fK_1|xy> + fK_2|xy> @@ -675,28 +809,56 @@ class CCPotentials { /// @param[in] y, the second 3D function in |xy> structure holds index i and type (HOLE, PARTICLE, MIXED, UNDEFINED) /// @param[in] BSH operator to screen, has to be in modified NS form, Gscreen->modified()==true; real_function_6d - apply_fK(const CCFunction& x, const CCFunction& y, const real_convolution_6d *Gscreen = NULL) const; + apply_fK(const CCFunction& x, const CCFunction& y, + const real_convolution_6d* Gscreen = NULL) const; /// Creates a 6D function with the correlation factor and two given CCFunctions real_function_6d - make_f_xy(const CCFunction& x, const CCFunction& y, const real_convolution_6d *Gscreen = NULL) const; + make_f_xy(const CCFunction& x, const CCFunction& y, + const real_convolution_6d* Gscreen = NULL) const; + + /// Creates a 6D function with the correlation factor and two given CCFunctions + real_function_6d + static make_f_xy(World& world, const CCFunction& x, const CCFunction& y, + const Info& info, const real_convolution_6d* Gscreen = NULL); real_function_6d - static make_f_xy_macrotask( World& world, const real_function_3d& x_ket, const real_function_3d& y_ket, - const real_function_3d& x_bra, const real_function_3d& y_bra, - const size_t& i, const size_t& j, const CCParameters& parameters, - const FuncType& x_type, const FuncType& y_type, - const real_convolution_6d *Gscreen = NULL); + static make_f_xy_macrotask(World& world, const real_function_3d& x_ket, const real_function_3d& y_ket, + const real_function_3d& x_bra, const real_function_3d& y_bra, + const size_t& i, const size_t& j, const CCParameters& parameters, + const FuncType& x_type, const FuncType& y_type, + const real_convolution_6d* Gscreen = NULL); /// unprojected ccs potential /// returns 2kgtk|ti> - kgti|tk> /// the ccs potential: ti = ti and tk = tauk - vector_real_function_3d - ccs_unprojected(const CC_vecfunction& ti, const CC_vecfunction& tk) const; - + static vector_real_function_3d + ccs_unprojected(World& world, const CC_vecfunction& ti, const CC_vecfunction& tk, const Info& info); + + /// return RMS norm and max norm of residuals + template + static std::pair residual_stats(const std::vector>& residual) { + if (residual.size() == 0) return std::make_pair(0.0, 0.0); + World& world = residual.front().world(); + auto errors = norm2s(world, residual); + double rnorm = 0.0, maxrnorm = 0.0; + for (double& e : errors) { + maxrnorm = std::max(maxrnorm, e); + rnorm += e * e; + } + rnorm = sqrt(rnorm / errors.size()); + return std::make_pair(rnorm, maxrnorm); + } - real_function_3d - make_density(const CC_vecfunction& x) const; + static void print_convergence(const std::string name, const double rmsresidual, const double maxresidual, + const double energy_diff, const int iteration) { + const std::size_t bufsize = 255; + char msg[bufsize]; + std::snprintf(msg, bufsize, + "convergence of %s in iteration %2d at time %8.1fs: rms/max residual, energy change %.1e %.1e %.1e", + name.c_str(), iteration, wall_time(), rmsresidual, maxresidual,energy_diff); + print(msg); + } // integrals from singles potentials @@ -722,7 +884,8 @@ class CCPotentials { /// -(2 - )* double - x_s6(const CC_vecfunction& x, const CC_vecfunction& t1, const CC_vecfunction& t2, const CC_vecfunction& t3) const; + x_s6(const CC_vecfunction& x, const CC_vecfunction& t1, const CC_vecfunction& t2, + const CC_vecfunction& t3) const; /// 2.0 - double @@ -746,22 +909,26 @@ class CCPotentials { // result: \sum_k( 2_2 - _1 ) // singles are not needed explicitly but to determine if it is response or ground state + ///@param world ///@param[in] singles:CC_vecfunction fof type response or particle (depending on this the correct intermediates will be used) the functions themselves are not needed ///@param[in] doubles:Pairs of CC_Pairs (GS or Response) + ///@param info ///@param[out] \f$ \sum_k( 2_2 - _1 ) \f$ /// Q-Projector is not applied, sign is correct /// if the s2b potential has already been calculated it will be loaded from the intermediate_potentials structure - vector_real_function_3d - s2b(const CC_vecfunction& singles, const Pairs& doubles) const; + static vector_real_function_3d + s2b(World& world, const CC_vecfunction& singles, const Pairs& doubles, Info& info); // result: -\sum_k( _2 - _1) // singles are not needed explicitly but to determine if it is response or ground state + ///@param world ///@param[in] singles:CC_vecfunction fof type response or particle (depending on this the correct intermediates will be used) the functions themselves are not needed ///@param[in] doubles:Pairs of CC_Pairs (GS or Response) + ///@param info ///@param[out] \f$ -\sum_k( _2 - _1) \f$ /// Q-Projector is not applied, sign is correct - vector_real_function_3d - s2c(const CC_vecfunction& singles, const Pairs& doubles) const; + static vector_real_function_3d + s2c(World& world, const CC_vecfunction& singles, const Pairs& doubles, Info& info); /// the S4a potential can be calcualted from the S2b potential /// result is \f$ s4a_i = - *|tau_l> \f$ @@ -769,43 +936,47 @@ class CCPotentials { s4a_from_s2b(const vector_real_function_3d& s2b, const CC_vecfunction& singles) const; // result: -\sum_k( _2 - _1) | kgtaui = + ///@param world ///@param[in] singles:CC_vecfunction fof type response or particle (depending on this the correct intermediates will be used) the functions themselves are not needed ///@param[in] doubles:Pairs of CC_Pairs (GS or Response) + ///@param info ///@param[out] \f$ -( _2 - _1) | kgtaui = | taui=singles_i \f$ /// Q-Projector is not applied, sign is correct - vector_real_function_3d - s4b(const CC_vecfunction& singles, const Pairs& doubles) const; + static vector_real_function_3d + s4b(World& world, const CC_vecfunction& singles, const Pairs& doubles, const Info& info); + ///@param world ///@param[in] singles:CC_vecfunction fof type response or particle (depending on this the correct intermediates will be used) the functions themselves are not needed ///@param[in] doubles:Pairs of CC_Pairs (GS or Response) + ///@param info ///@param[out] \f$ ( 4_2 - 2_1 - 2_2 + _1 ) \f$ /// Q-Projector is not applied, sign is correct - vector_real_function_3d - s4c(const CC_vecfunction& singles, const Pairs& doubles) const; + static vector_real_function_3d + s4c(World& world, const CC_vecfunction& singles, const Pairs& doubles, const Info& info); // update the intermediates void update_intermediates(const CC_vecfunction& t) { g12->update_elements(mo_bra_, t); -// g12.sanity(); + // g12.sanity(); f12->update_elements(mo_bra_, t); -// f12.sanity(); + // f12.sanity(); } /// clear stored potentials /// if a response function is given only the response potentials are cleared (the GS potentials dont change anymore) void clear_potentials(const CC_vecfunction& t) const { - if (t.type == RESPONSE) { output("Clearing Response Singles-Potentials"); get_potentials.clear_response(); - } else { + } + else { output("Clearing all stored Singles-Potentials"); get_potentials.clear_all(); } } -protected: +public: // member variables /// MPI World World& world; @@ -821,19 +992,20 @@ class CCPotentials { std::vector orbital_energies_; /// the coulomb operator with all intermediates public: - std::shared_ptr> g12; + std::shared_ptr> g12; /// the f12 operator with all intermediates - std::shared_ptr> f12; + std::shared_ptr> f12; /// the correlation factor, holds necessary regularized potentials CorrelationFactor corrfac; /// Manager for stored intermediate potentials which are s2c, s2b and the whole singles potentials without fock-residue for GS and EX state mutable CCIntermediatePotentials get_potentials; + /// POD for basis and intermediates + Info info; + public: /// Messenger structure for formated output and to store warnings CCMessenger output; - }; - } /* namespace madness */ #endif /* SRC_APPS_CHEM_CCPOTENTIALS_H_ */ diff --git a/src/madness/chem/CCStructures.cc b/src/madness/chem/CCStructures.cc index 218337c7f30..ffc38ebbd51 100644 --- a/src/madness/chem/CCStructures.cc +++ b/src/madness/chem/CCStructures.cc @@ -64,26 +64,6 @@ CCTimer::info(const bool debug, const double norm) { } -madness::CC_vecfunction -CC_vecfunction::copy() const { - std::vector> vn; - for (auto x : functions) { - const CCFunction fn(madness::copy(x.second.function), x.second.i, x.second.type); - vn.push_back(fn); - } - CC_vecfunction result(vn, type); - result.irrep = irrep; - return result; -} - -std::string -CC_vecfunction::name(const int ex) const { - if (type == PARTICLE) return "tau"; - else if (type == HOLE) return "phi"; - else if (type == MIXED) return "t"; - else if (type == RESPONSE) return std::to_string(ex) + "_" + "x"; - else return "UNKNOWN"; -} void CC_vecfunction::print_size(const std::string& msg) const { @@ -117,21 +97,26 @@ madness::vector_real_function_3d CCIntermediatePotentials::operator()(const CC_vecfunction& f, const PotentialType& type) const { output("Getting " + assign_name(type) + " for " + f.name(0)); vector_real_function_3d result; - if (type == POT_singles_ and (f.type == PARTICLE or f.type == MIXED)) return current_singles_potential_gs_; - else if (type == POT_singles_ and f.type == RESPONSE) return current_singles_potential_ex_; - else if (type == POT_s2b_ and f.type == PARTICLE) return current_s2b_potential_gs_; - else if (type == POT_s2b_ and f.type == RESPONSE) return current_s2b_potential_ex_; - else if (type == POT_s2c_ and f.type == PARTICLE) return current_s2c_potential_gs_; - else if (type == POT_s2c_ and f.type == RESPONSE) return current_s2c_potential_ex_; + if (type == POT_singles_ and (f.type == PARTICLE or f.type == MIXED)) result= current_singles_potential_gs_; + else if (type == POT_singles_ and f.type == RESPONSE) result= current_singles_potential_ex_; + else if (type == POT_s2b_ and f.type == PARTICLE) result= current_s2b_potential_gs_; + else if (type == POT_s2b_ and f.type == RESPONSE) result= current_s2b_potential_ex_; + else if (type == POT_s2c_ and f.type == PARTICLE) result= current_s2c_potential_gs_; + else if (type == POT_s2c_ and f.type == RESPONSE) result= current_s2c_potential_ex_; else if (f.type == HOLE) { output(assign_name(type) + " is zero for HOLE states"); - result = zero_functions(world, f.size()); + // result = zero_functions(f.size()); } else { output("ERROR: Potential was not supposed to be stored"); MADNESS_EXCEPTION("Potential was not supposed to be stored", 1); } - if (result.empty()) output("!!!WARNING: Potential is empty!!!"); + if (result.empty()) { + output("!!!WARNING: Potential is empty!!!"); + } else { + World& world=result.front().world(); + if (parameters.debug()) print_size(world,result, "potential"); + } return result; } @@ -139,28 +124,30 @@ CCIntermediatePotentials::operator()(const CC_vecfunction& f, const PotentialTyp madness::real_function_3d CCIntermediatePotentials::operator()(const CCFunction& f, const PotentialType& type) const { output("Getting " + assign_name(type) + " for " + f.name()); - real_function_3d result = real_factory_3d(world); - if (type == POT_singles_ and (f.type == PARTICLE or f.type == MIXED)) - return current_singles_potential_gs_[f.i - parameters.freeze()]; - else if (type == POT_singles_ and f.type == RESPONSE) return current_singles_potential_ex_[f.i - parameters.freeze()]; - else if (type == POT_s2b_ and f.type == PARTICLE) return current_s2b_potential_gs_[f.i - parameters.freeze()]; - else if (type == POT_s2b_ and f.type == RESPONSE) return current_s2b_potential_ex_[f.i - parameters.freeze()]; - else if (type == POT_s2c_ and f.type == PARTICLE) return current_s2c_potential_gs_[f.i - parameters.freeze()]; - else if (type == POT_s2c_ and f.type == RESPONSE) return current_s2c_potential_ex_[f.i - parameters.freeze()]; + std::vector result; + if (type == POT_singles_ and (f.type == PARTICLE or f.type == MIXED)) result= current_singles_potential_gs_; + else if (type == POT_singles_ and f.type == RESPONSE) result= current_singles_potential_ex_; + else if (type == POT_s2b_ and f.type == PARTICLE) result= current_s2b_potential_gs_; + else if (type == POT_s2b_ and f.type == RESPONSE) result= current_s2b_potential_ex_; + else if (type == POT_s2c_ and f.type == PARTICLE) result= current_s2c_potential_gs_; + else if (type == POT_s2c_ and f.type == RESPONSE) result= current_s2c_potential_ex_; else if (f.type == HOLE) output(assign_name(type) + " is zero for HOLE states"); - else MADNESS_EXCEPTION("Potential was not supposed to be stored", 1) + else MADNESS_EXCEPTION("Potential was not supposed to be stored", 1); - ; - if (result.norm2() < FunctionDefaults<3>::get_thresh()) - output("WARNING: Potential seems to be zero ||V||=" + std::to_string(double(result.norm2()))); - - return result; + std::string errmsg="CCIntermediatePotential was not computed/stored "+assign_name(type) + " " +assign_name(f.type); + errmsg+="\n --> you might need to iterate the corresponding singles"; + MADNESS_CHECK_THROW(result.size()>(f.i-parameters.freeze()),errmsg.c_str()); + return result[f.i-parameters.freeze()]; } void CCIntermediatePotentials::insert(const vector_real_function_3d& potential, const CC_vecfunction& f, const PotentialType& type) { output("Storing potential: " + assign_name(type) + " for " + f.name(0)); + if (parameters.debug()) { + World& world=potential.front().world(); + print_size(world, potential, "potential"); + } MADNESS_ASSERT(!potential.empty()); if (type == POT_singles_ && (f.type == PARTICLE || f.type == MIXED)) current_singles_potential_gs_ = potential; else if (type == POT_singles_ && f.type == RESPONSE) current_singles_potential_ex_ = potential; @@ -182,19 +169,13 @@ void CCParameters::set_derived_values() { set_derived_value("tight_thresh_6d",thresh_6D()*0.1); set_derived_value("thresh_3d",thresh_6D()*0.01); set_derived_value("tight_thresh_3d",thresh_3D()*0.1); -// if (thresh_operators == uninitialized) thresh_operators = 1.e-6; -// if (thresh_operators_3D == uninitialized) thresh_operators_3D = thresh_operators; -// if (thresh_operators_6D == uninitialized) thresh_operators_6D = thresh_operators; -// if (thresh_bsh_3D == uninitialized) thresh_bsh_3D = thresh_operators_3D; -// if (thresh_bsh_6D == uninitialized) thresh_bsh_6D = thresh_operators_6D; -// if (thresh_poisson == uninitialized) thresh_poisson = thresh_operators_3D; -// if (thresh_f12 == uninitialized) thresh_f12 = thresh_operators_3D; set_derived_value("thresh_ue",tight_thresh_6D()); - set_derived_value("dconv_6d",thresh_6D()); - set_derived_value("dconv_3d",thresh_6D()); + set_derived_value("dconv_6d",3.0*thresh_6D()); + set_derived_value("dconv_3d",0.3*thresh_6D()); set_derived_value("econv",0.1*dconv_6D()); set_derived_value("econv_pairs",econv()); + set_derived_value("no_compute_gs",no_compute()); set_derived_value("no_compute_mp2",no_compute() and no_compute_gs()); set_derived_value("no_compute_cc2",no_compute() and no_compute_gs()); @@ -527,36 +508,79 @@ assign_name(const FuncType& inp) { std::vector -MacroTaskMp2ConstantPart::operator() (const std::vector& pair, const std::vector& mo_ket, - const std::vector& mo_bra, const CCParameters& parameters, - const real_function_3d& Rsquare, const std::vector& U1, +//MacroTaskMp2ConstantPart::operator() (const std::vector& pair, const std::vector& mo_ket, +// const std::vector& mo_bra, const CCParameters& parameters, +// const real_function_3d& Rsquare, const std::vector& U1, +// const std::vector& argument) const { +MacroTaskMp2ConstantPart::operator() (const std::vector& pair, const Info& info, const std::vector& argument) const { - World& world = mo_ket[0].world(); + World& world =info.mo_ket[0].world(); + resultT result = zero_functions_compressed(world, pair.size()); + for (size_t i = 0; i < pair.size(); i++) { + result[i] = CCPotentials::make_constant_part_mp2_macrotask(world, pair[i], info.mo_ket, info.mo_bra, + info.parameters, info.R_square, info.U1, argument); + } + return result; +} + +std::vector +MacroTaskConstantPart::operator() (const std::vector& pair, + const std::vector> & gs_singles, + const std::vector> & ex_singles, + const Info& info) const { + + World& world =info.mo_ket[0].world(); + CC_vecfunction singles(gs_singles, PARTICLE, info.parameters.freeze()); + CC_vecfunction exsingles(ex_singles, RESPONSE, info.parameters.freeze()); + + resultT result = zero_functions_compressed(world, pair.size()); for (size_t i = 0; i < pair.size(); i++) { - result[i] = CCPotentials::make_constant_part_mp2_macrotask(world, pair[i], mo_ket, mo_bra, parameters, - Rsquare, U1, argument); + result[i] = CCPotentials::make_constant_part_macrotask(world, pair[i], singles, exsingles, info); } return result; } + std::vector +//MacroTaskMp2UpdatePair::operator() (const std::vector &pair, +// const std::vector &mp2_coupling, +// const CCParameters ¶meters, +// const std::vector> &all_coords_vec, +// const std::vector &mo_ket, +// const std::vector &mo_bra, +// const std::vector &U1, const real_function_3d &U2) const { MacroTaskMp2UpdatePair::operator() (const std::vector &pair, const std::vector &mp2_coupling, - const CCParameters ¶meters, const std::vector> &all_coords_vec, - const std::vector &mo_ket, - const std::vector &mo_bra, - const std::vector &U1, const real_function_3d &U2) const { - World& world = mo_ket[0].world(); + const Info& info) const { + World& world = info.mo_ket[0].world(); resultT result = zero_functions_compressed(world, pair.size()); for (size_t i = 0; i < pair.size(); i++) { //(i, j) -> j*(j+1) + i - result[i] = CCPotentials::update_pair_mp2_macrotask(world, pair[i], parameters, all_coords_vec, mo_ket, - mo_bra, U1, U2, mp2_coupling[i]); + result[i] = CCPotentials::update_pair_mp2_macrotask(world, pair[i], info.parameters, all_coords_vec, info.mo_ket, + info.mo_bra, info.U1, info.U2, mp2_coupling[i]); + } + return result; +} + +std::vector +MacroTaskIteratePair::operator()(const std::vector& pair, + const std::vector& local_coupling, + const CC_vecfunction& gs_singles, + const CC_vecfunction& ex_singles, + const Info& info, + const std::size_t& maxiter) const { + World& world = info.mo_ket[0].world(); + resultT result = zero_functions_compressed(world, pair.size()); + + for (size_t i = 0; i < pair.size(); i++) { + result[i]= CCPotentials::iterate_pair_macrotask(world, pair[i], gs_singles, ex_singles, + local_coupling[i], info, maxiter).function(); } return result; + } template class CCConvolutionOperator; diff --git a/src/madness/chem/CCStructures.h b/src/madness/chem/CCStructures.h index bdf9847f4e1..37ef03c7cc0 100644 --- a/src/madness/chem/CCStructures.h +++ b/src/madness/chem/CCStructures.h @@ -19,6 +19,8 @@ #include #include +#include "lowrankfunction.h" + namespace madness { /// Calculation Types used by CC2 @@ -228,11 +230,11 @@ struct CCParameters : public QCCalculationParametersBase { initialize < double > ("thresh_Ue", thresh_operators, "ue threshold"); initialize < double > ("econv", thresh, "overal convergence threshold "); initialize < double > ("econv_pairs", 0.1*thresh, "convergence threshold for pairs"); - initialize < double > ("dconv_3d", 0.01*thresh, "convergence for cc singles"); - initialize < double > ("dconv_6d", thresh, "convergence for cc doubles"); + initialize < double > ("dconv_3d", 0.3*thresh, "convergence for cc singles"); + initialize < double > ("dconv_6d", 3.0*thresh, "convergence for cc doubles"); initialize < std::size_t > ("iter_max", 10, "max iterations"); - initialize < std::size_t > ("iter_max_3d", 10, "max iterations"); - initialize < std::size_t > ("iter_max_6d", 10, "max iterations"); + initialize < std::size_t > ("iter_max_3d", 10, "max iterations for singles"); + initialize < std::size_t > ("iter_max_6d", 10, "max iterations for doubles"); initialize < std::pair> ("only_pair", {-1, -1}, "compute only a single pair"); initialize < bool > ("restart", false, "restart"); initialize < bool > ("no_compute", false, "no compute"); @@ -252,11 +254,11 @@ struct CCParameters : public QCCalculationParametersBase { initialize < long > ("freeze", -1, "number of frozen orbitals: -1: automatic"); initialize < bool > ("test", false, ""); // choose if Q for the constant part of MP2 and related calculations should be decomposed: GQV or GV - GO12V - initialize < bool > ("decompose_Q", true, ""); + initialize < bool > ("decompose_Q", true, "always true",{true}); // if true the ansatz for the CC2 ground state pairs is |tau_ij> = |u_ij> + Qtf12|titj>, with Qt = Q - |tau> ("QtAnsatz", true, ""); + initialize < bool > ("QtAnsatz", true, "always true",{true}); // a vector containing the excitations which shall be optizmized later (with CIS(D) or CC2) initialize < std::vector> ("excitations", {}, "vector containing the excitations"); @@ -379,6 +381,7 @@ struct CCParameters : public QCCalculationParametersBase { struct PairVectorMap { std::vector> map; ///< maps pair index (i,j) to vector index k + PairVectorMap() = default; PairVectorMap(const std::vector> map1) : map(map1) {} static PairVectorMap triangular_map(const int nfreeze, const int nocc) { @@ -552,7 +555,7 @@ struct CC_vecfunction : public archive::ParallelSerializableObject { delta(other.delta), irrep(other.irrep) { } - /// assignment operator + /// assignment operator, shallow wrt the functions // CC_vecfunction& operator=(const CC_vecfunction& other) = default; CC_vecfunction& operator=(const CC_vecfunction& other) { if (this == &other) return *this; @@ -567,8 +570,29 @@ struct CC_vecfunction : public archive::ParallelSerializableObject { /// returns a deep copy (void shallow copy errors) - CC_vecfunction - copy() const; + friend CC_vecfunction + copy(const CC_vecfunction& other) { + CC_vecfunction tmp=other; + tmp.functions.clear(); + for (const auto& x : other.functions) { + tmp.functions.insert(std::make_pair(x.first, copy(x.second))); + } + return tmp; + } + + +//madness::CC_vecfunction +//CC_vecfunction::copy() const { +// std::vector> vn; +// for (auto x : functions) { +// const CCFunction fn(madness::copy(x.second.function), x.second.i, x.second.type); +// vn.push_back(fn); +// } +// CC_vecfunction result(vn, type); +// result.irrep = irrep; +// return result; +//} +// static CC_vecfunction load_restartdata(World& world, std::string filename) { archive::ParallelInputArchive ar(world, filename.c_str()); @@ -608,6 +632,13 @@ struct CC_vecfunction : public archive::ParallelSerializableObject { } } + hashT hash() const { + hashT hashval = std::hash{}(type); + for (const auto& f : functions) hash_combine(hashval, hash_value(f.second.f().get_impl()->id())); + + return hashval; + } + typedef std::map> CC_functionmap; CC_functionmap functions; @@ -618,7 +649,9 @@ struct CC_vecfunction : public archive::ParallelSerializableObject { std::string irrep = "null"; /// excitation irrep (direct product of x function and corresponding orbital) std::string - name(const int ex) const; + name(const int ex) const { + return madness::name(type,ex); + }; bool is_converged(const double econv, const double dconv) const { return (current_error CCConvolutionOperatorPtr(World& world, const OpType type, Parameters param) { + return std::shared_ptr(new CCConvolutionOperator(world, type, param)); + } + protected: friend CCConvolutionOperator combine(const CCConvolutionOperator& a, const CCConvolutionOperator& b) { @@ -934,6 +972,40 @@ class CCPair : public archive::ParallelSerializableObject { size_t i; size_t j; + /// customized function to store this to the cloud + + /// functions and constant_part can be very large and we want to split them and store them in different records + Recordlist cloud_store(World& world, Cloud& cloud) const { + // save bookkeeping stuff in a vector + std::vector v; + archive::VectorOutputArchive arout(v); + bool function_is_assigned=(functions.size()>0 && functions[0].is_assigned()); + arout & type & ctype & i & j & bsh_eps & function_is_assigned & constant_part.is_initialized(); + + Recordlist records; + records+=cloud.store(world,v); + if (function_is_assigned) records+=cloud.store(world,functions[0]); + if (constant_part.is_initialized()) records+=cloud.store(world,constant_part); + return records; + } + + /// customized function to load this from the cloud + + /// functions and constant_part can be very large and we want to split them and store them in different records + /// @param[inout] recordlist: containing the keys of the member variables -> will be reduced by the keys which are used + void cloud_load(World& world, const Cloud& cloud, Recordlist& recordlist) { + // load bookkeeping stuff in a vector + std::vector v=cloud.forward_load>(world,recordlist); + archive::VectorInputArchive arin(v); + bool function_is_assigned = false, constant_part_is_initialized=false; + arin & type & ctype & i & j & bsh_eps & function_is_assigned & constant_part_is_initialized; + functions.clear(); + constant_part.clear(); + + if (function_is_assigned) functions.emplace_back(cloud.forward_load>(world,recordlist)); + if (constant_part_is_initialized) constant_part=cloud.forward_load(world,recordlist); + } + /// gives back the pure 6D part of the pair function real_function_6d function() const { MADNESS_ASSERT(not functions.empty()); @@ -1025,7 +1097,11 @@ class CCPair : public archive::ParallelSerializableObject { /// little helper structure which manages the stored singles potentials struct CCIntermediatePotentials { - CCIntermediatePotentials(World& world, const CCParameters& p) : world(world), parameters(p) {}; + CCIntermediatePotentials() = default; + CCIntermediatePotentials(const CCParameters& p) : parameters(p) {}; + + CCIntermediatePotentials(const CCIntermediatePotentials& other) = default; + CCIntermediatePotentials& operator=(const CCIntermediatePotentials& other) = default; /// fetches the correct stored potential or throws an exception vector_real_function_3d @@ -1056,9 +1132,52 @@ struct CCIntermediatePotentials { void insert(const vector_real_function_3d& potential, const CC_vecfunction& f, const PotentialType& type); + Recordlist cloud_store(World& world, Cloud& cloud) const { + Recordlist records; + records+=cloud.store(world,parameters); + records+=cloud.store(world,current_s2b_potential_ex_); + records+=cloud.store(world,current_s2b_potential_gs_); + records+=cloud.store(world,current_s2c_potential_ex_); + records+=cloud.store(world,current_s2c_potential_gs_); + records+=cloud.store(world,current_singles_potential_ex_); + records+=cloud.store(world,current_singles_potential_gs_); + records+=cloud.store(world,unprojected_cc2_projector_response_); + return records; + } + + void cloud_load(World& world, const Cloud& cloud, Recordlist& recordlist) { + parameters=cloud.forward_load(world,recordlist); + current_s2b_potential_ex_=cloud.forward_load(world,recordlist); + current_s2b_potential_gs_=cloud.forward_load(world,recordlist); + current_s2c_potential_ex_=cloud.forward_load(world,recordlist); + current_s2c_potential_gs_=cloud.forward_load(world,recordlist); + current_singles_potential_ex_=cloud.forward_load(world,recordlist); + current_singles_potential_gs_=cloud.forward_load(world,recordlist); + unprojected_cc2_projector_response_=cloud.forward_load(world,recordlist); + } + + friend hashT hash_value(const CCIntermediatePotentials& ip) { + auto hash_vector_of_functions =[](const vector_real_function_3d& v) { + hashT h; + for (const auto& f : v) { + hash_combine(h, hash_value(f.get_impl()->id())); + } + return h; + }; + hashT h; + hash_combine(h, hash_vector_of_functions(ip.current_s2b_potential_ex_)); + hash_combine(h, hash_vector_of_functions(ip.current_s2b_potential_gs_)); + hash_combine(h, hash_vector_of_functions(ip.current_s2c_potential_ex_)); + hash_combine(h, hash_vector_of_functions(ip.current_s2c_potential_gs_)); + hash_combine(h, hash_vector_of_functions(ip.current_singles_potential_ex_)); + hash_combine(h, hash_vector_of_functions(ip.current_singles_potential_gs_)); + hash_combine(h, hash_vector_of_functions(ip.unprojected_cc2_projector_response_)); + return h; + } + + CCParameters parameters; private: - World& world; - const CCParameters& parameters; + // World& world; /// whole ground state singles potential without fock-residue vector_real_function_3d current_singles_potential_gs_; /// whole excited state singles potential without fock-residue @@ -1077,11 +1196,75 @@ struct CCIntermediatePotentials { /// structured output void output(const std::string& msg) const { - if (world.rank() == 0 and parameters.debug()) + if (parameters.debug()) std::cout << "Intermediate Potential Manager: " << msg << "\n"; } }; +/// POD holding some basic functions and some intermediates for the CC2 calculation + +/// the class is cloud-serializable and can be used in MacroTasks +struct Info { + std::vector> mo_ket; + std::vector> mo_bra; + std::vector> molecular_coordinates; + CCParameters parameters; + std::vector orbital_energies; + Tensor fock; + CCIntermediatePotentials intermediate_potentials; + Function R_square, U2, R;; + std::vector> U1; + + vector_real_function_3d get_active_mo_ket() const { + vector_real_function_3d result; + for (size_t i = parameters.freeze(); i < mo_ket.size(); i++) result.push_back(mo_ket[i]); + return result; + } + + vector_real_function_3d get_active_mo_bra() const { + vector_real_function_3d result; + for (size_t i = parameters.freeze(); i < mo_bra.size(); i++) result.push_back(mo_bra[i]); + return result; + } + + /// customized function to store this to the cloud + + /// functions and constant_part can be very large and we want to split them and store them in different records + Recordlist cloud_store(World& world, Cloud& cloud) const { + Recordlist records; + records+=cloud.store(world,mo_bra); + records+=cloud.store(world,mo_ket); + records+=cloud.store(world,parameters); + records+=cloud.store(world,orbital_energies); + records+=cloud.store(world,fock); + records+=cloud.store(world,intermediate_potentials); + records+=cloud.store(world,R_square); + records+=cloud.store(world,molecular_coordinates); + records+=cloud.store(world,U2); + records+=cloud.store(world,U1); + return records; + } + + /// customized function to load this from the cloud + + /// functions and constant_part can be very large and we want to split them and store them in different records + /// @param[inout] recordlist: containing the keys of the member variables -> will be reduced by the keys which are used + void cloud_load(World& world, const Cloud& cloud, Recordlist& recordlist) { + // load bookkeeping stuff in a vector + mo_bra=cloud.forward_load>>(world,recordlist); + mo_ket=cloud.forward_load>>(world,recordlist); + parameters=cloud.forward_load(world,recordlist); + orbital_energies=cloud.forward_load>(world,recordlist); + fock=cloud.forward_load>(world,recordlist); + intermediate_potentials=cloud.forward_load(world,recordlist); + R_square=cloud.forward_load>(world,recordlist); + molecular_coordinates=cloud.forward_load>>(world,recordlist); + U2=cloud.forward_load>(world,recordlist); + U1=cloud.forward_load>>(world,recordlist); + } + +}; + class MacroTaskMp2ConstantPart : public MacroTaskOperationBase { class ConstantPartPartitioner : public MacroTaskPartitioner { @@ -1102,9 +1285,10 @@ class MacroTaskMp2ConstantPart : public MacroTaskOperationBase { public: MacroTaskMp2ConstantPart(){partitioner.reset(new ConstantPartPartitioner());} - typedef std::tuple&, const std::vector>&, - const std::vector>&, const CCParameters&, const Function&, - const std::vector>&, const std::vector& > argtupleT; + // typedef std::tuple&, const std::vector>&, + // const std::vector>&, const CCParameters&, const Function&, + // const std::vector>&, const std::vector& > argtupleT; + typedef std::tuple&, const madness::Info&, const std::vector& > argtupleT; using resultT = std::vector; @@ -1114,10 +1298,60 @@ class MacroTaskMp2ConstantPart : public MacroTaskOperationBase { return result; } - resultT operator() (const std::vector& pair, const std::vector>& mo_ket, - const std::vector>& mo_bra, const CCParameters& parameters, - const Function& Rsquare, const std::vector>& U1, - const std::vector& argument) const; +// resultT operator() (const std::vector& pair, const std::vector>& mo_ket, +// const std::vector>& mo_bra, const CCParameters& parameters, +// const Function& Rsquare, const std::vector>& U1, +// const std::vector& argument) const; + resultT operator() (const std::vector& pair, const Info& info, const std::vector& argument) const; +}; + +/// compute the "constant" part of MP2, CC2, or LR-CC2 +/// +/// the constant part is +/// result = G [F,f] |ij> for MP2 +/// result = G [F,f] |t_i t_j> for CC2 +/// result = G [F,f] |t_i x_j> + |x_i t_j> for LR-CC2 +class MacroTaskConstantPart : public MacroTaskOperationBase { + + class ConstantPartPartitioner : public MacroTaskPartitioner { + public: + ConstantPartPartitioner() {}; + + partitionT do_partitioning(const std::size_t& vsize1, const std::size_t& vsize2, + const std::string policy) const override { + partitionT p; + for (size_t i = 0; i < vsize1; i++) { + Batch batch(Batch_1D(i,i+1), Batch_1D(i,i+1)); + p.push_back(std::make_pair(batch,1.0)); + } + return p; + } + }; + +public: + MacroTaskConstantPart() { + partitioner.reset(new ConstantPartPartitioner()); + name="ConstantPart"; + } + + // typedef std::tuple&, const std::vector>&, + // const std::vector>&, const CCParameters&, const Function&, + // const std::vector>&, const std::vector& > argtupleT; + typedef std::tuple&, + const std::vector>&, const std::vector>&, + const madness::Info&> argtupleT; + + using resultT = std::vector; + + resultT allocator(World& world, const argtupleT& argtuple) const { + std::size_t n = std::get<0>(argtuple).size(); + resultT result = zero_functions_compressed(world, n); + return result; + } + resultT operator() (const std::vector& pair, + const std::vector>& gs_singles, + const std::vector>& ex_singles, + const Info& info) const; }; class MacroTaskMp2UpdatePair : public MacroTaskOperationBase { @@ -1139,12 +1373,65 @@ class MacroTaskMp2UpdatePair : public MacroTaskOperationBase { } }; public: - MacroTaskMp2UpdatePair() {partitioner.reset(new UpdatePairPartitioner());} + MacroTaskMp2UpdatePair() { + partitioner.reset(new UpdatePairPartitioner()); + name="MP2UpdatePair"; + } - typedef std::tuple&, const std::vector&, const CCParameters&, - const std::vector< madness::Vector >&, - const std::vector>&, const std::vector>&, - const std::vector>&, const Function&> argtupleT; + // typedef std::tuple&, const std::vector&, const CCParameters&, + // const std::vector< madness::Vector >&, + // const std::vector>&, const std::vector>&, + // const std::vector>&, const Function&> argtupleT; + typedef std::tuple&, const std::vector&, + const std::vector>&, const Info& > argtupleT; + + using resultT = std::vector; + + resultT allocator(World& world, const argtupleT& argtuple) const { + std::size_t n = std::get<0>(argtuple).size(); + resultT result = zero_functions_compressed(world, n); + return result; + } + +// resultT operator() (const std::vector& pair, const std::vector& mp2_coupling, const CCParameters& parameters, +// const std::vector< madness::Vector >& all_coords_vec, +// const std::vector>& mo_ket, const std::vector>& mo_bra, +// const std::vector>& U1, const Function& U2) const; + resultT operator() (const std::vector& pair, const std::vector& mp2_coupling, + const std::vector< madness::Vector >& all_coords_vec, const Info& info) const; +}; + + +class MacroTaskIteratePair : public MacroTaskOperationBase { + + class IteratePairPartitioner : public MacroTaskPartitioner { + public : + IteratePairPartitioner() = default; + + partitionT do_partitioning(const std::size_t& vsize1, const std::size_t& vsize2, + const std::string policy) const override { + partitionT p; + for (size_t i = 0; i < vsize1; i++) { + Batch batch(Batch_1D(i, i+1), Batch_1D(i, i+1), Batch_1D(i,i+1)); + p.push_back(std::make_pair(batch,1.0)); + } + return p; + } + }; +public: + MacroTaskIteratePair() { + partitioner.reset(new IteratePairPartitioner()); + name="IteratePair"; + } + + typedef std::tuple< + const std::vector&, // pair + const std::vector&, // local coupling + const CC_vecfunction&, // gs singles + const CC_vecfunction&, // ex singles + const Info&, + const std::size_t& + > argtupleT; using resultT = std::vector; @@ -1154,10 +1441,23 @@ class MacroTaskMp2UpdatePair : public MacroTaskOperationBase { return result; } - resultT operator() (const std::vector& pair, const std::vector& mp2_coupling, const CCParameters& parameters, - const std::vector< madness::Vector >& all_coords_vec, - const std::vector>& mo_ket, const std::vector>& mo_bra, - const std::vector>& U1, const Function& U2) const; + /// iterate a given pair of the MP2, CC2 or LRCC2 calculation + + /// will *NOT* compute the local coupling, + /// will apply the Fock operators (J-K+V)|pair> and use + /// the (excited) singles vectors to update the pair + /// @param[in] pair: the pair which will be updated + /// @param[in] gs_singles: the ground state singles, may be dummy for MP2 + /// @param[in] ex_singles: the excited state singles, may be dummy for MP2, CC2 + /// @param[in] all_coords_vec: the coordinates of the atoms + /// @param[in] info: the info structure + /// @param[in] maxiter: the maximal number of iterations + resultT operator() (const std::vector& pair, + const std::vector& local_coupling, + const CC_vecfunction& gs_singles, + const CC_vecfunction& ex_singles, + const Info& info, + const std::size_t& maxiter) const; }; }//namespace madness diff --git a/src/madness/chem/PNO.cpp b/src/madness/chem/PNO.cpp index 9dd1b0f4111..8984a6e32b3 100644 --- a/src/madness/chem/PNO.cpp +++ b/src/madness/chem/PNO.cpp @@ -781,7 +781,7 @@ PNOPairs PNO::initialize_pairs(PNOPairs& pairs, const GuessType& inpgt) const { vector_real_function_3d& pno = pno_ij[it.ij()]; if (not pno.empty()) { msg << it.name() << ": pnos not empty ... project out and assemble\n"; - QProjector Qpno(world, pno, pno); + QProjector Qpno( pno, pno); pno = append(pno, Qpno(virtuals)); } else pno = append(pno, virtuals); @@ -814,7 +814,7 @@ PNOPairs PNO::initialize_pairs(PNOPairs& pairs, const GuessType& inpgt) const { } vector_real_function_3d virtij = guess_virtuals(pair_mo, guesstype); if (not pno.empty()) { - QProjector Qpno(world, pno, pno); + QProjector Qpno( pno, pno); virtij = Qpno(virtij); } @@ -1574,7 +1574,7 @@ PNOPairs PNO::grow_rank(PNOPairs& pairs, std::string exop)const{ vector_real_function_3d virtij = Q(basis.guess_with_exop(pair_mo, exop,param.exop_trigo()));// guess_virtuals(pair_mo, EXOP_TYPE); // project out already existing pno pairs if (not pairs.pno_ij[it.ij()].empty()) { - QProjector Qpno(world, pairs.pno_ij[it.ij()], pairs.pno_ij[it.ij()]); + QProjector Qpno(pairs.pno_ij[it.ij()], pairs.pno_ij[it.ij()]); virtij = Qpno(virtij); } diff --git a/src/madness/chem/PNO.h b/src/madness/chem/PNO.h index 0d0e77db417..cf0b24ffb91 100644 --- a/src/madness/chem/PNO.h +++ b/src/madness/chem/PNO.h @@ -36,7 +36,7 @@ class PNO : public QCPropertyInterface { T(world), V(world, nemo.ncf), F(world, &nemo), - Q(world, nemo.get_calc()->amo), + Q( nemo.get_calc()->amo), basis(world,nemo.get_calc()->molecule,8), f12(world,nemo,basis,paramf12), msg(world) diff --git a/src/madness/chem/PNOF12Potentials.cpp b/src/madness/chem/PNOF12Potentials.cpp index 99fc2bdcfc5..fa7804fd940 100644 --- a/src/madness/chem/PNOF12Potentials.cpp +++ b/src/madness/chem/PNOF12Potentials.cpp @@ -72,7 +72,7 @@ F12Potentials::F12Potentials(World& world,const Nemo& nemo, const BasisFunctions mos(nemo.get_calc()->amo), acmos(initialize_active_mos(nemo)), K(ParametrizedExchange(world, nemo, pp.exchange())), - Q(world, nemo.get_calc()->amo) { + Q(nemo.get_calc()->amo) { const double lo = 1.e-6; const double eps = param.op_thresh(); coulombop = std::shared_ptr < real_convolution_3d > (CoulombOperatorPtr(world, lo, eps)); @@ -1377,7 +1377,7 @@ PairEnergies F12Potentials::compute_hylleraas_f12_energies( for (ElectronPairIterator it = pit(); it; ++it) { // right now this will make the same guess for all pairs //const vector_real_function_3d tmp=guess_virtuals(param.abs); - QProjector Qpno(world, pnos[it.ij()]); + QProjector Qpno( pnos[it.ij()]); const vector_real_function_3d tmp = Qpno(cabs); abs_ij[it.ij()] = tmp; } diff --git a/src/madness/chem/TDHF.cc b/src/madness/chem/TDHF.cc index 72b89e28f73..e9a876d1360 100644 --- a/src/madness/chem/TDHF.cc +++ b/src/madness/chem/TDHF.cc @@ -154,7 +154,7 @@ void TDHF::prepare_calculation() { mo_ket_ = make_mo_ket(get_calc()->amo); mo_bra_ = make_mo_bra(get_calc()->amo); - Q = QProjector(world, mo_bra_.get_vecfunction(), mo_ket_.get_vecfunction()); + Q = QProjector( mo_bra_.get_vecfunction(), mo_ket_.get_vecfunction()); if (not parameters.no_compute()) { diff --git a/src/madness/chem/ccpairfunction.cc b/src/madness/chem/ccpairfunction.cc index 8fbfe3010c6..f6cb1a0cfdf 100644 --- a/src/madness/chem/ccpairfunction.cc +++ b/src/madness/chem/ccpairfunction.cc @@ -38,11 +38,17 @@ void CCPairFunction::convert_to_pure_no_op_inplace() { result= CompositeFactory(world()) .g12(get_operator().get_kernel()) .ket(get_function()); - } else if (is_decomposed()) { + } else if (is_decomposed_no_op()) { + result= CompositeFactory(world()) + .particle1(get_a()) + .particle2(get_b()); + } else if (is_op_decomposed()) { result= CompositeFactory(world()) .g12(get_operator().get_kernel()) .particle1(get_a()) .particle2(get_b()); + } else { + MADNESS_EXCEPTION("error in convert_to_pure_no_op_inplace",1); } result.fill_tree(); result.truncate(FunctionDefaults::get_thresh()*0.1); @@ -99,10 +105,26 @@ std::vector> CCPairFunction::op_dec_to_dec(const return result; } +/// turn decomposed functions with operator into pure functions +template +std::vector> CCPairFunction::dec_to_pure(const std::vector>& other) { + std::vector> result; + for (const auto& c : other) { + if (c.is_decomposed_no_op()) { + CCPairFunction tmp=copy(c); + tmp.convert_to_pure_no_op_inplace(); + result.push_back(tmp); + } else { + result.push_back(c); + } + } + return result; +} + + /// turn decomposed functions with operator into pure functions template std::vector> CCPairFunction::op_dec_to_pure(const std::vector>& other) { - LowRankFunctionParameters lrparameters; std::vector> result; for (const auto& c : other) { if (c.is_op_decomposed()) { @@ -220,6 +242,8 @@ std::vector> CCPairFunction::consolidate(const st bool op_dec_to_dec=find(options.begin(),options.end(),"op_dec_to_dec")!=options.end(); // convert op_dec functions to pure (via fill_tree) bool op_dec_to_pure=find(options.begin(),options.end(),"op_dec_to_pure")!=options.end(); + // convert dec functions to pure (via hartree product) + bool dec_to_pure=find(options.begin(),options.end(),"dec_to_pure")!=options.end(); // reorthogonalize decomposed functions and op_decomposed functions bool lindep=find(options.begin(),options.end(),"remove_lindep")!=options.end(); @@ -229,6 +253,7 @@ std::vector> CCPairFunction::consolidate(const st if (op_dec_to_dec) result=CCPairFunction::op_dec_to_dec(result,centers); if (op_dec_to_pure) result=CCPairFunction::op_dec_to_pure(result); + if (dec_to_pure) result=CCPairFunction::dec_to_pure(result); if (op_pure_to_pure) result=CCPairFunction::op_pure_to_pure(result); if (not is_collected(result)) result=collect_same_types(result); @@ -252,6 +277,8 @@ CCPairFunction& CCPairFunction::multiply_with_op_inplace(const s template double CCPairFunction::make_xy_u(const CCFunction& xx, const CCFunction& yy) const { + CCPairFunction bra(xx.function,yy.function); + return inner(bra,*this); T result = 0.0; if (is_pure()) { World& world=xx.function.world(); @@ -602,11 +629,9 @@ std::vector> CCPairFunction::apply(const Projecto constexpr std::size_t LDIM=CCPairFunction::LDIM; // print("apply projector on argument with terms",argument.size()); if (auto P=dynamic_cast*>(&projector)) { -// print("P->get_particle()",P->get_particle()); MADNESS_CHECK_THROW(P->get_particle()==0 or P->get_particle()==1,"P Projector particle must be 0 or 1 in CCPairFunction"); } if (auto Q=dynamic_cast*>(&projector)) { -// print("Q->get_particle()",Q->get_particle()); MADNESS_CHECK_THROW(Q->get_particle()==0 or Q->get_particle()==1,"Q Projector particle must be 0 or 1 in CCPairFunction"); } std::vector> result; @@ -618,10 +643,15 @@ std::vector> CCPairFunction::apply(const Projecto auto tmp2=CCPairFunction(tmp); result.push_back(tmp2); } else if (auto P=dynamic_cast*>(&projector)) { - result.push_back(CCPairFunction((*P)(pf.get_function(),P->get_particle()+1))); + // result.push_back(CCPairFunction((*P)(pf.get_function()))); + auto [left,right]=P->get_vectors_for_outer_product(pf.get_function()); + result.push_back(CCPairFunction(left,right)); + } else if (auto Q=dynamic_cast*>(&projector)) { - result.push_back(CCPairFunction((*Q)(pf.get_function(),Q->get_particle()+1))); + // result.push_back(CCPairFunction((*Q)(pf.get_function()))); + result.push_back(pf); + result.push_back(-1.0*Q->get_P_projector()(pf)); } else { MADNESS_EXCEPTION("CCPairFunction: unknown projector type",1); @@ -629,8 +659,8 @@ std::vector> CCPairFunction::apply(const Projecto } else if (pf.is_decomposed_no_op()) { // pair function is sum_i | a_i b_i > if (auto SO=dynamic_cast*>(&projector)) { // Q12 | kl > = (1-O1)(1-O2) |kl> = |(1-O1)k (1-O2)l> - QProjector Q1(world,SO->bra1(),SO->ket1()); - QProjector Q2(world,SO->bra2(),SO->ket2()); + QProjector Q1(SO->bra1(),SO->ket1()); + QProjector Q2(SO->bra2(),SO->ket2()); result.push_back(CCPairFunction(Q1(pf.get_a()),Q2(pf.get_b()))); } else if (auto P=dynamic_cast*>(&projector)) { @@ -652,9 +682,9 @@ std::vector> CCPairFunction::apply(const Projecto // CCTimer t(world,"SO block"); // Q12 = 1 - O1 (1 - 1/2 O2) - O2 (1 - 1/2 O1) // print("entering SO block"); - QProjector Q1(world,SO->bra1(),SO->ket1()); + QProjector Q1(SO->bra1(),SO->ket1()); Q1.set_particle(0); - QProjector Q2(world,SO->bra2(),SO->ket2()); + QProjector Q2(SO->bra2(),SO->ket2()); Q2.set_particle(1); Projector O1(SO->bra1(),SO->ket1()); diff --git a/src/madness/chem/ccpairfunction.h b/src/madness/chem/ccpairfunction.h index d805daf9f8b..81a3437d4bb 100644 --- a/src/madness/chem/ccpairfunction.h +++ b/src/madness/chem/ccpairfunction.h @@ -25,6 +25,20 @@ class ProjectorBase; /// Types of Functions used by CC_function class enum FuncType { UNDEFINED, HOLE, PARTICLE, MIXED, RESPONSE }; +inline std::string name(const FuncType& type, const int ex=-1) { + if (type == PARTICLE) return "tau"; + else if (type == HOLE) return "phi"; + else if (type == MIXED) return "t"; + else if (type == RESPONSE) { + MADNESS_CHECK_THROW(ex>=0,"ex must be >=0"); + return std::to_string(ex) + "_" + "x"; + } + else { + MADNESS_EXCEPTION("unknown FuncType",1); + } + return "undefined"; +} + /// structure for a CC Function 3D which holds an index and a type // the type is defined by the enum FuncType (definition at the start of this file) template @@ -41,6 +55,17 @@ class CCFunction : public archive::ParallelSerializableObject { CCFunction(const CCFunction& other) : current_error(other.current_error), function(other.function), i(other.i), type(other.type) {}; + + /// deep copy + friend CCFunction copy(const CCFunction& other) { + CCFunction tmp; + tmp.current_error=other.current_error; + tmp.function=madness::copy(other.function); + tmp.i=other.i; + tmp.type=other.type; + return tmp; + } + double current_error; Function function; @@ -116,7 +141,7 @@ class TwoBodyFunctionComponentBase { virtual bool has_operator() const = 0; // virtual void set_operator(const std::shared_ptr op) = 0; // virtual const std::shared_ptr get_operator_ptr() const = 0; - virtual void print_size() const = 0; + virtual void print_size(const std::string name="") const = 0; virtual std::string name(const bool transpose=false) const = 0; virtual World& world() const =0; virtual std::shared_ptr clone() = 0; @@ -155,8 +180,8 @@ class TwoBodyFunctionPureComponent : public TwoBodyFunctionComponentBase { World& world() const override {return u.world();}; - void print_size() const override { - u.print_size(name(false)); + void print_size(const std::string name1="") const override { + u.print_size(name1+name(false)); } std::string name(const bool transpose) const override { @@ -240,7 +265,7 @@ class TwoBodyFunctionSeparatedComponent : public TwoBodyFunctionComponentBase { return a.front().world(); }; - void print_size() const override { + void print_size(const std::string name1="") const override { if (a.size() > 0) { World& world = a.front().world(); madness::print_size(world, a, "a from " + name(false)); @@ -441,6 +466,9 @@ using pureT=Function; /// turn decomposed functions with operator into pure functions without operators static std::vector op_dec_to_pure(const std::vector& other); + /// turn decomposed functions without operator into pure functions without operators + static std::vector dec_to_pure(const std::vector& other); + /// remove linear dependent terms in the low-rank parts static std::vector remove_linearly_dependent_terms(const std::vector& other, double thresh=-1.0); @@ -458,7 +486,7 @@ using pureT=Function; /// @param[in] centers: a vector of 3D-vectors which are the centers of the grid for low-rank functions /// TODO: implement a function for removing linearly dependent terms without orthonormalization friend std::vector consolidate(const std::vector& other, - const std::vector options, + const std::vector options=std::vector(), const std::vector> centers=std::vector>()) { if (other.size()>0) return other.front().consolidate(other,options,centers); // workaround @@ -617,8 +645,27 @@ using pureT=Function; } /// print the size of the functions - void print_size() const { - if (component) component->print_size(); + void print_size(const std::string name1="") const { + if (not component) { + print("CCPairFunction "+name1+ " not assigned"); + } else if (component->is_pure()) { + component->print_size(name1); + } else { + print("printing",name1,name()); + double wall=wall_time(); + component->print_size(); + double anorm=madness::norm2(world(),get_a()); + double bnorm=madness::norm2(world(),get_b()); + print("anorm, bnorm",anorm,bnorm); + double norm=this->norm2(); + std::size_t fsize=get_a().size(); + std::size_t bufsize=128; + char buf[bufsize]; + snprintf(buf, bufsize, "%40s at time %.1fs: norm/ #functions: %7.5f %zu \n", + ((name1+" "+name()).c_str()), wall, norm, fsize); + if (world().rank()==0) print(std::string(buf)); + } + }; std::string name(const bool transpose=false) const { @@ -627,8 +674,13 @@ using pureT=Function; } typename Tensor::scalar_type norm2() const { - if (component->is_pure()) return pure().get_function().norm2(); - if (component->is_decomposed()) { + if (is_pure_no_op()) { + return pure().get_function().norm2(); + } else if (is_op_pure()) { + double n2=inner(*this,*this); + if (n2<0.0) print("norm of ",name()," is < 0.0"); + return sqrt(std::max(0.0,n2)); + } else if (component->is_decomposed()) { Function R2; auto tmp= inner_internal(*this,R2); typename Tensor::scalar_type result=std::real(tmp); @@ -760,33 +812,8 @@ using pureT=Function; const std::pair>, std::vector>> assign_particles(const size_t particle) const; static std::vector> apply(const ProjectorBase& P, const std::vector>& argument); - - /// apply the operator on a CCPairfunction, both with the same dimension - - /// note there is another function, where the operator works only on some dimensions of the CCPairFunction! - /// @return result(x) = \int op(x,x') arg(x') dx': a CCPairfunction with the same dimension as the argument - friend CCPairFunction apply(const SeparatedConvolution& G, const CCPairFunction& argument) { - CCPairFunction result; - timer t1(argument.world()); - if (argument.is_pure()) { - result=CCPairFunction(G(argument.get_function())); - } else if (argument.is_decomposed_no_op()) { - Function result1=real_factory_6d(argument.world()).compressed(); - - MADNESS_ASSERT(argument.get_a().size() == argument.get_b().size()); - MADNESS_CHECK_THROW(G.particle()==-1,"G must be a two-particle operator in apply(CCPairFunction)"); - - for (size_t k = 0; k < argument.get_a().size(); k++) { - const Function tmp = G(argument.get_a()[k], argument.get_b()[k]); - result1 += tmp; - } - result=CCPairFunction(result1); - } else { - MADNESS_EXCEPTION("unknown type in CCPairFunction::apply",1); - } - t1.end("applying G to " + argument.name()); - return result; - }; + static std::vector> apply(const SeparatedConvolution& G, const CCPairFunction& argument); + static std::vector> apply(const SeparatedConvolution& G, const std::vector>& argument); Function partial_inner(const Function& f, @@ -910,12 +937,34 @@ std::vector> apply(const SeparatedConvolution& } template -CCPairFunction apply(const ProjectorBase& projector, const CCPairFunction& argument) { - auto result=madness::apply(projector,std::vector> (1,argument)); - MADNESS_CHECK(result.size()==1); - return result[0]; +CCPairFunction apply(const SeparatedConvolution& G, const std::vector>& argument) { + CCPairFunction result; + for (const auto& a : argument) result+=G(a); + return result; } +/// apply the operator on a CCPairfunction, both with the same dimension + +/// note there is another function, where the operator works only on some dimensions of the CCPairFunction! +/// @return result(x) = \int op(x,x') arg(x') dx': a CCPairfunction with the same dimension as the argument +template +CCPairFunction apply(const SeparatedConvolution& G, const CCPairFunction& argument) { + CCPairFunction result; + timer t1(argument.world()); + if (argument.is_pure()) { + result=CCPairFunction(G(argument.get_function())); + } else if (argument.is_decomposed_no_op()) { + MADNESS_ASSERT(argument.get_a().size() == argument.get_b().size()); + Function result1=G(argument.get_a(), argument.get_b()); + result=CCPairFunction(result1); + } else { + MADNESS_EXCEPTION("unknown type in CCPairFunction::apply",1); + } + t1.end("applying G to " + argument.name()); + return result; +}; + + /// apply the projector on the argument function, potentially yielding a vector of CCPairfunctions as result /// result can be @@ -928,6 +977,13 @@ std::vector> apply(const ProjectorBase& projector, const } +template +CCPairFunction apply(const ProjectorBase& projector, const CCPairFunction& argument) { + auto result=madness::apply(projector,std::vector> (1,argument)); + MADNESS_CHECK(result.size()==1); + return result[0]; +} + template Function::LDIM>inner(const CCPairFunction& c, const Function::LDIM>& f, const std::tuple v1, const std::tuple v2) { @@ -987,6 +1043,28 @@ std::vector> inner(const std::vector +std::vector > operator+(const std::vector> c1, const std::vector >& c2) { + std::vector> result; + for (const auto& l : c1) result.push_back(l); + for (const auto& l : c2) result.push_back(l); + return result; +} + +template +std::vector > operator-(const std::vector> c1, const std::vector >& c2) { + std::vector> result; + for (const auto& l : c1) result.push_back(l); + for (const auto& l : c2) result.push_back(-1.0*l); + return result; +} + +template +std::vector >& operator+=(std::vector >& lhs, + const CCPairFunction& rhs) { + lhs.push_back(rhs); + return lhs; +} template std::vector >& operator+=(std::vector >& rhs, diff --git a/src/madness/chem/correlationfactor.cc b/src/madness/chem/correlationfactor.cc index def3c837957..9f34b498a1e 100644 --- a/src/madness/chem/correlationfactor.cc +++ b/src/madness/chem/correlationfactor.cc @@ -37,6 +37,7 @@ namespace madness{ /// create and return a new nuclear correlation factor + /// note there is also an Ad-hoc nuclear correlation factor, which can only be created directly /// @param[in] world the world /// @param[in] calc the calculation as read from the input file /// @return a nuclear correlation factor diff --git a/src/madness/chem/correlationfactor.h b/src/madness/chem/correlationfactor.h index 65e75178d18..d80ff48586c 100644 --- a/src/madness/chem/correlationfactor.h +++ b/src/madness/chem/correlationfactor.h @@ -83,7 +83,7 @@ namespace madness { class NuclearCorrelationFactor { public: enum corrfactype {None, GradientalGaussSlater, GaussSlater, LinearSlater, - Polynomial, Slater, poly4erfc, Two}; + Polynomial, Slater, poly4erfc, Two, Adhoc}; typedef std::shared_ptr< FunctionFunctorInterface > functorT; /// ctor @@ -213,12 +213,14 @@ class NuclearCorrelationFactor { /// the molecule const Molecule& molecule; +protected: /// the three components of the U1 potential std::vector U1_function; /// the purely local U2 potential, having absorbed the nuclear pot V_nuc real_function_3d U2_function; +private: /// the correlation factor S wrt a given atom /// @param[in] r the distance of the req'd coord to the nucleus @@ -2032,6 +2034,65 @@ class PseudoNuclearCorrelationFactor : public NuclearCorrelationFactor { }; +/// this ncf has no information about itself, only U2 and U1 assigned +class AdhocNuclearCorrelationFactor : public NuclearCorrelationFactor { + +public: + /// ctor + + /// @param[in] world the world + /// @param[in] mol molecule with the sites of the nuclei + AdhocNuclearCorrelationFactor(World& world, const real_function_3d U2, + const std::vector& U1) + : NuclearCorrelationFactor(world,Molecule()) { + + U2_function=U2; + U1_function=U1; + + if (world.rank()==0) { + print("constructed ad hoc nuclear correlation factor"); + } + } + + corrfactype type() const {return Adhoc;} + +private: + + double Sr_div_S(const double& r, const double& Z) const { + MADNESS_EXCEPTION("no Sr_div_S() in AdhocNuclearCorrelationFactor",0); + return 0.0; + } + + double Srr_div_S(const double& r, const double& Z) const { + MADNESS_EXCEPTION("no Srr_div_S() in AdhocNuclearCorrelationFactor",0); + return 0.0; + } + + double Srrr_div_S(const double& r, const double& Z) const { + MADNESS_EXCEPTION("no Srrr_div_S() in AdhocNuclearCorrelationFactor",0); + return 0.0; + } + + /// the nuclear correlation factor + double S(const double& r, const double& Z) const { + MADNESS_EXCEPTION("no S() in AdhocNuclearCorrelationFactor",0); + return 0.0; + } + + /// radial part first derivative of the nuclear correlation factor + coord_3d Sp(const coord_3d& vr1A, const double& Z) const { + MADNESS_EXCEPTION("no Sp() in AdhocNuclearCorrelationFactor",0); + return coord_3d(0.0); + } + + /// second derivative of the nuclear correlation factor + double Spp_div_S(const double& r, const double& Z) const { + MADNESS_EXCEPTION("no Spp_div_S() in AdhocNuclearCorrelationFactor",0); + return 0.0; + } +}; + + std::shared_ptr create_nuclear_correlation_factor(World& world, const Molecule& molecule, diff --git a/src/madness/chem/electronic_correlation_factor.h b/src/madness/chem/electronic_correlation_factor.h index a1fab4d12c1..c4b3168b513 100644 --- a/src/madness/chem/electronic_correlation_factor.h +++ b/src/madness/chem/electronic_correlation_factor.h @@ -35,20 +35,18 @@ class CorrelationFactor { CorrelationFactor(World& world, const double& gamma, const double dcut, const Molecule& molecule) : world(world), _gamma(gamma), dcut(dcut) { lo=1.e-6;//lo = molecule.smallest_length_scale(); - if (world.rank()==0) { - - if (gamma>0.0) print("constructed correlation factor with gamma=",gamma); - else if (gamma==0.0) print("constructed linear correlation factor"); - } +// if (world.rank()==0) { +// if (gamma>0.0) print("constructed correlation factor with gamma=",gamma); +// else if (gamma==0.0) print("constructed linear correlation factor"); +// } } /// ctor, use negative gamma for linear correlation factor r12 CorrelationFactor(World& world, const double& gamma, const double dcut, const double lo) : world(world), _gamma(gamma), dcut(dcut), lo(lo) { - if (world.rank()==0) { - - if (gamma>0.0) print("constructed correlation factor with gamma=",gamma); - else if (gamma==0.0) print("constructed linear correlation factor"); - } +// if (world.rank()==0) { +// if (gamma>0.0) print("constructed correlation factor with gamma=",gamma); +// else if (gamma==0.0) print("constructed linear correlation factor"); +// } } /// copy ctor diff --git a/src/madness/chem/lowrankfunction.h b/src/madness/chem/lowrankfunction.h index 3fbfffe2005..9b8845dda45 100644 --- a/src/madness/chem/lowrankfunction.h +++ b/src/madness/chem/lowrankfunction.h @@ -271,8 +271,9 @@ namespace madness { public: /// ctor takes centers of the grid and the grid parameters molecular_grid(const std::vector> origins, const LowRankFunctionParameters& params) - : centers(origins) { - if (centers.size()==0) centers.push_back({0,0,0}); + : centers(origins) + { + if (centers.size()==0) centers.push_back(Vector(0) ); if (params.gridtype()=="random") grid_builder=std::make_shared>(params.volume_element(),params.radius()); // else if (params.gridtype()=="cartesian") grid_builder=std::make_shared>(params.volume_element(),params.radius()); else if (params.gridtype()=="dftgrid") { diff --git a/src/madness/chem/mp3.cc b/src/madness/chem/mp3.cc index da6343dfef7..a720e34f3be 100644 --- a/src/madness/chem/mp3.cc +++ b/src/madness/chem/mp3.cc @@ -971,26 +971,26 @@ double MP3::mp3_energy_contribution_macrotask_driver(const Pairs& mp2pai MacroTaskMP3 task_square("square"); MacroTask macrotask_triangular(world,task_triangular,taskq); MacroTask macrotask_square(world,task_square,taskq); - // auto ghij_future=macrotask_triangular(std::string("ghij"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector()); + auto ghij_future=macrotask_triangular(std::string("ghij"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector()); auto klmn_future=macrotask_square(std::string("klmn"), nact, nact, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector()); - // auto cd_future=macrotask_triangular(std::string("cd"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector()); - // auto ef_future=macrotask_triangular(std::string("ef"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector()); - // taskq->print_taskq(); + auto cd_future=macrotask_triangular(std::string("cd"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector()); + auto ef_future=macrotask_triangular(std::string("ef"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector()); + taskq->print_taskq(); taskq->run_all(); - // double term_CD=cd_future->get(); - // double term_EF=ef_future->get(); - // double term_GHIJ=ghij_future->get(); - // double term_KLMN=klmn_future->get(); - // double mp3_energy=term_CD+term_GHIJ+term_KLMN+term_EF; - // if (world.rank()==0) { - // printf("term_CD %12.8f\n",term_CD); - // printf("term_GHIJ %12.8f\n",term_GHIJ); - // printf("term_KLMN %12.8f\n",term_KLMN); - // printf("term_EF %12.8f\n",term_EF); - // printf("MP3 energy contribution %12.8f\n",mp3_energy); - // } - // return mp3_energy; + double term_CD=cd_future->get(); + double term_EF=ef_future->get(); + double term_GHIJ=ghij_future->get(); + double term_KLMN=klmn_future->get(); + double mp3_energy=term_CD+term_GHIJ+term_KLMN+term_EF; + if (world.rank()==0) { + printf("term_CD %12.8f\n",term_CD); + printf("term_GHIJ %12.8f\n",term_GHIJ); + printf("term_KLMN %12.8f\n",term_KLMN); + printf("term_EF %12.8f\n",term_EF); + printf("MP3 energy contribution %12.8f\n",mp3_energy); + } + return mp3_energy; return 0.0; } } diff --git a/src/madness/chem/nemo.cc b/src/madness/chem/nemo.cc index 773345b8b3d..2730c2d99e0 100644 --- a/src/madness/chem/nemo.cc +++ b/src/madness/chem/nemo.cc @@ -1094,7 +1094,7 @@ vecfuncT Nemo::make_cphf_constant_term(const size_t iatom, const int iaxis, const int nmo=nemo.size(); const Tensor occ=get_calc()->get_aocc(); - QProjector Q(world,R2nemo,nemo); + QProjector Q(R2nemo,nemo); DNuclear Dunuc(world,this,iatom,iaxis); vecfuncT Vpsi2b=Dunuc(nemo); @@ -1162,7 +1162,7 @@ vecfuncT Nemo::solve_cphf(const size_t iatom, const int iaxis, const Tensor Q(world,R2nemo,nemo); + QProjector Q(R2nemo,nemo); // construct quantities that are independent of xi diff --git a/src/madness/chem/projector.h b/src/madness/chem/projector.h index f40341a5d50..ce348ef35a2 100644 --- a/src/madness/chem/projector.h +++ b/src/madness/chem/projector.h @@ -17,11 +17,15 @@ namespace madness { class ProjectorBase { protected: /// a projector might work only on a subset of dimensions, e.g. P(1) | \psi(1,2) > - int particle=-1; + int particle=-1; // must only be 0 or 1! public: virtual ~ProjectorBase() {} - virtual void set_particle(const int p) {particle=p;} - int get_particle() const {return particle;} + virtual void set_particle(const int p) + { + MADNESS_CHECK_THROW(p==0 or p==1, "particle must be 0 or 1"); + particle=p; + } + virtual int get_particle() const {return particle;} virtual std::string type() const = 0; }; @@ -62,18 +66,35 @@ namespace madness { /// bra and ket spaces are not symmetric (e.g. |ket>^+ = ^+ = = \sum_p |p(particle)> _{particle} /// \f] /// @param[in] f the 6D function to be projected - /// @param[in] the particle that is projected (1 or 2) + /// @param[in] the particle that is projected (0 or 1) /// @return the projected function template typename std::enable_if >::type - operator()(const Function& f, size_t particle1=size_t(-1)) const { - Function result = FunctionFactory(f.world()); - if (particle1==size_t(-1)) particle1=particle; - MADNESS_CHECK_THROW(particle1 == 1 or particle1 == 2, "particle must be 1 or 2"); - for (size_t i = 0; i < mo_ket_.size(); i++) { - Function tmp1 = mo_ket_[i]; - Function tmp2 = f.project_out(mo_bra_[i], particle1 - 1); - Function tmp12; - if (particle1 == 1) { - tmp12 = CompositeFactory(f.world()).particle1(copy(tmp1)).particle2(copy(tmp2)); - tmp12.fill_tree(); - } else { - tmp12 = CompositeFactory(f.world()).particle1(copy(tmp2)).particle2(copy(tmp1)); - tmp12.fill_tree(); - } - result += tmp12; + operator()(const Function& f, int particle1=-1) const { + if (particle1==-1) particle1=get_particle(); + MADNESS_CHECK_THROW(particle1 == 0 or particle1 == 1, "particle must be 0 or 1"); + auto [left,right]=get_vectors_for_outer_product(f); + return hartree_product(left,right); + } + + /// apply the projection parts of the operator on a function f + + /// The operator applied on f(1,2) is + /// O(1)f(1,2) = \sum_i |i(1) > _1 = \sum_i |i(1) f_i(2)> + /// return the lo-dim vectors i and f_i only, perform no outer product + std::pair>,std::vector>> + get_vectors_for_outer_product(const Function& f) const { + World& world=f.world(); + reconstruct(world, mo_bra_, false); + f.reconstruct(false); + reconstruct(world, mo_ket_, true); + std::vector> projected; + for (const auto& i : mo_bra_) { + projected.push_back(f.project_out(i,particle)); + } + if (particle==0) return std::make_pair(mo_ket_,projected); + else if (particle==1) return std::make_pair(projected,mo_ket_); + else { + MADNESS_EXCEPTION("confused particles in Projector::get_vector_for_outer_products",1); } - return result; } template @@ -162,10 +192,17 @@ namespace madness { QProjector() = default; /// constructor with symmetric bra and ket spaces - QProjector(World& world, const vecfuncT& amo) : O(amo) {}; + [[deprecated]] QProjector(World& world, const vecfuncT& amo) : O(amo) {}; + + /// constructor with asymmetric bra and ket spaces + [[deprecated]] QProjector(World& world, const vecfuncT& bra, const vecfuncT& ket) + : O(bra,ket) {}; + + /// constructor with symmetric bra and ket spaces + QProjector(const vecfuncT& amo) : O(amo) {}; /// constructor with asymmetric bra and ket spaces - QProjector(World& world, const vecfuncT& bra, const vecfuncT& ket) + QProjector(const vecfuncT& bra, const vecfuncT& ket) : O(bra,ket) {}; /// copy ctor @@ -173,6 +210,14 @@ namespace madness { std::string type() const override {return "QProjector";} + void set_spaces(const vecfuncT& p) { + O.set_spaces(p); + } + + void set_spaces(const vecfuncT& bra, const vecfuncT& ket) { + O.set_spaces(bra,ket); + } + Function operator()(const Function& rhs) const { return (rhs-O(rhs)).truncate(); } @@ -184,7 +229,7 @@ namespace madness { return result; } - Function operator()(const Function& f, const size_t particle) const { + Function operator()(const Function& f, const size_t particle=-1) const { return f-O(f,particle); } @@ -200,8 +245,12 @@ namespace madness { Projector get_P_projector() const {return O;} void set_particle(const int p) override { - particle=p; O.set_particle(p); + particle=p; + } + + int get_particle() const override { + return O.get_particle(); } private: @@ -249,6 +298,8 @@ namespace madness { bra1_=bra1; ket2_=ket2; bra2_=bra2; + MADNESS_CHECK_THROW(ket1.size()==bra1.size(), "bra1 and ket1 spaces must have the same size in SOprojector"); + MADNESS_CHECK_THROW(ket2.size()==bra2.size(), "bra2 and ket2 spaces must have the same size in SOprojector"); } /// return the orbital space for the ket of particle 1 @@ -367,6 +418,45 @@ namespace madness { std::vector > ket1_, bra1_, ket2_, bra2_; }; + + + /// an outer product of two projectors + template + class OuterProjector : public ProjectorBase { + projT projector0; + projQ projector1; + public: + + OuterProjector() = default; + OuterProjector(const projT& p0, const projQ& p1) : projector0(p0), projector1(p1) { + static_assert(std::is_base_of::value, "projT must be a ProjectorBase"); + static_assert(std::is_base_of::value, "projQ must be a ProjectorBase"); + projector0.set_particle(0); + projector1.set_particle(1); + } + + std::string type() const override { + return "OuterProjector"; + } + + template + resultT operator()(const resultT& argument) const { + + if (projector0.type()=="PProjector") return projector1(projector0(argument)); + return projector0(projector1(argument)); + } + }; + +// template +// OuterProjector outer(const projT& p0 , const projQ& p1) { +// return OuterProjector(p0, p1); +// } + + template + typename std::enable_if::value, OuterProjector>::type + outer(const projT& p0 , const projQ& p1) { + return OuterProjector(p0, p1); + } } #endif /* PROJECTOR_H_ */ diff --git a/src/madness/chem/test_ccpairfunction.cc b/src/madness/chem/test_ccpairfunction.cc index 862c8613e4a..5e9682d86b0 100644 --- a/src/madness/chem/test_ccpairfunction.cc +++ b/src/madness/chem/test_ccpairfunction.cc @@ -195,6 +195,31 @@ int test_constructor(World& world, std::shared_ptr ncf return t1.end(); } +template +int test_norm(World& world, std::shared_ptr ncf, data& data, + const CCParameters& parameter) { + test_output t1("norm of "); + + auto [p1,p2,p3,p4,p5]=data.get_ccpairfunctions(); // p2-p5 correspond to f230 + for (const CCPairFunction& p : {p2,p3,p4,p5}) { + double n=p.norm2(); + print("norm of ",p.name(),n); + double n1=sqrt(inner(p,p)); + print("inner",n1); + t1.checkpoint(n,n1,FunctionDefaults::get_thresh(),"norm of p"); + } + + double n2=p2.norm2(); + double n3=p3.norm2(); + double n4=p4.norm2(); + double n5=p5.norm2(); + t1.checkpoint(n2,n4,FunctionDefaults::get_thresh(),"norm of p2/4"); + t1.checkpoint(n3,n5,FunctionDefaults::get_thresh(),"norm of p3/5"); + + return t1.end(); + +} + template int test_load_store(World& world, std::shared_ptr ncf, data& data, const CCParameters& parameter) { @@ -1133,7 +1158,7 @@ int test_projector(World& world, std::shared_ptr ncf, std::vector> vp3({p3}); Projector O(o,o); - QProjector Q(world,o,o); + QProjector Q(o,o); StrongOrthogonalityProjector Q12(world); Q12.set_spaces(o); @@ -1151,7 +1176,10 @@ int test_projector(World& world, std::shared_ptr ncf, O.set_particle(0); { double ref=inner({CCPairFunction({of1},{f2})},vp[i]); - double result=inner({CCPairFunction({f1},{f2})},O(vp[i])); + auto tmp=O(vp[i]); + t1.checkpoint(tmp.size()==1,"vector size correct"); + t1.checkpoint(tmp[0].is_decomposed(),"O(argument) is decomposed"); + double result=inner({CCPairFunction({f1},{f2})},tmp); t1.checkpoint(result,ref,thresh,"O1 p"+std::to_string(i)); } @@ -1159,7 +1187,10 @@ int test_projector(World& world, std::shared_ptr ncf, O.set_particle(1); { double ref=inner({CCPairFunction({f1},{of2})},vp[i]); - double result=inner({CCPairFunction({f1},{f2})},O(vp[i])); + auto tmp=O(vp[i]); + t1.checkpoint(tmp.size()==1,"vector size correct"); + t1.checkpoint(tmp[0].is_decomposed(),"O(argument) is decomposed"); + double result=inner({CCPairFunction({f1},{f2})},tmp); t1.checkpoint(result,ref,thresh,"O2 p"+std::to_string(i)); } // Q1 @@ -1315,20 +1346,21 @@ int main(int argc, char **argv) { auto data4=data(world,ccparam); auto data6=data(world,ccparam); -// isuccess+=test_constructor(world, ncf, data2, ccparam); -// isuccess+=test_load_store(world,ncf,data2,ccparam); -// isuccess+=test_operator_apply(world, ncf, data2, ccparam); -// isuccess+=test_transformations(world, ncf, data2, ccparam); -// isuccess+=test_multiply_with_f12(world, ncf, data2, ccparam); -// isuccess+=test_inner(world, ncf, data2, ccparam); -// isuccess+=test_multiply(world, ncf, data2, ccparam); -// isuccess+=test_swap_particles(world, ncf, data2, ccparam); -// isuccess+=test_scalar_multiplication(world, ncf, data2, ccparam); + isuccess+=test_constructor(world, ncf, data2, ccparam); + isuccess+=test_load_store(world,ncf,data2,ccparam); + isuccess+=test_operator_apply(world, ncf, data2, ccparam); + isuccess+=test_norm(world,ncf,data2,ccparam); + isuccess+=test_transformations(world, ncf, data2, ccparam); + isuccess+=test_multiply_with_f12(world, ncf, data2, ccparam); + isuccess+=test_inner(world, ncf, data2, ccparam); + isuccess+=test_multiply(world, ncf, data2, ccparam); + isuccess+=test_swap_particles(world, ncf, data2, ccparam); + isuccess+=test_scalar_multiplication(world, ncf, data2, ccparam); isuccess+=test_projector(world, ncf, data2, ccparam); - // isuccess+=test_partial_inner_3d(world, ncf, data2, ccparam); - // isuccess+=test_partial_inner_6d(world, ncf, data2, ccparam); - // isuccess+=test_apply(world, ncf, data2, ccparam); - // isuccess+=test_consolidate(world, ncf, data2, ccparam); + isuccess+=test_partial_inner_3d(world, ncf, data2, ccparam); + isuccess+=test_partial_inner_6d(world, ncf, data2, ccparam); + isuccess+=test_apply(world, ncf, data2, ccparam); + isuccess+=test_consolidate(world, ncf, data2, ccparam); // isuccess+=test_constructor(world, ncf, data4, ccparam); diff --git a/src/madness/chem/test_projector.cc b/src/madness/chem/test_projector.cc index 1cf44fd8147..5aff5b528a4 100644 --- a/src/madness/chem/test_projector.cc +++ b/src/madness/chem/test_projector.cc @@ -70,6 +70,44 @@ int test_projector(World& world) { + return t1.end(); +} + +template +int test_projector_outer(World& world) { + test_output t1("testing projector_outer for dimension " + std::to_string(NDIM)); + constexpr std::size_t LDIM=NDIM/2; + static_assert(2*LDIM==NDIM); + + auto g1=[](const Vector& r){return exp(-inner(r,r));}; + auto g_hidim=[](const Vector& r){return 2.0*exp(-3.0*inner(r,r));}; + Function f1=FunctionFactory(world).f(g1); + Function f_hidim=FunctionFactory(world).f(g_hidim); + + + // compare explicit SO projector Q12 and outer product projector Q1Q2 + StrongOrthogonalityProjector Q1(world); + Q1.set_spaces({f1}); + + QProjector q(world,{f1}); + auto Q2=outer(q,q); + + auto Q1f=Q1(f_hidim); + auto Q2f=Q2(f_hidim); + double err=(Q1f-Q2f).norm2(); + print("error",err); + double norm1=Q1f.norm2(); + double norm2=Q2f.norm2(); + print("norm1/2",norm1,norm2); + double trace1=Q1f.trace(); + double trace2=Q2f.trace(); + print("trace1/2",trace1,trace2); + + t1.checkpoint(norm1-norm2,FunctionDefaults::get_thresh(),"Q1 direct and Q2 outer are the same"); + t1.checkpoint(trace1-trace2,FunctionDefaults::get_thresh(),"Q1 direct and Q2 outer are the same"); + // loosen threshold due to outer product + t1.checkpoint(err,FunctionDefaults::get_thresh()*3.0,"Q1 direct and Q2 outer are the same"); + return t1.end(); } @@ -128,8 +166,8 @@ int test_Q12_projector(World& world) { // SO(f) = f - O1(f) - O2(f) + O1O2(f) Projector O1(vphi); Projector O2(vphi); - O1.set_particle(1); - O2.set_particle(2); + O1.set_particle(0); + O2.set_particle(1); Function f3=f-O1(f)-O2(f)+O1(O2(f)); double err1=(f1-f3).norm2()/f.norm2(); print("err1",err1); @@ -191,6 +229,8 @@ int main(int argc, char**argv) { error+=test_projector(world); error+=test_projector(world); + error+=test_projector_outer(world); + if (HAVE_GENTENSOR) { error+=test_Q12_projector(world); error+=test_Q12_projector(world); diff --git a/src/madness/chem/zcis.h b/src/madness/chem/zcis.h index aee71bfe1ae..6899e937cb2 100644 --- a/src/madness/chem/zcis.h +++ b/src/madness/chem/zcis.h @@ -126,7 +126,7 @@ class Zcis : public QCPropertyInterface { Zcis(World& w, const commandlineparser& parser, std::shared_ptr n) : world(w), cis_param(world, parser), nemo(n), - Qa(world,nemo->amo,nemo->amo), Qb(world,nemo->bmo,nemo->bmo) { + Qa(nemo->amo,nemo->amo), Qb(nemo->bmo,nemo->bmo) { cis_param.print("response","end"); print("Qa projector",Qa.get_ket_vector().size()); print("Qb projector",Qb.get_ket_vector().size()); diff --git a/src/madness/misc/CMakeLists.txt b/src/madness/misc/CMakeLists.txt index f825dcfa6a3..ff5f0a7219b 100644 --- a/src/madness/misc/CMakeLists.txt +++ b/src/madness/misc/CMakeLists.txt @@ -2,7 +2,7 @@ set(MADMISC_HEADERS misc.h ran.h phandler.h interpolation_1d.h cfft.h info.h gnuplot.h) set(MADMISC_SOURCES - checksum_file.cc position_stream.cc gprofexit.cc ran.cc cfft.cc info.cc) + checksum_file.cc position_stream.cc gprofexit.cc ran.cc cfft.cc info.cc unique_filename.cc) # retrieve git metadata include(GetGitMetadata) vgkit_cmake_git_metadata() diff --git a/src/madness/mra/QCCalculationParametersBase.h b/src/madness/mra/QCCalculationParametersBase.h index 9d6d7fbb951..b563911bd89 100644 --- a/src/madness/mra/QCCalculationParametersBase.h +++ b/src/madness/mra/QCCalculationParametersBase.h @@ -388,7 +388,7 @@ class QCCalculationParametersBase { std::transform(key_lower.begin(), key_lower.end(), key_lower.begin(), ::tolower); std::transform(svalue.begin(), svalue.end(), svalue.begin(), ::tolower); std::vector av_lower_vec; - for (auto av : allowed_values) { + for (const T& av : allowed_values) { std::string av_lower=tostring(av); std::transform(av_lower.begin(), av_lower.end(), av_lower.begin(), ::tolower); av_lower_vec.push_back(av_lower); @@ -598,6 +598,7 @@ class QCCalculationParametersBase { static std::string tostring(const T& arg) { using madness::operators::operator<<; std::ostringstream ss; + static_assert(not std::is_same::value, "you need to specialize tostring for this type"); ss< { print("redirecting output to files task.#####"); } - + double cpu0=cpu_time(); cloud.replicate(); universe.gop.fence(); + double cpu1=cpu_time(); + if (printtimings()) print("cloud replication wall time",cpu1-cpu0); if (printdebug()) cloud.print_size(universe); universe.gop.set_forbid_fence(true); // make sure there are no hidden universe fences pmap1=FunctionDefaults<1>::get_pmap(); @@ -363,6 +365,7 @@ class MacroTaskQ : public WorldObject< MacroTaskQ> { // cleanup task-persistent input data for (auto& task : taskq) task->cleanup(); cloud.clear_cache(subworld); + cloud.clear(); subworld.gop.fence(); subworld.gop.fence(); universe.gop.fence(); @@ -503,7 +506,7 @@ class MacroTask { /// constructor takes the actual task MacroTask(World &world, taskT &task, std::shared_ptr taskq_ptr = 0) - : task(task), world(world), taskq_ptr(taskq_ptr) { + : task(task), name(task.name), world(world), taskq_ptr(taskq_ptr) { if (taskq_ptr) { // for the time being this condition must hold because tasks are // constructed as replicated objects and are not broadcast to other processes @@ -638,7 +641,14 @@ class MacroTask { const argtupleT argtuple = cloud.load(subworld, inputrecords); const argtupleT batched_argtuple = task.batch.template copy_input_batch(argtuple); try { + print("starting task no",element, "in subworld",subworld.id(),"at time",wall_time()); + double cpu0=cpu_time(); resultT result_tmp = std::apply(task, batched_argtuple); + double cpu1=cpu_time(); + std::size_t bufsize=256; + char buffer[bufsize]; + std::snprintf(buffer,bufsize,"completed task %3ld after %6.1fs at time %6.1fs\n",element,cpu1-cpu0,wall_time()); + print(std::string(buffer)); resultT result = get_output(subworld, cloud, argtuple); // lives in the universe if constexpr (is_madness_function::value) { @@ -706,6 +716,7 @@ class MacroTask { class MacroTaskOperationBase { public: Batch batch; + std::string name="unknown_task"; std::shared_ptr partitioner=0; MacroTaskOperationBase() : batch(Batch(_, _, _)), partitioner(new MacroTaskPartitioner) {} }; diff --git a/src/madness/mra/mra.h b/src/madness/mra/mra.h index a34d2edd3a5..e14be5fb59d 100644 --- a/src/madness/mra/mra.h +++ b/src/madness/mra/mra.h @@ -496,8 +496,11 @@ namespace madness { /// print some info about this void print_size(const std::string name) const { - if (!impl) print("function",name,"not assigned yet"); - impl->print_size(name); + if (!impl) { + print("function",name,"not assigned yet"); + } else { + impl->print_size(name); + } } /// Returns the maximum depth of the function tree ... collective global sum @@ -2101,8 +2104,10 @@ namespace madness { result.get_impl()->recursive_apply(op, f1[i].get_impl().get(),f2[i].get_impl().get(),false); world.gop.fence(); - result.get_impl()->print_timer(); - op.print_timer(); + if (op.print_timings) { + result.get_impl()->print_timer(); + op.print_timer(); + } result.get_impl()->finalize_apply(); // need fence before reconstruct diff --git a/src/madness/mra/test_cloud.cc b/src/madness/mra/test_cloud.cc index b421e8b1efa..3b2027559ec 100644 --- a/src/madness/mra/test_cloud.cc +++ b/src/madness/mra/test_cloud.cc @@ -31,6 +31,43 @@ struct gaussian { } }; + +/// this class stores different member variables in different records of the cloud +class custom_serialize_tester { +public: + int i; + double d; + + custom_serialize_tester() : i(0), d(0.0) {} + bool operator==(const custom_serialize_tester& other) const { + return i == other.i && d == other.d; + } + + /// customized function to store this to the cloud + + /// functions and constant_part can be very large and we want to split them and store them in differenc records + Recordlist cloud_store(World& world, Cloud& cloud) const { + // save bookkeeping stuff in a vector + std::vector v; + archive::VectorOutputArchive arout(v); + arout & i; + + Recordlist records; + records+=cloud.store(world,v); + records+=cloud.store(world,d); + return records; + } + + void cloud_load(World& world, const Cloud& cloud, Recordlist& recordlist) { + std::vector v=cloud.forward_load>(world,recordlist); + archive::VectorInputArchive arin(v); + arin & i; + d=cloud.forward_load(world,recordlist); + } + + +}; + template double norm(const T i1) { return fabs(i1); } @@ -157,6 +194,35 @@ int test_custom_worldobject(World& universe, World& subworld, Cloud& cloud) { double error=d1-d2; cloud.set_force_load_from_cache(false); return t1.end(error < 1.e-10 ); +} + +int test_custom_serialization(World& universe, Cloud& cloud) { + test_output t1("testing custom serialization"); + t1.set_cout_to_terminal(); + cloud.set_debug(true); + custom_serialize_tester cst; + cst.i=1; + cst.d=2.0; + static_assert(Cloud::has_cloud_serialize::value,"custom_serialize_tester must have a cloud_serialize method"); + { + auto records = cloud.store(universe, cst); + auto cst2=cloud.load(universe, records); + t1.checkpoint(cst==cst2,"custom serialization"); + } + + // test being part of a tuple + typedef std::tuple tupleT; + tupleT tuple1=std::make_tuple(1,2.0,cst); + cloud.clear(); + { + auto records = cloud.store(universe, tuple1); + auto tuple2=cloud.load(universe, records); + + t1.checkpoint(tuple1==tuple2,"custom serialization with tuple"); + } + + return t1.end(); + } @@ -177,6 +243,7 @@ int main(int argc, char **argv) { // test storing custom WorldObject success += test_custom_worldobject(universe, subworld, cloud); + success += test_custom_serialization(universe, cloud); if (universe.rank() == 0) print("entering test_cloud"); print("my world: universe_rank, subworld_id", universe.rank(), subworld.id()); diff --git a/src/madness/tensor/srconf.h b/src/madness/tensor/srconf.h index 9ecb4858b48..6e5797c60d8 100644 --- a/src/madness/tensor/srconf.h +++ b/src/madness/tensor/srconf.h @@ -663,7 +663,7 @@ namespace madness { public: /// return the number of physical dimensions int dim_per_vector(int idim) const { - MADNESS_ASSERT(vector_.size()>idim); + MADNESS_ASSERT(vector_.size()>size_t(idim)); return vector_[idim].ndim()-1; // remove dimension for the rank } diff --git a/src/madness/world/cloud.h b/src/madness/world/cloud.h index ae05897c9f8..f5638351863 100644 --- a/src/madness/world/cloud.h +++ b/src/madness/world/cloud.h @@ -68,6 +68,13 @@ struct Recordlist { template using has_member_id = madness::meta::is_detected; + // if type provides a hashing function use that, intrusive hashing, see worldhash.h + template + using member_hash_t = decltype(std::declval().hash()); + + template + using has_member_hash = madness::meta::is_detected; + template static keyT compute_record(const Function& arg) {return hash_value(arg.get_impl()->id());} @@ -99,7 +106,10 @@ struct Recordlist { } else if constexpr (std::is_pointer_v && has_member_id>::value) { return hash_value(arg->id()); } else { - return hash_value(arg); + // compute hash_code for fundamental types + std::size_t hashtype = typeid(T).hash_code(); + hash_combine(hashtype,hash_value(arg)); + return hashtype; } } @@ -154,6 +164,13 @@ class Cloud { cacheT cached_objects; recordlistT local_list_of_container_keys; // a world-local list of keys occupied in container +public: + template + using member_cloud_serialize_t = decltype(std::declval().cloud_store(std::declval(), std::declval())); + + template + using has_cloud_serialize = madness::meta::is_detected; + public: /// @param[in] universe the universe world @@ -176,12 +193,17 @@ class Cloud { void print_size(World& universe) { std::size_t memsize=0; - for (auto& item : container) memsize+=item.second.size(); + std::size_t max_record_size=0; + for (auto& item : container) { + memsize+=item.second.size(); + max_record_size=std::max(max_record_size,item.second.size()); + } std::size_t global_memsize=memsize; std::size_t max_memsize=memsize; std::size_t min_memsize=memsize; universe.gop.sum(global_memsize); universe.gop.max(max_memsize); + universe.gop.max(max_record_size); universe.gop.min(min_memsize); auto local_size=container.size(); @@ -193,25 +215,25 @@ class Cloud { print("Cloud memory:"); print(" replicated:",is_replicated); print("size of cloud (total)"); - print(" number of records:",global_size); - print(" memory in GBytes: ",global_memsize*byte2gbyte); + print(" number of records: ",global_size); + print(" memory in GBytes: ",global_memsize*byte2gbyte); print("size of cloud (average per node)"); - print(" number of records:",double(global_size)/universe.size()); - print(" memory in GBytes: ",global_memsize*byte2gbyte/universe.size()); + print(" number of records: ",double(global_size)/universe.size()); + print(" memory in GBytes: ",global_memsize*byte2gbyte/universe.size()); print("min/max of node"); - print(" memory in GBytes: ",min_memsize*byte2gbyte,max_memsize*byte2gbyte); + print(" memory in GBytes: ",min_memsize*byte2gbyte,max_memsize*byte2gbyte); + print(" max record size in GBytes:",max_record_size*byte2gbyte); + } } void print_timings(World &universe) const { double rtime = double(reading_time); double wtime = double(writing_time); - double wtime1 = double(writing_time1); double ptime = double(replication_time); - universe.gop.max(rtime); - universe.gop.max(wtime); - universe.gop.max(wtime1); - universe.gop.max(ptime); + universe.gop.sum(rtime); + universe.gop.sum(wtime); + universe.gop.sum(ptime); long creads = long(cache_reads); long cstores = long(cache_stores); universe.gop.sum(creads); @@ -219,10 +241,9 @@ class Cloud { if (universe.rank() == 0) { auto precision = std::cout.precision(); std::cout << std::fixed << std::setprecision(1); - print("cloud storing wall time", wtime * 0.001); - print("cloud storing wall time inner loop", wtime1 * 0.001); - print("cloud replication wall time", ptime * 0.001); - print("cloud reading wall time", rtime * 0.001, std::defaultfloat); + print("cloud storing cpu time", wtime * 0.001); + print("cloud replication cpu time", ptime * 0.001); + print("cloud reading cpu time", rtime * 0.001, std::defaultfloat); std::cout << std::setprecision(precision) << std::scientific; print("cloud cache stores ", long(cstores)); print("cloud cache loads ", long(creads)); @@ -234,6 +255,10 @@ class Cloud { subworld.gop.fence(); } + void clear() { + container.clear(); + } + void clear_timings() { reading_time=0l; writing_time=0l; @@ -249,10 +274,26 @@ class Cloud { T load(madness::World &world, const recordlistT recordlist) const { recordlistT rlist = recordlist; cloudtimer t(world, reading_time); + + // forward_load will consume the recordlist while loading elements + return forward_load(world, rlist); + } + + /// load a single object from the cloud, recordlist is consumed while loading elements + template + T forward_load(madness::World &world, recordlistT& recordlist) const { + // different objects are stored in different ways + // - tuples are split up into their components + // - classes with their own cloud serialization are stored using that + // - everything else is stored using their usual serialization if constexpr (is_tuple::value) { - return load_tuple(world, rlist); + return load_tuple(world, recordlist); + } else if constexpr (has_cloud_serialize::value) { + T target = allocator(world); + target.cloud_load(world, *this, recordlist); + return target; } else { - return load_other(world, rlist); + return do_load(world, recordlist); } } @@ -264,9 +305,16 @@ class Cloud { MADNESS_EXCEPTION("cloud error",1); } cloudtimer t(world,writing_time); + + // different objects are stored in different ways + // - tuples are split up into their components + // - classes with their own cloud serialization are stored using that + // - everything else is stored using their usual serialization recordlistT recordlist; if constexpr (is_tuple::value) { recordlist+=store_tuple(world,source); + } else if constexpr (has_cloud_serialize::value) { + recordlist+=source.cloud_store(world,*this); } else { recordlist+=store_other(world,source); } @@ -371,17 +419,16 @@ class Cloud { } }; - template void cache(madness::World &world, const T &obj, const keyT &record) const { const_cast(cached_objects).insert({record,std::make_any(obj)}); } + /// load an object from the cache, record is unchanged template T load_from_cache(madness::World &world, const keyT &record) const { if (world.rank()==0) cache_reads++; if (debug) print("loading", typeid(T).name(), "from cache record", record, "to world", world.id()); -// if (auto obj = std::get_if(&cached_objects.find(record)->second)) return *obj; if (auto obj = std::any_cast(&cached_objects.find(record)->second)) return *obj; MADNESS_EXCEPTION("failed to load from cloud-cache", 1); T target = allocator(world); @@ -417,7 +464,6 @@ class Cloud { bool is_already_present= is_in_container(record); if (debug) { if (is_already_present) std::cout << "skipping "; - std::string msg; if constexpr (Recordlist::has_member_id::value) { std::cout << "storing world object of " << typeid(T).name() << "id " << source.id() << " to record " << record << std::endl; } @@ -438,20 +484,29 @@ class Cloud { return recordlistT{record}; } +public: + /// load a vector from the cloud, pop records from recordlist + /// + /// @param[inout] world destination world + /// @param[inout] recordlist list of records to load from (reduced by the first few elements) template typename std::enable_if::value, T>::type - load_other(World &world, recordlistT &recordlist) const { - std::size_t sz = load_other(world, recordlist); + do_load(World &world, recordlistT &recordlist) const { + std::size_t sz = do_load(world, recordlist); T target(sz); for (std::size_t i = 0; i < sz; ++i) { - target[i] = load_other(world, recordlist); + target[i] = do_load(world, recordlist); } return target; } + /// load a single object from the cloud, pop record from recordlist + /// + /// @param[inout] world destination world + /// @param[inout] recordlist list of records to load from (reduced by the first element) template typename std::enable_if::value, T>::type - load_other(World &world, recordlistT &recordlist) const { + do_load(World &world, recordlistT &recordlist) const { keyT record = recordlist.pop_front_and_return(); if (force_load_from_cache) MADNESS_CHECK(is_cached(record)); @@ -465,6 +520,8 @@ class Cloud { return target; } +public: + // overloaded template recordlistT store_other(madness::World& world, const std::vector& source) { @@ -491,12 +548,16 @@ class Cloud { return v; } + /// load a tuple from the cloud, pop records from recordlist + /// + /// @param[inout] world destination world + /// @param[inout] recordlist list of records to load from (reduced by the first few elements) template T load_tuple(madness::World &world, recordlistT &recordlist) const { if (debug) std::cout << "loading tuple of type " << typeid(T).name() << " to world " << world.id() << std::endl; T target; std::apply([&](auto &&... args) { - ((args = load_other::type>(world, recordlist)), ...); + ((args = forward_load::type>(world, recordlist)), ...); }, target); return target; } diff --git a/src/madness/world/parallel_archive.h b/src/madness/world/parallel_archive.h index bcfedf9e2af..45cb9c8a61c 100644 --- a/src/madness/world/parallel_archive.h +++ b/src/madness/world/parallel_archive.h @@ -104,7 +104,7 @@ namespace madness { /// \return The process doing I/O for this node. ProcessID my_io_node() const { - MADNESS_ASSERT(world); + MADNESS_CHECK(world); return io_node(world->rank()); } @@ -112,7 +112,7 @@ namespace madness { /// \return The number of I/O clients for this node, including self (zero if not an I/O node). int num_io_clients() const { - MADNESS_ASSERT(world); + MADNESS_CHECK(world); return nclient; } @@ -120,7 +120,7 @@ namespace madness { /// \return True if this node is doing physical I/O. bool is_io_node() const { - MADNESS_ASSERT(world); + MADNESS_CHECK(world); return world->rank() == my_io_node(); } @@ -128,7 +128,7 @@ namespace madness { /// \return A pointer to the world. World* get_world() const { - MADNESS_ASSERT(world); + MADNESS_CHECK(world); return world; } @@ -166,12 +166,12 @@ namespace madness { if (nio > maxio) nio = maxio; // Sanity? if (nio > world.size()) nio = world.size(); - MADNESS_ASSERT(filename); - MADNESS_ASSERT(strlen(filename)-1::value || std::is_same::value, bool> exists(World& world, const char* filename) { - constexpr std::size_t bufsize=256; + constexpr std::size_t bufsize=512; char buf[bufsize]; - MADNESS_ASSERT(strlen(filename)+7 <= sizeof(buf)); + MADNESS_CHECK(strlen(filename)+7 <= sizeof(buf)); snprintf(buf,bufsize, "%s.%5.5d", filename, world.rank()); bool status; if (world.rank() == 0) @@ -237,7 +237,7 @@ namespace madness { /// Closes the parallel archive. void close() { - MADNESS_ASSERT(world); + MADNESS_CHECK(world); if (is_io_node()) ar.close(); } @@ -246,8 +246,8 @@ namespace madness { /// \throw MadnessException If not an I/O node. /// \return A reference to the local archive. Archive& local_archive() const { - MADNESS_ASSERT(world); - MADNESS_ASSERT(is_io_node()); + MADNESS_CHECK(world); + MADNESS_CHECK(is_io_node()); return ar; } @@ -273,9 +273,9 @@ namespace madness { void> remove(World& world, const char* filename) { if (world.rank() == 0) { - constexpr std::size_t bufsize=268; + constexpr std::size_t bufsize=512; char buf[bufsize]; - MADNESS_ASSERT(strlen(filename)+7 <= sizeof(buf)); + MADNESS_CHECK(strlen(filename)+7 <= sizeof(buf)); for (ProcessID p=0; p + struct ArchivePrePostImpl { + /// Store the preamble. + + /// \param[in] ar The archive. + static void preamble_store(const ContainerRecordOutputArchive& ar) {}; + + /// Store the postamble. + + /// \param[in] ar The archive. + static inline void postamble_store(const ContainerRecordOutputArchive& ar) {}; + }; + + /// Implementation of functions for loading the pre/postamble in ContainerRecord archives. + + /// \attention No type checking over ContainerRecord buffers, for efficiency. + /// \tparam T The data type. + template + struct ArchivePrePostImpl { + /// Load the preamble. + + /// \param[in] ar The archive. + static inline void preamble_load(const ContainerRecordInputArchive& ar) {}; + + /// Load the postamble. + + /// \param[in] ar The archive. + static inline void postamble_load(const ContainerRecordInputArchive& ar) {}; + }; + + // Forward storing to VectorOutputArchive template struct ArchiveStoreImpl< ParallelOutputArchive, WorldContainer > { static void store(const ParallelOutputArchive& ar, const WorldContainer& t) { - ParallelOutputArchive par(*(ar.get_world()), ar.local_archive().get_archive()); + std::vector v; + VectorOutputArchive dummyar(v,0); + const int me = ar.get_world()->rank(); + + // Need to pass local archive by reference + ParallelOutputArchive par(*(ar.get_world()), (me==0) ? ar.local_archive().get_archive() : dummyar); par & t; } }; + + } diff --git a/src/madness/world/test_dc.cc b/src/madness/world/test_dc.cc index bf03c72ee6f..f908df1dc64 100644 --- a/src/madness/world/test_dc.cc +++ b/src/madness/world/test_dc.cc @@ -270,7 +270,16 @@ void test_local(World& world) { void test_florian(World& world) { WorldContainer c(world); - long nlarge=200000; + + long nlarge=20000; + // get nlarge variable from the environment and convert it into long + char* nlarge_env = getenv("NLARGE"); + if (nlarge_env) { + nlarge = atol(nlarge_env); + } + if (world.rank()==0) print("size of the container",nlarge); + + if (world.rank() == 0) { for (int i=0; i v; { archive::VectorOutputArchive var(v); @@ -287,7 +296,7 @@ void test_florian(World& world) { ar & c; } double wall1=wall_time(); - printf("ending at time %8.4f after %8.4fs\n",wall1,wall1-wall0); + if (world.rank() == 0) printf("ending at time %8.4f after %8.4fs\n",wall1,wall1-wall0); WorldContainer c2(world); { @@ -303,7 +312,7 @@ void test_florian(World& world) { } world.gop.fence(); - print("test_florian passed"); + if (world.rank() == 0) print("test_florian passed"); } int main(int argc, char** argv) { diff --git a/src/madness/world/vector_archive.h b/src/madness/world/vector_archive.h index 129c35e44e9..3530a5a258d 100644 --- a/src/madness/world/vector_archive.h +++ b/src/madness/world/vector_archive.h @@ -144,6 +144,40 @@ namespace madness { void close() {} }; + /// Implementation of functions for storing the pre/postamble in Vector archives. + + /// \attention No type checking over Vector buffers, for efficiency. + /// \tparam T The data type. + template + struct ArchivePrePostImpl { + /// Store the preamble. + + /// \param[in] ar The archive. + static void preamble_store(const VectorOutputArchive& ar) {}; + + /// Store the postamble. + + /// \param[in] ar The archive. + static inline void postamble_store(const VectorOutputArchive& ar) {}; + }; + + /// Implementation of functions for loading the pre/postamble in Vector archives. + + /// \attention No type checking over Vector buffers, for efficiency. + /// \tparam T The data type. + template + struct ArchivePrePostImpl { + /// Load the preamble. + + /// \param[in] ar The archive. + static inline void preamble_load(const VectorInputArchive& ar) {}; + + /// Load the postamble. + + /// \param[in] ar The archive. + static inline void postamble_load(const VectorInputArchive& ar) {}; + }; + /// @} } } diff --git a/src/madness/world/worlddc.h b/src/madness/world/worlddc.h index 671e2964071..4cf178cafda 100644 --- a/src/madness/world/worlddc.h +++ b/src/madness/world/worlddc.h @@ -48,7 +48,8 @@ #include #include -namespace madness { +namespace madness +{ template class WorldContainer; @@ -57,37 +58,40 @@ namespace madness { class WorldContainerImpl; template - void swap(WorldContainer&, WorldContainer&); + void swap(WorldContainer &, WorldContainer &); template class WorldDCPmapInterface; template - class WorldDCRedistributeInterface { + class WorldDCRedistributeInterface + { public: virtual std::size_t size() const = 0; - virtual void redistribute_phase1(const std::shared_ptr< WorldDCPmapInterface >& newmap) = 0; + virtual void redistribute_phase1(const std::shared_ptr> &newmap) = 0; virtual void redistribute_phase2() = 0; virtual void redistribute_phase3() = 0; - virtual ~WorldDCRedistributeInterface() {}; + virtual ~WorldDCRedistributeInterface() {}; }; - /// Interface to be provided by any process map /// \ingroup worlddc template - class WorldDCPmapInterface { + class WorldDCPmapInterface + { public: - typedef WorldDCRedistributeInterface* ptrT; + typedef WorldDCRedistributeInterface *ptrT; + private: std::set ptrs; + public: /// Maps key to processor /// @param[in] key Key for container /// @return Processor that logically owns the key - virtual ProcessID owner(const keyT& key) const = 0; + virtual ProcessID owner(const keyT &key) const = 0; virtual ~WorldDCPmapInterface() {} @@ -96,14 +100,16 @@ namespace madness { /// Registers object for receipt of redistribute callbacks /// @param[in] ptr Pointer to class derived from WorldDCRedistributedInterface - void register_callback(ptrT ptr) { + void register_callback(ptrT ptr) + { ptrs.insert(ptr); } /// Deregisters object for receipt of redistribute callbacks /// @param[in] ptr Pointer to class derived from WorldDCRedistributedInterface - void deregister_callback(ptrT ptr) { + void deregister_callback(ptrT ptr) + { ptrs.erase(ptr); } @@ -113,26 +119,30 @@ namespace madness { /// new map and no objects will be registered in the current map. /// @param[in] world The associated world /// @param[in] newpmap The new process map - void redistribute(World& world, const std::shared_ptr< WorldDCPmapInterface >& newpmap) { + void redistribute(World &world, const std::shared_ptr> &newpmap) + { print_data_sizes(world, "before redistributing"); world.gop.fence(); for (typename std::set::iterator iter = ptrs.begin(); iter != ptrs.end(); - ++iter) { + ++iter) + { (*iter)->redistribute_phase1(newpmap); } world.gop.fence(); for (typename std::set::iterator iter = ptrs.begin(); iter != ptrs.end(); - ++iter) { + ++iter) + { (*iter)->redistribute_phase2(); newpmap->register_callback(*iter); } world.gop.fence(); for (typename std::set::iterator iter = ptrs.begin(); iter != ptrs.end(); - ++iter) { - (*iter)->redistribute_phase3(); + ++iter) + { + (*iter)->redistribute_phase3(); } world.gop.fence(); ptrs.clear(); @@ -142,7 +152,8 @@ namespace madness { /// Counts global number of entries in all containers associated with this process map /// Collective operation with global fence - std::size_t global_size(World& world) const { + std::size_t global_size(World &world) const + { world.gop.fence(); std::size_t sum = local_size(); world.gop.sum(sum); @@ -151,9 +162,11 @@ namespace madness { } /// Counts local number of entries in all containers associated with this process map - std::size_t local_size() const { + std::size_t local_size() const + { std::size_t sum = 0; - for (typename std::set::iterator iter = ptrs.begin(); iter != ptrs.end(); ++iter) { + for (typename std::set::iterator iter = ptrs.begin(); iter != ptrs.end(); ++iter) + { sum += (*iter)->size(); } return sum; @@ -162,17 +175,20 @@ namespace madness { /// Prints size info to std::cout /// Collective operation with global fence - void print_data_sizes(World& world, const std::string msg="") const { + void print_data_sizes(World &world, const std::string msg = "") const + { world.gop.fence(); std::size_t total = global_size(world); std::vector sizes(world.size()); sizes[world.rank()] = local_size(); - world.gop.sum(&sizes[0],world.size()); - if (world.rank() == 0) { + world.gop.sum(&sizes[0], world.size()); + if (world.rank() == 0) + { madness::print("data distribution info", msg); madness::print(" total: ", total); std::cout << " procs: "; - for (int i=0; i > - class WorldDCDefaultPmap : public WorldDCPmapInterface { + template > + class WorldDCDefaultPmap : public WorldDCPmapInterface + { private: const int nproc; hashfunT hashfun; + public: - WorldDCDefaultPmap(World& world, const hashfunT& hf = hashfunT()) : - nproc(world.mpi.nproc()), - hashfun(hf) - { } + WorldDCDefaultPmap(World &world, const hashfunT &hf = hashfunT()) : nproc(world.mpi.nproc()), + hashfun(hf) + { + } - ProcessID owner(const keyT& key) const { - if (nproc == 1) return 0; - return hashfun(key)%nproc; + ProcessID owner(const keyT &key) const + { + if (nproc == 1) + return 0; + return hashfun(key) % nproc; } }; /// Local process map will always return the current process as owner /// \ingroup worlddc - template > - class WorldDCLocalPmap : public WorldDCPmapInterface { + template > + class WorldDCLocalPmap : public WorldDCPmapInterface + { private: - ProcessID me; + ProcessID me; + public: - WorldDCLocalPmap(World& world) : me(world.rank()) { } - ProcessID owner(const keyT& key) const { - return me; - } + WorldDCLocalPmap(World &world) : me(world.rank()) {} + ProcessID owner(const keyT &key) const + { + return me; + } }; /// Iterator for distributed container wraps the local iterator /// \ingroup worlddc template - class WorldContainerIterator { + class WorldContainerIterator + { public: - typedef typename std::iterator_traits::iterator_category iterator_category; - typedef typename std::iterator_traits::value_type value_type; - typedef typename std::iterator_traits::difference_type difference_type; - typedef typename std::iterator_traits::pointer pointer; - typedef typename std::iterator_traits::reference reference; + typedef typename std::iterator_traits::iterator_category iterator_category; + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::difference_type difference_type; + typedef typename std::iterator_traits::pointer pointer; + typedef typename std::iterator_traits::reference reference; private: - internal_iteratorT it; ///< Iterator from local container + internal_iteratorT it; ///< Iterator from local container // TODO: Convert this to a scoped pointer. - mutable value_type* value; ///< holds the remote values + mutable value_type *value; ///< holds the remote values public: /// Default constructor makes a local uninitialized value explicit WorldContainerIterator() - : it(), value(nullptr) {} + : it(), value(nullptr) {} /// Initializes from a local iterator - explicit WorldContainerIterator(const internal_iteratorT& it) - : it(it), value(nullptr) {} + explicit WorldContainerIterator(const internal_iteratorT &it) + : it(it), value(nullptr) {} /// Initializes to cache a remote value - explicit WorldContainerIterator(const value_type& v) - : it(), value(nullptr) + explicit WorldContainerIterator(const value_type &v) + : it(), value(nullptr) { value = new value_type(v); } - WorldContainerIterator(const WorldContainerIterator& other) - : it(), value(nullptr) + WorldContainerIterator(const WorldContainerIterator &other) + : it(), value(nullptr) { copy(other); } template - WorldContainerIterator(const WorldContainerIterator& other) - : it(), value(nullptr) + WorldContainerIterator(const WorldContainerIterator &other) + : it(), value(nullptr) { copy(other); } - ~WorldContainerIterator() { + ~WorldContainerIterator() + { delete value; } /// Assignment - WorldContainerIterator& operator=(const WorldContainerIterator& other) { + WorldContainerIterator &operator=(const WorldContainerIterator &other) + { copy(other); return *this; } /// Determines if two iterators are identical - bool operator==(const WorldContainerIterator& other) const { + bool operator==(const WorldContainerIterator &other) const + { return (((!is_cached()) && (!other.is_cached())) && it == other.it) || - ((is_cached() && other.is_cached()) && value->first == other.value->first); + ((is_cached() && other.is_cached()) && value->first == other.value->first); } - /// Determines if two iterators are different - bool operator!=(const WorldContainerIterator& other) const { + bool operator!=(const WorldContainerIterator &other) const + { return !(*this == other); } - /// Pre-increment of an iterator (i.e., ++it) --- \em local iterators only /// Trying to increment a remote iterator will throw - WorldContainerIterator& operator++() { - MADNESS_ASSERT( !is_cached() ); + WorldContainerIterator &operator++() + { + MADNESS_ASSERT(!is_cached()); ++it; return *this; } - WorldContainerIterator operator++(int) { - MADNESS_ASSERT( !is_cached() ); + WorldContainerIterator operator++(int) + { + MADNESS_ASSERT(!is_cached()); WorldContainerIterator result(*this); ++it; return result; } /// Iterators dereference to std::pair - pointer operator->() const { - return (is_cached() ? value : it.operator->() ); + pointer operator->() const + { + return (is_cached() ? value : it.operator->()); } /// Iterators dereference to std::pair - reference operator*() const { - return (is_cached() ? *value : *it ); + reference operator*() const + { + return (is_cached() ? *value : *it); } /// Private: (or should be) Returns iterator of internal container - const internal_iteratorT& get_internal_iterator() const { + const internal_iteratorT &get_internal_iterator() const + { return it; } /// Returns true if this is non-local or cached value - bool is_cached() const { + bool is_cached() const + { return value != nullptr; } template - void serialize(const Archive&) { + void serialize(const Archive &) + { MADNESS_EXCEPTION("Serializing DC iterator ... why?", false); } @@ -328,13 +361,18 @@ namespace madness { friend class WorldContainerIterator; template - void copy(const WorldContainerIterator& other) { - if (static_cast(this) != static_cast(&other)) { + void copy(const WorldContainerIterator &other) + { + if (static_cast(this) != static_cast(&other)) + { delete value; - if(other.is_cached()) { - value = new value_type(* other.value); + if (other.is_cached()) + { + value = new value_type(*other.value); it = internal_iteratorT(); - } else { + } + else + { it = other.it; value = nullptr; } @@ -345,22 +383,23 @@ namespace madness { /// Internal implementation of distributed container to facilitate shallow copy /// \ingroup worlddc - template + template class WorldContainerImpl - : public WorldObject< WorldContainerImpl > - , public WorldDCRedistributeInterface + : public WorldObject>, + public WorldDCRedistributeInterface #ifndef MADNESS_DISABLE_SHARED_FROM_THIS - , public std::enable_shared_from_this > + , + public std::enable_shared_from_this> #endif // MADNESS_DISABLE_SHARED_FROM_THIS { public: - typedef typename std::pair pairT; + typedef typename std::pair pairT; typedef const pairT const_pairT; - typedef WorldContainerImpl implT; + typedef WorldContainerImpl implT; - typedef ConcurrentHashMap< keyT,valueT,hashfunT > internal_containerT; + typedef ConcurrentHashMap internal_containerT; - //typedef WorldObject< WorldContainerImpl > worldobjT; + // typedef WorldObject< WorldContainerImpl > worldobjT; typedef typename internal_containerT::iterator internal_iteratorT; typedef typename internal_containerT::const_iterator internal_const_iteratorT; @@ -371,82 +410,87 @@ namespace madness { typedef WorldContainerIterator const_iteratorT; typedef WorldContainerIterator const_iterator; - friend class WorldContainer; + friend class WorldContainer; -// template -// inline -// static -// typename containerT::iterator replace(containerT& c, const datumT& d) { -// std::pair p = c.insert(d); -// if (!p.second) p.first->second = d.second; // Who's on first? -// return p.first; -// } + // template + // inline + // static + // typename containerT::iterator replace(containerT& c, const datumT& d) { + // std::pair p = c.insert(d); + // if (!p.second) p.first->second = d.second; // Who's on first? + // return p.first; + // } private: + WorldContainerImpl(); // Inhibit default constructor - WorldContainerImpl(); // Inhibit default constructor - - std::shared_ptr< WorldDCPmapInterface > pmap;///< Function/class to map from keys to owning process - const ProcessID me; ///< My MPI rank - internal_containerT local; ///< Locally owned data - std::vector* move_list; ///< Tempoary used to record data that needs redistributing + std::shared_ptr> pmap; ///< Function/class to map from keys to owning process + const ProcessID me; ///< My MPI rank + internal_containerT local; ///< Locally owned data + std::vector *move_list; ///< Tempoary used to record data that needs redistributing /// Handles find request - void find_handler(ProcessID requestor, const keyT& key, const RemoteReference< FutureImpl >& ref) { + void find_handler(ProcessID requestor, const keyT &key, const RemoteReference> &ref) + { internal_iteratorT r = local.find(key); - if (r == local.end()) { - //print("find_handler: failure:", key); + if (r == local.end()) + { + // print("find_handler: failure:", key); this->send(requestor, &implT::find_failure_handler, ref); } - else { - //print("find_handler: success:", key, r->first, r->second); + else + { + // print("find_handler: success:", key, r->first, r->second); this->send(requestor, &implT::find_success_handler, ref, *r); } } /// Handles successful find response - void find_success_handler(const RemoteReference< FutureImpl >& ref, const pairT& datum) { - FutureImpl* f = ref.get(); + void find_success_handler(const RemoteReference> &ref, const pairT &datum) + { + FutureImpl *f = ref.get(); f->set(iterator(datum)); - //print("find_success_handler: success:", datum.first, datum.second, f->get()->first, f->get()->second); - // Todo: Look at this again. -// ref.reset(); // Matching inc() in find() where ref was made + // print("find_success_handler: success:", datum.first, datum.second, f->get()->first, f->get()->second); + // Todo: Look at this again. + // ref.reset(); // Matching inc() in find() where ref was made } /// Handles unsuccessful find response - void find_failure_handler(const RemoteReference< FutureImpl >& ref) { - FutureImpl* f = ref.get(); + void find_failure_handler(const RemoteReference> &ref) + { + FutureImpl *f = ref.get(); f->set(end()); - //print("find_failure_handler"); - // Todo: Look at this again. -// ref.reset(); // Matching inc() in find() where ref was made + // print("find_failure_handler"); + // Todo: Look at this again. + // ref.reset(); // Matching inc() in find() where ref was made } public: - - WorldContainerImpl(World& world, - const std::shared_ptr< WorldDCPmapInterface >& pm, - const hashfunT& hf) - : WorldObject< WorldContainerImpl >(world) - , pmap(pm) - , me(world.mpi.rank()) - , local(5011, hf) { + WorldContainerImpl(World &world, + const std::shared_ptr> &pm, + const hashfunT &hf) + : WorldObject>(world), pmap(pm), me(world.mpi.rank()), local(5011, hf) + { pmap->register_callback(this); } - virtual ~WorldContainerImpl() { + virtual ~WorldContainerImpl() + { pmap->deregister_callback(this); } - const std::shared_ptr< WorldDCPmapInterface >& get_pmap() const { + const std::shared_ptr> &get_pmap() const + { return pmap; } - std::shared_ptr< WorldDCPmapInterface >& get_pmap() { + std::shared_ptr> &get_pmap() + { return pmap; } - void reset_pmap_to_local() { + void reset_pmap_to_local() + { pmap->deregister_callback(this); pmap.reset(new WorldDCLocalPmap(this->get_world())); pmap->register_callback(this); @@ -454,51 +498,61 @@ namespace madness { /// replicates this WorldContainer on all ProcessIDs and generates a /// ProcessMap where all nodes are local - void replicate(bool fence) { - - World& world=this->get_world(); - pmap->deregister_callback(this); - pmap.reset(new WorldDCLocalPmap(world)); - pmap->register_callback(this); - - for (ProcessID rank=0; rankfirst; - valueT value = it->second; - world.gop.broadcast_serializable(key, rank); - world.gop.broadcast_serializable(value, rank); - } - } - else { - size_t sz; - world.gop.broadcast_serializable(sz, rank); - for (size_t i=0; iget_world(); + pmap->deregister_callback(this); + pmap.reset(new WorldDCLocalPmap(world)); + pmap->register_callback(this); + + for (ProcessID rank = 0; rank < world.size(); rank++) + { + if (rank == world.rank()) + { + std::size_t sz = size(); + world.gop.broadcast_serializable(sz, rank); + + for (auto it = begin(); it != end(); ++it) + { + keyT key = it->first; + valueT value = it->second; + world.gop.broadcast_serializable(key, rank); + world.gop.broadcast_serializable(value, rank); + } + } + else + { + size_t sz; + world.gop.broadcast_serializable(sz, rank); + for (size_t i = 0; i < sz; i++) + { + keyT key; + valueT value; + world.gop.broadcast_serializable(key, rank); + world.gop.broadcast_serializable(value, rank); + insert(pairT(key, value)); + } + } + } + if (fence) + world.gop.fence(); + } + + hashfunT &get_hash() const { return local.get_hash(); } + + bool is_local(const keyT &key) const + { return owner(key) == me; } - ProcessID owner(const keyT& key) const { + ProcessID owner(const keyT &key) const + { return pmap->owner(key); } - bool probe(const keyT& key) const { + bool probe(const keyT &key) const + { ProcessID dest = owner(key); if (dest == me) return local.find(key) != local.end(); @@ -506,123 +560,146 @@ namespace madness { return false; } - std::size_t size() const { + std::size_t size() const + { return local.size(); } - void insert(const pairT& datum) { + void insert(const pairT &datum) + { ProcessID dest = owner(datum.first); - if (dest == me) { + if (dest == me) + { // Was using iterator ... try accessor ????? accessor acc; // N.B. key might already exist if want to simply replace - [[maybe_unused]] auto inserted = local.insert(acc,datum.first); + [[maybe_unused]] auto inserted = local.insert(acc, datum.first); acc->second = datum.second; } - else { - // Must be send (not task) for sequential consistency (and relies on single-threaded remote server) + else + { + // Must be send (not task) for sequential consistency (and relies on single-threaded remote server) this->send(dest, &implT::insert, datum); } } - bool insert_acc(accessor& acc, const keyT& key) { + bool insert_acc(accessor &acc, const keyT &key) + { MADNESS_ASSERT(owner(key) == me); - return local.insert(acc,key); + return local.insert(acc, key); } - bool insert_const_acc(const_accessor& acc, const keyT& key) { + bool insert_const_acc(const_accessor &acc, const keyT &key) + { MADNESS_ASSERT(owner(key) == me); - return local.insert(acc,key); + return local.insert(acc, key); } - void clear() { + void clear() + { local.clear(); } - - void erase(const keyT& key) { + void erase(const keyT &key) + { ProcessID dest = owner(key); - if (dest == me) { + if (dest == me) + { [[maybe_unused]] auto erased = local.try_erase(key); MADNESS_ASSERT(erased); } - else { - void(implT::*eraser)(const keyT&) = &implT::erase; + else + { + void (implT::*eraser)(const keyT &) = &implT::erase; this->send(dest, eraser, key); } } template - void erase(InIter it) { + void erase(InIter it) + { MADNESS_ASSERT(!it.is_cached()); MADNESS_ASSERT(it != end()); erase(it->first); } template - void erase(InIter first, InIter last) { + void erase(InIter first, InIter last) + { InIter it = first; - do { + do + { first++; erase(it->first); it = first; - } while(first != last); + } while (first != last); } - iterator begin() { + iterator begin() + { return iterator(local.begin()); } - const_iterator begin() const { + const_iterator begin() const + { return const_iterator(local.begin()); } - iterator end() { + iterator end() + { return iterator(local.end()); } - const_iterator end() const { + const_iterator end() const + { return const_iterator(local.end()); } - Future find(const keyT& key) const { + Future find(const keyT &key) const + { // Ugliness here to avoid replicating find() and // associated handlers for const. Assumption is that // const and non-const iterators are identical except for // const attribute ... at some point probably need to do // the right thing. - Future r = const_cast(this)->find(key); - return *(Future*)(&r); + Future r = const_cast(this)->find(key); + return *(Future *)(&r); } - - Future find(const keyT& key) { + Future find(const keyT &key) + { ProcessID dest = owner(key); - if (dest == me) { + if (dest == me) + { return Future(iterator(local.find(key))); - } else { + } + else + { Future result; this->send(dest, &implT::find_handler, me, key, result.remote_ref(this->get_world())); return result; } } - bool find(accessor& acc, const keyT& key) { - if (owner(key) != me) return false; - return local.find(acc,key); + bool find(accessor &acc, const keyT &key) + { + if (owner(key) != me) + return false; + return local.find(acc, key); } - - bool find(const_accessor& acc, const keyT& key) const { - if (owner(key) != me) return false; - return local.find(acc,key); + bool find(const_accessor &acc, const keyT &key) const + { + if (owner(key) != me) + return false; + return local.find(acc, key); } - // Used to forward call to item member function template MEMFUN_RETURNT(memfunT) - itemfun(const keyT& key, memfunT memfun) { + itemfun(const keyT &key, memfunT memfun) + { accessor acc; // N.B. key may already exist, this is just to ensure lock is held by acc [[maybe_unused]] auto inserted = local.insert(acc, key); @@ -632,7 +709,8 @@ namespace madness { // Used to forward call to item member function template MEMFUN_RETURNT(memfunT) - itemfun(const keyT& key, memfunT memfun, const arg1T& arg1) { + itemfun(const keyT &key, memfunT memfun, const arg1T &arg1) + { accessor acc; // N.B. key may already exist, this is just to ensure lock is held by acc [[maybe_unused]] auto inserted = local.insert(acc, key); @@ -642,110 +720,122 @@ namespace madness { // Used to forward call to item member function template MEMFUN_RETURNT(memfunT) - itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2) { + itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2) + { accessor acc; // N.B. key may already exist, this is just to ensure lock is held by acc [[maybe_unused]] auto inserted = local.insert(acc, key); - return (acc->second.*memfun)(arg1,arg2); + return (acc->second.*memfun)(arg1, arg2); } // Used to forward call to item member function template MEMFUN_RETURNT(memfunT) - itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3) { + itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3) + { accessor acc; // N.B. key may already exist, this is just to ensure lock is held by acc [[maybe_unused]] auto inserted = local.insert(acc, key); - return (acc->second.*memfun)(arg1,arg2,arg3); + return (acc->second.*memfun)(arg1, arg2, arg3); } // Used to forward call to item member function template MEMFUN_RETURNT(memfunT) - itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4) { + itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4) + { accessor acc; // N.B. key may already exist, this is just to ensure lock is held by acc [[maybe_unused]] auto inserted = local.insert(acc, key); - return (acc->second.*memfun)(arg1,arg2,arg3,arg4); + return (acc->second.*memfun)(arg1, arg2, arg3, arg4); } // Used to forward call to item member function template MEMFUN_RETURNT(memfunT) - itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5) { + itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5) + { accessor acc; // N.B. key may already exist, this is just to ensure lock is held by acc [[maybe_unused]] auto inserted = local.insert(acc, key); - return (acc->second.*memfun)(arg1,arg2,arg3,arg4,arg5); + return (acc->second.*memfun)(arg1, arg2, arg3, arg4, arg5); } // Used to forward call to item member function template MEMFUN_RETURNT(memfunT) - itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6) { + itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6) + { accessor acc; // N.B. key may already exist, this is just to ensure lock is held by acc [[maybe_unused]] auto inserted = local.insert(acc, key); - return (acc->second.*memfun)(arg1,arg2,arg3,arg4,arg5,arg6); + return (acc->second.*memfun)(arg1, arg2, arg3, arg4, arg5, arg6); } // Used to forward call to item member function template MEMFUN_RETURNT(memfunT) - itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, - const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const arg7T& arg7) { + itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, + const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const arg7T &arg7) + { accessor acc; // N.B. key may already exist, this is just to ensure lock is held by acc [[maybe_unused]] auto inserted = local.insert(acc, key); - return (acc->second.*memfun)(arg1,arg2,arg3,arg4,arg5,arg6,arg7); + return (acc->second.*memfun)(arg1, arg2, arg3, arg4, arg5, arg6, arg7); } // First phase of redistributions changes pmap and makes list of stuff to move - void redistribute_phase1(const std::shared_ptr< WorldDCPmapInterface >& newpmap) { + void redistribute_phase1(const std::shared_ptr> &newpmap) + { pmap = newpmap; move_list = new std::vector(); - for (typename internal_containerT::iterator iter=local.begin(); iter!=local.end(); ++iter) { - if (owner(iter->first) != me) move_list->push_back(iter->first); + for (typename internal_containerT::iterator iter = local.begin(); iter != local.end(); ++iter) + { + if (owner(iter->first) != me) + move_list->push_back(iter->first); } } - struct P2Op { - implT * impl; - typedef Range::const_iterator> rangeT; - P2Op(implT* impl) : impl(impl) {} - P2Op(const P2Op& p) : impl(p.impl) {} - bool operator()(typename rangeT::iterator& iterator) const { - typename internal_containerT::iterator iter = impl->local.find(*iterator); - MADNESS_ASSERT(iter != impl->local.end()); - - //impl->insert(*iter); - impl->task(impl->owner(*iterator), &implT::insert, *iter); - - impl->local.erase(iter); // delete local copy of the data - return true; - } - }; + struct P2Op + { + implT *impl; + typedef Range::const_iterator> rangeT; + P2Op(implT *impl) : impl(impl) {} + P2Op(const P2Op &p) : impl(p.impl) {} + bool operator()(typename rangeT::iterator &iterator) const + { + typename internal_containerT::iterator iter = impl->local.find(*iterator); + MADNESS_ASSERT(iter != impl->local.end()); + + // impl->insert(*iter); + impl->task(impl->owner(*iterator), &implT::insert, *iter); + + impl->local.erase(iter); // delete local copy of the data + return true; + } + }; // Second phase moves data - void redistribute_phase2() { - this->get_world().taskq.for_each(typename P2Op::rangeT(move_list->begin(), move_list->end()), P2Op(this)); - //std::vector& mvlist = *move_list; - //for (unsigned int i=0; isize(); ++i) { - // typename internal_containerT::iterator iter = local.find(mvlist[i]); - // MADNESS_ASSERT(iter != local.end()); - // insert(*iter); - // local.erase(iter); - //} - //delete move_list; + void redistribute_phase2() + { + this->get_world().taskq.for_each(typename P2Op::rangeT(move_list->begin(), move_list->end()), P2Op(this)); + // std::vector& mvlist = *move_list; + // for (unsigned int i=0; isize(); ++i) { + // typename internal_containerT::iterator iter = local.find(mvlist[i]); + // MADNESS_ASSERT(iter != local.end()); + // insert(*iter); + // local.erase(iter); + // } + // delete move_list; } // Third phase cleans up - void redistribute_phase3() { - delete move_list; + void redistribute_phase3() + { + delete move_list; } }; - /// Makes a distributed container with specified attributes /// \ingroup worlddc @@ -771,11 +861,12 @@ namespace madness { /// All operations, including constructors and destructors, are /// non-blocking and return immediately. If communication occurs /// it is asynchronous, otherwise operations are local. - template > - class WorldContainer : public archive::ParallelSerializableObject { + template > + class WorldContainer : public archive::ParallelSerializableObject + { public: - typedef WorldContainer containerT; - typedef WorldContainerImpl implT; + typedef WorldContainer containerT; + typedef WorldContainerImpl implT; typedef typename implT::pairT pairT; typedef typename implT::iterator iterator; typedef typename implT::const_iterator const_iterator; @@ -787,20 +878,21 @@ namespace madness { private: std::shared_ptr p; - inline void check_initialized() const { + inline void check_initialized() const + { MADNESS_ASSERT(p); } - public: + public: /// Makes an uninitialized container (no communication) /// The container is useless until assigned to from a fully /// constructed container. There is no need to worry about /// default constructors being executed in order. WorldContainer() - : p() - {} - + : p() + { + } /// Makes an initialized, empty container with default data distribution (no communication) @@ -809,12 +901,12 @@ namespace madness { /// making a container, we have to assume that all processes /// execute this constructor in the same order (does not apply /// to the non-initializing, default constructor). - WorldContainer(World& world, bool do_pending=true, const hashfunT& hf = hashfunT()) + WorldContainer(World &world, bool do_pending = true, const hashfunT &hf = hashfunT()) : p(new implT(world, - std::shared_ptr< WorldDCPmapInterface >(new WorldDCDefaultPmap(world, hf)), + std::shared_ptr>(new WorldDCDefaultPmap(world, hf)), hf)) { - if(do_pending) + if (do_pending) p->process_pending(); } @@ -825,22 +917,21 @@ namespace madness { /// making a container, we have to assume that all processes /// execute this constructor in the same order (does not apply /// to the non-initializing, default constructor). - WorldContainer(World& world, - const std::shared_ptr< WorldDCPmapInterface >& pmap, - bool do_pending=true, - const hashfunT& hf = hashfunT()) + WorldContainer(World &world, + const std::shared_ptr> &pmap, + bool do_pending = true, + const hashfunT &hf = hashfunT()) : p(new implT(world, pmap, hf)) { - if(do_pending) + if (do_pending) p->process_pending(); } - /// Copy constructor is shallow (no communication) /// The copy refers to exactly the same container as other /// which must be initialized. - WorldContainer(const WorldContainer& other) + WorldContainer(const WorldContainer &other) : p(other.p) { check_initialized(); @@ -850,8 +941,10 @@ namespace madness { /// The copy refers to exactly the same container as other /// which must be initialized. - containerT& operator=(const containerT& other) { - if (this != &other) { + containerT &operator=(const containerT &other) + { + if (this != &other) + { other.check_initialized(); p = other.p; } @@ -859,140 +952,146 @@ namespace madness { } /// Returns the world associated with this container - World& get_world() const { + World &get_world() const + { check_initialized(); return p->get_world(); } - std::shared_ptr< WorldDCPmapInterface >& get_impl() { + std::shared_ptr> &get_impl() + { check_initialized(); return p; } /// replicates this WorldContainer on all ProcessIDs - void replicate(bool fence=true) { - p->replicate(fence); + void replicate(bool fence = true) + { + p->replicate(fence); } /// Inserts/replaces key+value pair (non-blocking communication if key not local) - void replace(const pairT& datum) { + void replace(const pairT &datum) + { check_initialized(); p->insert(datum); } - /// Inserts/replaces key+value pair (non-blocking communication if key not local) - void replace(const keyT& key, const valueT& value) { - replace(pairT(key,value)); + void replace(const keyT &key, const valueT &value) + { + replace(pairT(key, value)); } - /// Write access to LOCAL value by key. Returns true if found, false otherwise (always false for remote). - bool find(accessor& acc, const keyT& key) { + bool find(accessor &acc, const keyT &key) + { check_initialized(); - return p->find(acc,key); + return p->find(acc, key); } - /// Read access to LOCAL value by key. Returns true if found, false otherwise (always false for remote). - bool find(const_accessor& acc, const keyT& key) const { + bool find(const_accessor &acc, const keyT &key) const + { check_initialized(); - return p->find(acc,key); + return p->find(acc, key); } - /// Write access to LOCAL value by key. Returns true if inserted, false if already exists (throws if remote) - bool insert(accessor& acc, const keyT& key) { + bool insert(accessor &acc, const keyT &key) + { check_initialized(); - return p->insert_acc(acc,key); + return p->insert_acc(acc, key); } - /// Read access to LOCAL value by key. Returns true if inserted, false if already exists (throws if remote) - bool insert(const_accessor& acc, const keyT& key) { + bool insert(const_accessor &acc, const keyT &key) + { check_initialized(); - return p->insert_acc(acc,key); + return p->insert_acc(acc, key); } - /// Inserts pairs (non-blocking communication if key(s) not local) template - void replace(input_iterator& start, input_iterator& end) { + void replace(input_iterator &start, input_iterator &end) + { check_initialized(); using std::placeholders::_1; - std::for_each(start,end,std::bind(this,std::mem_fn(&containerT::insert),_1)); + std::for_each(start, end, std::bind(this, std::mem_fn(&containerT::insert), _1)); } - /// Returns true if local data is immediately available (no communication) - bool probe(const keyT& key) const { + bool probe(const keyT &key) const + { check_initialized(); return p->probe(key); } - /// Returns processor that logically owns key (no communication) /// Local remapping may have changed its physical location, but all /// operations should forward correctly. - inline ProcessID owner(const keyT& key) const { + inline ProcessID owner(const keyT &key) const + { check_initialized(); return p->owner(key); } - /// Returns true if the key maps to the local processor (no communication) - bool is_local(const keyT& key) const { + bool is_local(const keyT &key) const + { check_initialized(); return p->is_local(key); } - /// Returns a future iterator (non-blocking communication if key not local) /// Like an std::map an iterator "points" to an std::pair. /// /// Refer to Future for info on how to avoid blocking. - Future find(const keyT& key) { // + Future find(const keyT &key) + { // check_initialized(); return p->find(key); } - /// Returns a future iterator (non-blocking communication if key not local) /// Like an std::map an iterator "points" to an std::pair. /// /// Refer to Future for info on how to avoid blocking. - Future find(const keyT& key) const { + Future find(const keyT &key) const + { check_initialized(); - return const_cast(p.get())->find(key); + return const_cast(p.get())->find(key); } - /// Returns an iterator to the beginning of the \em local data (no communication) - iterator begin() { + iterator begin() + { check_initialized(); return p->begin(); } - /// Returns an iterator to the beginning of the \em local data (no communication) - const_iterator begin() const { + const_iterator begin() const + { check_initialized(); - return const_cast(p.get())->begin(); + return const_cast(p.get())->begin(); } /// Returns an iterator past the end of the \em local data (no communication) - iterator end() { + iterator end() + { check_initialized(); return p->end(); } /// Returns an iterator past the end of the \em local data (no communication) - const_iterator end() const { + const_iterator end() const + { check_initialized(); - return const_cast(p.get())->end(); + return const_cast(p.get())->end(); } /// Erases entry from container (non-blocking comm if remote) @@ -1003,51 +1102,58 @@ namespace madness { /// remote end. This is just the same as what happens when /// using STL iterators on an STL container in a sequential /// algorithm. - void erase(const keyT& key) { + void erase(const keyT &key) + { check_initialized(); p->erase(key); } /// Erases entry corresponding to \em local iterator (no communication) - void erase(const iterator& it) { + void erase(const iterator &it) + { check_initialized(); p->erase(it); } /// Erases range defined by \em local iterators (no communication) - void erase(const iterator& start, const iterator& finish) { + void erase(const iterator &start, const iterator &finish) + { check_initialized(); - p->erase(start,finish); + p->erase(start, finish); } - /// Clears all \em local data (no communication) /// Invalidates all iterators - void clear() { + void clear() + { check_initialized(); p->clear(); } /// Returns the number of \em local entries (no communication) - std::size_t size() const { + std::size_t size() const + { check_initialized(); return p->size(); } /// Returns shared pointer to the process mapping - inline const std::shared_ptr< WorldDCPmapInterface >& get_pmap() const { + inline const std::shared_ptr> &get_pmap() const + { check_initialized(); return p->get_pmap(); } /// Returns shared pointer to the process mapping - inline void reset_pmap_to_local() { + inline void reset_pmap_to_local() + { p->reset_pmap_to_local(); } /// Returns a reference to the hashing functor - hashfunT& get_hash() const { + hashfunT &get_hash() const + { check_initialized(); return p->get_hash(); } @@ -1057,7 +1163,8 @@ namespace madness { /// If the constructor was given \c do_pending=false then you /// \em must invoke this routine in order to process both /// prior and future messages. - inline void process_pending() { + inline void process_pending() + { check_initialized(); p->process_pending(); } @@ -1072,14 +1179,15 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< MEMFUN_RETURNT(memfunT) > - send(const keyT& key, memfunT memfun) { + Future + send(const keyT &key, memfunT memfun) + { check_initialized(); - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT) = &implT::template itemfun; return p->send(owner(key), itemfun, key, memfun); } - /// Sends message "resultT memfun(arg1T)" to item (non-blocking comm if remote) /// If item does not exist it is made with the default constructor. @@ -1090,18 +1198,19 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, const memfunT& memfun, const arg1T& arg1) { + Future + send(const keyT &key, const memfunT &memfun, const arg1T &arg1) + { check_initialized(); // To work around bug in g++ 4.3.* use static cast as alternative mechanism to force type deduction - MEMFUN_RETURNT(memfunT) (implT::*itemfun)(const keyT&, memfunT, const arg1T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const arg1T &) = &implT::template itemfun; return p->send(owner(key), itemfun, key, memfun, arg1); /*return p->send(owner(key), static_cast(&implT:: template itemfun), key, memfun, arg1);*/ } - /// Sends message "resultT memfun(arg1T,arg2T)" to item (non-blocking comm if remote) /// If item does not exist it is made with the default constructor. @@ -1112,17 +1221,18 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2) { + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2) + { check_initialized(); // To work around bug in g++ 4.3.* use static cast as alternative mechanism to force type deduction - MEMFUN_RETURNT(memfunT) (implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &) = &implT::template itemfun; return p->send(owner(key), itemfun, key, memfun, arg1, arg2); /*return p->send(owner(key), static_cast(&implT:: template itemfun), key, memfun, arg1, arg2);*/ } - /// Sends message "resultT memfun(arg1T,arg2T,arg3T)" to item (non-blocking comm if remote) /// If item does not exist it is made with the default constructor. @@ -1133,14 +1243,15 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3) { + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3) + { check_initialized(); - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&, const arg3T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &, const arg3T &) = &implT::template itemfun; return p->send(owner(key), itemfun, key, memfun, arg1, arg2, arg3); } - /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T)" to item (non-blocking comm if remote) /// If item does not exist it is made with the default constructor. @@ -1151,14 +1262,15 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4) { + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4) + { check_initialized(); - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&, const arg3T&, const arg4T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &, const arg3T &, const arg4T &) = &implT::template itemfun; return p->send(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4); } - /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T)" to item (non-blocking comm if remote) /// If item does not exist it is made with the default constructor. @@ -1169,14 +1281,15 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5) { + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5) + { check_initialized(); - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&, const arg3T&, const arg4T&, const arg5T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &, const arg3T &, const arg4T &, const arg5T &) = &implT::template itemfun; return p->send(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5); } - /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T)" to item (non-blocking comm if remote) /// If item does not exist it is made with the default constructor. @@ -1187,14 +1300,15 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6) { + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6) + { check_initialized(); - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&, const arg3T&, const arg4T&, const arg5T&, const arg6T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &, const arg3T &, const arg4T &, const arg5T &, const arg6T &) = &implT::template itemfun; return p->send(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5, arg6); } - /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T,arg7T)" to item (non-blocking comm if remote) /// If item does not exist it is made with the default constructor. @@ -1205,91 +1319,98 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, - const arg5T& arg5, const arg6T& arg6, const arg7T& arg7) { + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, + const arg5T &arg5, const arg6T &arg6, const arg7T &arg7) + { check_initialized(); - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&, const arg3T&, const arg4T&, const arg5T&, const arg6T&, const arg7T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &, const arg3T &, const arg4T &, const arg5T &, const arg6T &, const arg7T &) = &implT::template itemfun; return p->send(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, arg7); } - /// Sends message "resultT memfun() const" to item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun) const { - return const_cast(this)->send(key,memfun); + Future + send(const keyT &key, memfunT memfun) const + { + return const_cast(this)->send(key, memfun); } /// Sends message "resultT memfun(arg1T) const" to item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1) const { - return const_cast(this)->send(key,memfun,arg1); + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1) const + { + return const_cast(this)->send(key, memfun, arg1); } /// Sends message "resultT memfun(arg1T,arg2T) const" to item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2) const { - return const_cast(this)->send(key,memfun,arg1,arg2); + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2) const + { + return const_cast(this)->send(key, memfun, arg1, arg2); } - /// Sends message "resultT memfun(arg1T,arg2T,arg3T) const" to item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3) const { - return const_cast(this)->send(key,memfun,arg1,arg2,arg3); + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3) const + { + return const_cast(this)->send(key, memfun, arg1, arg2, arg3); } /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T) const" to item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4) const { - return const_cast(this)->send(key,memfun,arg1,arg2,arg3,arg4); + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4) const + { + return const_cast(this)->send(key, memfun, arg1, arg2, arg3, arg4); } /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T) const" to item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5) const { - return const_cast(this)->send(key,memfun,arg1,arg2,arg3,arg4,arg5); + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5) const + { + return const_cast(this)->send(key, memfun, arg1, arg2, arg3, arg4, arg5); } /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T) const" to item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, - const arg4T& arg4, const arg5T& arg5, const arg6T& arg6) const { - return const_cast(this)->send(key,memfun,arg1,arg2,arg3,arg4,arg5,arg6); + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, + const arg4T &arg4, const arg5T &arg5, const arg6T &arg6) const + { + return const_cast(this)->send(key, memfun, arg1, arg2, arg3, arg4, arg5, arg6); } /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T,arg7T) const" to item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, - const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const arg7T& arg7) const { - return const_cast(this)->send(key,memfun,arg1,arg2,arg3,arg4,arg5,arg6,arg7); + Future + send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, + const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const arg7T &arg7) const + { + return const_cast(this)->send(key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, arg7); } - /// Adds task "resultT memfun()" in process owning item (non-blocking comm if remote) /// If item does not exist it is made with the default constructor. @@ -1301,10 +1422,12 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const TaskAttributes& attr = TaskAttributes()) { + Future + task(const keyT &key, memfunT memfun, const TaskAttributes &attr = TaskAttributes()) + { check_initialized(); - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT) = &implT::template itemfun; return p->task(owner(key), itemfun, key, memfun, attr); } @@ -1319,11 +1442,13 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const TaskAttributes& attr = TaskAttributes()) { + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const TaskAttributes &attr = TaskAttributes()) + { check_initialized(); typedef REMFUTURE(arg1T) a1T; - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const a1T &) = &implT::template itemfun; return p->task(owner(key), itemfun, key, memfun, arg1, attr); } @@ -1338,12 +1463,14 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const TaskAttributes& attr = TaskAttributes()) { + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const TaskAttributes &attr = TaskAttributes()) + { check_initialized(); typedef REMFUTURE(arg1T) a1T; typedef REMFUTURE(arg2T) a2T; - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &) = &implT::template itemfun; return p->task(owner(key), itemfun, key, memfun, arg1, arg2, attr); } @@ -1358,13 +1485,15 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const TaskAttributes& attr = TaskAttributes()) { + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const TaskAttributes &attr = TaskAttributes()) + { check_initialized(); typedef REMFUTURE(arg1T) a1T; typedef REMFUTURE(arg2T) a2T; typedef REMFUTURE(arg3T) a3T; - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&, const a3T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &, const a3T &) = &implT::template itemfun; return p->task(owner(key), itemfun, key, memfun, arg1, arg2, arg3, attr); } @@ -1379,14 +1508,16 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const TaskAttributes& attr = TaskAttributes()) { + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const TaskAttributes &attr = TaskAttributes()) + { check_initialized(); typedef REMFUTURE(arg1T) a1T; typedef REMFUTURE(arg2T) a2T; typedef REMFUTURE(arg3T) a3T; typedef REMFUTURE(arg4T) a4T; - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&, const a3T&, const a4T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &, const a3T &, const a4T &) = &implT::template itemfun; return p->task(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, attr); } @@ -1401,15 +1532,17 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const TaskAttributes& attr = TaskAttributes()) { + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const TaskAttributes &attr = TaskAttributes()) + { check_initialized(); typedef REMFUTURE(arg1T) a1T; typedef REMFUTURE(arg2T) a2T; typedef REMFUTURE(arg3T) a3T; typedef REMFUTURE(arg4T) a4T; typedef REMFUTURE(arg5T) a5T; - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&, const a3T&, const a4T&, const a5T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &, const a3T &, const a4T &, const a5T &) = &implT::template itemfun; return p->task(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5, attr); } @@ -1424,8 +1557,9 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const TaskAttributes& attr = TaskAttributes()) { + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const TaskAttributes &attr = TaskAttributes()) + { check_initialized(); typedef REMFUTURE(arg1T) a1T; typedef REMFUTURE(arg2T) a2T; @@ -1433,7 +1567,8 @@ namespace madness { typedef REMFUTURE(arg4T) a4T; typedef REMFUTURE(arg5T) a5T; typedef REMFUTURE(arg6T) a6T; - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&, const a3T&, const a4T&, const a5T&, const a6T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &, const a3T &, const a4T &, const a5T &, const a6T &) = &implT::template itemfun; return p->task(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, attr); } @@ -1448,8 +1583,9 @@ namespace madness { /// /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const arg7T& arg7, const TaskAttributes& attr = TaskAttributes()) { + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const arg7T &arg7, const TaskAttributes &attr = TaskAttributes()) + { check_initialized(); typedef REMFUTURE(arg1T) a1T; typedef REMFUTURE(arg2T) a2T; @@ -1458,7 +1594,8 @@ namespace madness { typedef REMFUTURE(arg5T) a5T; typedef REMFUTURE(arg6T) a6T; typedef REMFUTURE(arg7T) a7T; - MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&, const a3T&, const a4T&, const a5T&, const a6T&, const a7T&) = &implT:: template itemfun; + MEMFUN_RETURNT(memfunT) + (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &, const a3T &, const a4T &, const a5T &, const a6T &, const a7T &) = &implT::template itemfun; return p->task(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, arg7, attr); } @@ -1466,80 +1603,88 @@ namespace madness { /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const TaskAttributes& attr = TaskAttributes()) const { - return const_cast(this)->task(key,memfun,attr); + Future + task(const keyT &key, memfunT memfun, const TaskAttributes &attr = TaskAttributes()) const + { + return const_cast(this)->task(key, memfun, attr); } /// Adds task "resultT memfun(arg1T) const" in process owning item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const TaskAttributes& attr = TaskAttributes()) const { - return const_cast(this)->task(key,memfun,arg1,attr); + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const TaskAttributes &attr = TaskAttributes()) const + { + return const_cast(this)->task(key, memfun, arg1, attr); } /// Adds task "resultT memfun(arg1T,arg2T) const" in process owning item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const TaskAttributes& attr = TaskAttributes()) const { - return const_cast(this)->task(key,memfun,arg1,arg2,attr); + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const TaskAttributes &attr = TaskAttributes()) const + { + return const_cast(this)->task(key, memfun, arg1, arg2, attr); } /// Adds task "resultT memfun(arg1T,arg2T,arg3T) const" in process owning item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const TaskAttributes& attr = TaskAttributes()) const { - return const_cast(this)->task(key,memfun,arg1,arg2,arg3,attr); + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const TaskAttributes &attr = TaskAttributes()) const + { + return const_cast(this)->task(key, memfun, arg1, arg2, arg3, attr); } /// Adds task "resultT memfun(arg1T,arg2T,arg3T, arg4T) const" in process owning item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const TaskAttributes& attr = TaskAttributes()) const { - return const_cast(this)->task(key,memfun,arg1,arg2,arg3,arg4,attr); + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const TaskAttributes &attr = TaskAttributes()) const + { + return const_cast(this)->task(key, memfun, arg1, arg2, arg3, arg4, attr); } /// Adds task "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T) const" in process owning item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const TaskAttributes& attr = TaskAttributes()) const { - return const_cast(this)->task(key,memfun,arg1,arg2,arg3,arg4,arg5,attr); + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const TaskAttributes &attr = TaskAttributes()) const + { + return const_cast(this)->task(key, memfun, arg1, arg2, arg3, arg4, arg5, attr); } /// Adds task "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T) const" in process owning item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const TaskAttributes& attr = TaskAttributes()) const { - return const_cast(this)->task(key,memfun,arg1,arg2,arg3,arg4,arg5,arg6,attr); + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const TaskAttributes &attr = TaskAttributes()) const + { + return const_cast(this)->task(key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, attr); } /// Adds task "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T,arg7T) const" in process owning item (non-blocking comm if remote) /// The method executes with a write lock on the item. template - Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > - task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const arg7T& arg7, const TaskAttributes& attr = TaskAttributes()) const { - return const_cast(this)->task(key,memfun,arg1,arg2,arg3,arg4,arg5,arg6,arg7,attr); + Future + task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const arg7T &arg7, const TaskAttributes &attr = TaskAttributes()) const + { + return const_cast(this)->task(key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, arg7, attr); } - /// (de)Serialize --- *Local* data only to/from anything *except* Buffer*Archive and Parallel*Archive /// Advisable for *you* to fence before and after this to ensure consistency template - void serialize(const Archive& ar) { + void serialize(const Archive &ar) + { // // !! If you change the format of this stream make sure that // !! the parallel in/out archive below is compatible @@ -1548,18 +1693,23 @@ namespace madness { unsigned long count = 0; check_initialized(); - if (Archive::is_output_archive) { + if (Archive::is_output_archive) + { ar & magic; - for (iterator it=begin(); it!=end(); ++it) count++; + for (iterator it = begin(); it != end(); ++it) + count++; ar & count; - for (iterator it=begin(); it!=end(); ++it) ar & *it; + for (iterator it = begin(); it != end(); ++it) + ar &*it; } - else { + else + { long cookie = 0l; ar & cookie; MADNESS_ASSERT(cookie == magic); ar & count; - while (count--) { + while (count--) + { pairT datum; ar & datum; replace(datum); @@ -1570,49 +1720,55 @@ namespace madness { /// (de)Serialize --- !! ONLY for purpose of interprocess communication /// This just writes/reads the unique id to/from the Buffer*Archive. - void serialize(const archive::BufferOutputArchive& ar) { + void serialize(const archive::BufferOutputArchive &ar) + { check_initialized(); - ar & static_cast*>(p.get()); + ar &static_cast *>(p.get()); } /// (de)Serialize --- !! ONLY for purpose of interprocess communication /// This just writes/reads the unique id to/from the Buffer*Archive. - void serialize(const archive::BufferInputArchive& ar) { - WorldObject* ptr = nullptr; + void serialize(const archive::BufferInputArchive &ar) + { + WorldObject *ptr = nullptr; ar & ptr; MADNESS_ASSERT(ptr); #ifdef MADNESS_DISABLE_SHARED_FROM_THIS - p.reset(static_cast(ptr), [] (implT *p_) -> void {}); + p.reset(static_cast(ptr), [](implT *p_) -> void{}); #else - p = static_cast(ptr)->shared_from_this(); + p = static_cast(ptr)->shared_from_this(); #endif // MADNESS_DISABLE_SHARED_FROM_THIS } /// Returns the associated unique id ... must be initialized - const uniqueidT& id() const { + const uniqueidT &id() const + { check_initialized(); return p->id(); } /// Destructor passes ownership of implementation to world for deferred cleanup - virtual ~WorldContainer() { + virtual ~WorldContainer() + { detail::deferred_cleanup(p->get_world(), p); } - friend void swap<>(WorldContainer&, WorldContainer&); + friend void swap<>(WorldContainer &, WorldContainer &); }; /// Swaps the content of two WorldContainer objects. It should be called on all nodes. /// \ingroup worlddc template - void swap(WorldContainer& dc0, WorldContainer& dc1) { - std::swap(dc0.p, dc1.p); + void swap(WorldContainer &dc0, WorldContainer &dc1) + { + std::swap(dc0.p, dc1.p); } - namespace archive { + namespace archive + { /// Write container to parallel archive @@ -1620,119 +1776,189 @@ namespace madness { /// all threads on each process serialize some values into a buffer, which gets concatenated /// and finally serialized to localarchive (aka VectorOutputArchive). template - struct ArchiveStoreImpl< ParallelOutputArchive, WorldContainer > { - static void store(const ParallelOutputArchive& ar, const WorldContainer& t) { + struct ArchiveStoreImpl, WorldContainer> + { + static void store(const ParallelOutputArchive &ar, const WorldContainer &t) + { using localarchiveT = VectorOutputArchive; const long magic = -5881828; // Sitar Indian restaurant in Knoxville (negative to indicate parallel!) - typedef WorldContainer dcT; + typedef WorldContainer dcT; using const_iterator = typename dcT::const_iterator; + int count = t.size(); // Must be INT for MPI and NOT const since we'll do a global sum eventually - // const size_t default_size = 100*1024*1024; - const size_t default_size = 8ul<<30; + // Strategy: + // 1. Serialize local data to a buffer in parallel over threads + // a) Compute the size of the buffer needed by each task + // b) Sum sizes and allocate the buffer of exact sizes needed for all threads + // c) Serialize the data into the buffer in parallel over threads + // 2. Gather all buffers to process 0 - World* world = ar.get_world(); - world->gop.fence(); + World *world = ar.get_world(); + world->gop.fence(); // Global fence here - class op_serialize : public TaskInterface { - const size_t ntasks; - const size_t taskid; - const dcT& t; - std::vector& v; + class op_inspector : public TaskInterface + { + const_iterator start, end; + size_t &size; public: - op_serialize(size_t ntasks, size_t taskid, const dcT& t, std::vector& v) - : ntasks(ntasks), taskid(taskid), t(t), v(v) {} - void run(World& world) { - std::size_t hint_size=(1ul<<30)/ntasks; - VectorOutputArchive var(v,hint_size); - const_iterator it=t.begin(); - size_t n = 0; - /// threads serialize round-robin over the container - while (it!=t.end()) { - if ((n%ntasks) == taskid) { - var & *it; - } - ++it; - n++; - } + op_inspector(const_iterator start, const_iterator end, size_t &size) + : start(start), end(end), size(size) {} + void run(World &world) + { + BufferOutputArchive bo; + for (const_iterator it = start; it != end; ++it) + bo &*it; + size = bo.size(); } }; - class op_concat : public TaskInterface { - unsigned char* all_data; - const std::vector& v; + class op_executor : public TaskInterface + { + const_iterator start, end; + unsigned char *buf; + const size_t size; + public: - op_concat(unsigned char* all_data, const std::vector& v) - : all_data(all_data), v(v) {} - void run(World& world) { - memcpy(all_data, v.data(), v.size()); + op_executor(const_iterator start, const_iterator end, unsigned char *buf, size_t size) + : start(start), end(end), buf(buf), size(size) {} + void run(World &world) + { + BufferOutputArchive bo(buf, size); + for (const_iterator it = start; it != end; ++it) + { + bo &*it; + } + MADNESS_CHECK(size == bo.size()); } }; - world->gop.fence(); - double wall0=wall_time(); - Mutex mutex; - size_t ntasks = std::max(size_t(1), ThreadPool::size()); + // No need for LOCAL fence here since only master thread is busy + double wall0 = wall_time(); + const size_t ntasks = std::min(size_t(count), std::max(size_t(1), ThreadPool::size())); + size_t local_size = 0; + double wall1 = wall0; + unsigned char* buf = 0; + if (ntasks > 0) + { + const size_t max_items_per_task = (std::max(1, count) - 1) / ntasks + 1; + // Compute the size of the buffer needed by each task + const_iterator starts[ntasks], ends[ntasks]; + size_t local_sizes[ntasks]; + const_iterator start = t.begin(); + size_t nleft = count; + for (size_t taskid = 0; taskid < ntasks; taskid++) + { + const_iterator end = start; + if (taskid == (ntasks - 1)) + { + end = t.end(); + } + else + { + size_t nitems = std::min(max_items_per_task, nleft); + std::advance(end, max_items_per_task); + nleft -= nitems; + } + starts[taskid] = start; + ends[taskid] = end; + world->taskq.add(new op_inspector(start, end, local_sizes[taskid])); // Be sure to pass iterators by value!! + start = end; + } + world->taskq.fence(); // just need LOCAL fence + wall1 = wall_time(); + // if (world->rank() == 0) + // printf("time in op_inspector: %8.4fs\n", wall1 - wall0); + wall0 = wall1; + + // total size over all threads + for (size_t taskid = 0; taskid < ntasks; taskid++) + { + local_size += local_sizes[taskid]; + // print("taskid",taskid,"size",local_sizes[taskid]); + } - std::vector> v(ntasks); - for (size_t taskid=0; taskidtaskq.add(new op_serialize(ntasks, taskid, t, v[taskid])); - world->gop.fence(); - // total size of all vectors - size_t total_size = 0; - for (size_t taskid=0; taskid vtotal(total_size); - - size_t offset = 0; - for (size_t taskid=0; taskidtaskq.add(new op_concat(&vtotal[offset], v[taskid])); - offset += v[taskid].size(); + // Allocate the buffer for all threads + buf = new unsigned char[local_size]; + + // Now execute the serialization + size_t offset = 0; + for (size_t taskid = 0; taskid < ntasks; taskid++) + { + world->taskq.add(new op_executor(starts[taskid], ends[taskid], buf + offset, local_sizes[taskid])); + offset += local_sizes[taskid]; + } + world->taskq.fence(); // just need LOCAL fence + + wall1 = wall_time(); + // if (world->rank() == 0) + // printf("time in op_executor: %8.4fs\n", wall1 - wall0); + wall0 = wall1; } - v.clear(); + // VERify that the serialization worked!! + // { + // BufferInputArchive bi(buf, local_size); + // for (int item=0; item datum; + // bi & datum; + // print("deserializing",datum.first); + // } + // } - double wall1=wall_time(); - if (world->rank()==0) printf("time in the taskq: %8.4fs\n",wall1-wall0); // Gather all buffers to process 0 // first gather all of the sizes and counts to a vector in process 0 - int size = vtotal.size(); - int count = t.size(); + const int size = local_size; std::vector sizes(world->size()); MPI_Gather(&size, 1, MPI_INT, sizes.data(), 1, MPI_INT, 0, world->mpi.comm().Get_mpi_comm()); world->gop.sum(count); // just need total number of elements - print("time 3",wall_time()); - // build the cumulative sum of sizes + // print("time 3",wall_time()); + // build the cumulative sum of sizes std::vector offsets(world->size()); offsets[0] = 0; - for (int i=1; isize(); ++i) offsets[i] = offsets[i-1] + sizes[i-1]; - MADNESS_CHECK(offsets.back() + sizes.back() == total_size); + for (int i = 1; i < world->size(); ++i) + offsets[i] = offsets[i - 1] + sizes[i - 1]; + size_t total_size = offsets.back() + sizes.back(); + // if (world->rank() == 0) + // print("total_size", total_size); - print("time 4",wall_time()); + // print("time 4",wall_time()); // gather the vector of data v from each process to process 0 - unsigned char* all_data=0; - if (world->rank() == 0) { + unsigned char *all_data = 0; + if (world->rank() == 0) + { all_data = new unsigned char[total_size]; } - MPI_Gatherv(vtotal.data(), vtotal.size(), MPI_BYTE, all_data, sizes.data(), offsets.data(), MPI_BYTE, 0, world->mpi.comm().Get_mpi_comm()); + MPI_Gatherv(buf, local_size, MPI_BYTE, all_data, sizes.data(), offsets.data(), MPI_BYTE, 0, world->mpi.comm().Get_mpi_comm()); + + wall1 = wall_time(); + // if (world->rank() == 0) + // printf("time in gather+gatherv: %8.4fs\n", wall1 - wall0); + wall0 = wall1; + + delete[] buf; - print("time 5",wall_time()); - if (world->rank() == 0) { - auto& localar = ar.local_archive(); + // print("time 5",wall_time()); + if (world->rank() == 0) + { + auto &localar = ar.local_archive(); localar & magic & 1; // 1 client // localar & t; - ArchivePrePostImpl::preamble_store(localar); - localar & -magic & count; + ArchivePrePostImpl::preamble_store(localar); + localar & -magic &(unsigned long)(count); localar.store(all_data, total_size); - ArchivePrePostImpl::postamble_store(localar); + ArchivePrePostImpl::postamble_store(localar); + wall1 = wall_time(); + // if (world->rank() == 0) + // printf("time in final copy on node 0: %8.4fs\n", wall1 - wall0); delete[] all_data; } world->gop.fence(); - print("time 6",wall_time()); + // print("time 6",wall_time()); } }; - /// Write container to parallel archive with optional fence /// \ingroup worlddc @@ -1751,57 +1977,68 @@ namespace madness { /// subsequent modifications. Also, there is always at least /// some synchronization between a client and its IO server. template - struct ArchiveStoreImpl< ParallelOutputArchive, WorldContainer > { - static void store(const ParallelOutputArchive& ar, const WorldContainer& t) { + struct ArchiveStoreImpl, WorldContainer> + { + static void store(const ParallelOutputArchive &ar, const WorldContainer &t) + { const long magic = -5881828; // Sitar Indian restaurant in Knoxville (negative to indicate parallel!) - typedef WorldContainer dcT; + typedef WorldContainer dcT; // typedef typename dcT::const_iterator iterator; // unused? typedef typename dcT::pairT pairT; - World* world = ar.get_world(); + World *world = ar.get_world(); Tag tag = world->mpi.unique_tag(); ProcessID me = world->rank(); - if (ar.dofence()) world->gop.fence(); - if (ar.is_io_node()) { - auto& localar = ar.local_archive(); + if (ar.dofence()) + world->gop.fence(); + if (ar.is_io_node()) + { + auto &localar = ar.local_archive(); localar & magic & ar.num_io_clients(); - for (ProcessID p=0; psize(); ++p) { - if (p == me) { + for (ProcessID p = 0; p < world->size(); ++p) + { + if (p == me) + { localar & t; } - else if (ar.io_node(p) == me) { - world->mpi.Send(int(1),p,tag); // Tell client to start sending + else if (ar.io_node(p) == me) + { + world->mpi.Send(int(1), p, tag); // Tell client to start sending archive::MPIInputArchive source(*world, p); long cookie = 0l; unsigned long count = 0ul; - ArchivePrePostImpl::preamble_store(localar); + ArchivePrePostImpl::preamble_store(localar); source & cookie & count; localar & cookie & count; - while (count--) { + while (count--) + { pairT datum; source & datum; localar & datum; } - ArchivePrePostImpl::postamble_store(localar); + ArchivePrePostImpl::postamble_store(localar); } } } - else { + else + { ProcessID p = ar.my_io_node(); int flag; - world->mpi.Recv(flag,p,tag); + world->mpi.Recv(flag, p, tag); MPIOutputArchive dest(*world, p); dest & t; dest.flush(); } - if (ar.dofence()) world->gop.fence(); + if (ar.dofence()) + world->gop.fence(); } }; template - struct ArchiveLoadImpl< ParallelInputArchive, WorldContainer > { + struct ArchiveLoadImpl, WorldContainer> + { /// Read container from parallel archive /// \ingroup worlddc @@ -1811,24 +2048,29 @@ namespace madness { /// can always run a separate job to copy to a different number. /// /// The IO node simply reads all data and inserts entries. - static void load(const ParallelInputArchive& ar, WorldContainer& t) { + static void load(const ParallelInputArchive &ar, WorldContainer &t) + { const long magic = -5881828; // Sitar Indian restaurant in Knoxville (negative to indicate parallel!) // typedef WorldContainer dcT; // unused // typedef typename dcT::iterator iterator; // unused // typedef typename dcT::pairT pairT; // unused - World* world = ar.get_world(); - if (ar.dofence()) world->gop.fence(); - if (ar.is_io_node()) { + World *world = ar.get_world(); + if (ar.dofence()) + world->gop.fence(); + if (ar.is_io_node()) + { long cookie = 0l; int nclient = 0; - auto& localar = ar.local_archive(); + auto &localar = ar.local_archive(); localar & cookie & nclient; MADNESS_CHECK(cookie == magic); - while (nclient--) { + while (nclient--) + { localar & t; } } - if (ar.dofence()) world->gop.fence(); + if (ar.dofence()) + world->gop.fence(); } }; }