diff --git a/src/apps/cc2/cc2.cc b/src/apps/cc2/cc2.cc
index 79f7d3864b7..8fe169735ae 100644
--- a/src/apps/cc2/cc2.cc
+++ b/src/apps/cc2/cc2.cc
@@ -79,8 +79,8 @@ int main(int argc, char **argv) {
         nemo->get_calc()->param.set_derived_value("print_level", 2);
         nemo->param.set_derived_value("k", 5);
         nemo->get_calc()->param.set_derived_value("k", 5);
-        nemo->param.set_derived_value<std::string>("localize", "canon");
-        nemo->get_calc()->param.set_derived_value<std::string>("localize", "canon");
+        // nemo->param.set_derived_value<std::string>("localize", "canon");
+        // nemo->get_calc()->param.set_derived_value<std::string>("localize", "canon");
         nemo->param.set_derived_values(nemo->molecule(),nemo->get_calc()->aobasis,parser);
         nemo->get_calc()->param.set_derived_values(nemo->molecule(),nemo->get_calc()->aobasis,parser);
         CC2 cc2(world, parser, nemo);
diff --git a/src/madness/chem/BSHApply.h b/src/madness/chem/BSHApply.h
index 137481911f3..dfb59f0ba02 100644
--- a/src/madness/chem/BSHApply.h
+++ b/src/madness/chem/BSHApply.h
@@ -24,6 +24,7 @@ template<typename T, std::size_t NDIM>
 class BSHApply {
 
 public:
+	enum return_value {update, residual};
 	World& world;
 	double levelshift=0.0;
 	double lo=1.e-6;
@@ -31,6 +32,7 @@ class BSHApply {
 	bool printme=false;
 	bool destroy_Vpsi=false;
 	Function<double,NDIM> metric;
+	return_value ret_value=residual;		// return the new orbitals/functions or the residuals
 
 public:
 	BSHApply(World& world) : world(world),
@@ -62,6 +64,7 @@ class BSHApply {
 	    std::vector < std::shared_ptr<SeparatedConvolution<double,NDIM> > > ops(psi.size());
 	    for (int i=0; i<eps.dim(0); ++i) {
 	    	T e_i= (eps.ndim()==2) ? eps(i,i) : eps(i);
+	    	if (printme) print("orbital energy for the BSH operator",e_i);
 	    	ops[i]=std::shared_ptr<SeparatedConvolution<double,NDIM> >(
 	    			BSHOperatorPtr<NDIM>(world, sqrt(-2.0*eps_in_green(e_i)), lo, bshtol));
 	    	ops[i]->destructive()=true;
@@ -91,7 +94,11 @@ class BSHApply {
 	    double cpu1=cpu_time();
 	    if (printme) printf("time in BSHApply()  %8.4fs\n",cpu1-cpu0);
 
-	    return std::make_tuple(res,delta_eps);
+		if (ret_value==update) return std::make_tuple(tmp,delta_eps);
+		else if (ret_value==residual) return std::make_tuple(res,delta_eps);
+		else {
+			MADNESS_EXCEPTION("unknown return value in BSHApply",1);
+		}
 	}
 
 
@@ -124,7 +131,7 @@ class BSHApply {
 			const Tensor<T> fock1) const {
 
 		// check dimensions
-   	        bool consistent=(psi.size()==size_t(fock1.dim(0)));
+   	    bool consistent=(psi.size()==size_t(fock1.dim(0)));
 		if ((fock1.ndim()==2) and not (psi.size()==size_t(fock1.dim(1)))) consistent=false;
 
 		if (not consistent) {
@@ -144,6 +151,10 @@ class BSHApply {
 			for (int i=0; i<fock.dim(0); ++i) {
 				fock(i,i)-=eps_in_green(fock(i,i));
 			}
+			if (printme) {
+				print("coupling fock matrix");
+				print(fock);
+			}
 			return transform(world, psi, fock);
 
 		} else  {
diff --git a/src/madness/chem/CC2.cc b/src/madness/chem/CC2.cc
index fd129ba6fbc..97d1ee2e1cd 100644
--- a/src/madness/chem/CC2.cc
+++ b/src/madness/chem/CC2.cc
@@ -30,13 +30,24 @@ CC2::solve() {
     dummy_mo.print_frozen_orbitals(parameters.freeze());
 
     CCOPS.reset_nemo(nemo);
+    CCOPS.get_potentials.parameters=parameters;
     CCOPS.update_intermediates(CCOPS.mo_ket());
 
+    // info keeps information on the MOs and the molecular coordinates
+    Info info;
+    info=CCOPS.update_info(parameters,nemo);
+    info.intermediate_potentials=CCIntermediatePotentials(parameters);
+
     // doubles for ground state
     Pairs<CCPair> mp2pairs, cc2pairs;
     // singles for ground state
     CC_vecfunction cc2singles(PARTICLE);
 
+    // map used to convert the Pairs structure to a vector (and back) where necessary
+    const std::size_t nfreeze=parameters.freeze();
+    const int nocc=CCOPS.mo_ket().size();
+    triangular_map=PairVectorMap::triangular_map(nfreeze,nocc);
+
     double mp2_energy=0.0, cc2_energy=0.0, mp3_energy=0.0;
 
     bool need_tdhf=parameters.response();
@@ -46,7 +57,7 @@ CC2::solve() {
     // check for restart data for CC2, otherwise use MP2 as guess
     if (need_cc2) {
         Pairs<CCPair> dummypairs;
-        bool found_cc2d = initialize_pairs(dummypairs, GROUND_STATE, CT_CC2, cc2singles, CC_vecfunction(RESPONSE));
+        bool found_cc2d = initialize_pairs(dummypairs, GROUND_STATE, CT_CC2, cc2singles, CC_vecfunction(RESPONSE), 0, info);
         if (not found_cc2d) need_mp2=true;
     }
 
@@ -58,11 +69,11 @@ CC2::solve() {
     }
 
     if (need_mp2) {
-        bool restarted=initialize_pairs(mp2pairs, GROUND_STATE, CT_MP2, CC_vecfunction(PARTICLE), CC_vecfunction(RESPONSE), 0);
+        bool restarted=initialize_pairs(mp2pairs, GROUND_STATE, CT_MP2, CC_vecfunction(PARTICLE), CC_vecfunction(RESPONSE), 0, info);
         if (restarted and parameters.no_compute_mp2()) {
 //            for (auto& pair : mp2pairs.allpairs) mp2_energy+=CCOPS.compute_pair_correlation_energy(pair.second);
         } else {
-            mp2_energy = solve_mp2_coupled(mp2pairs);
+            mp2_energy = solve_mp2_coupled(mp2pairs, info);
             output_calc_info_schema("mp2",mp2_energy);
         }
         output.section(assign_name(CT_MP2) + " Calculation Ended !");
@@ -74,8 +85,8 @@ CC2::solve() {
 
     if (need_cc2) {
         // check if singles or/and doubles to restart are there
-        initialize_singles(cc2singles, PARTICLE);
-        const bool load_doubles = initialize_pairs(cc2pairs, GROUND_STATE, CT_CC2, cc2singles, CC_vecfunction(RESPONSE), 0);
+        cc2singles=initialize_singles(PARTICLE);
+        const bool load_doubles = initialize_pairs(cc2pairs, GROUND_STATE, CT_CC2, cc2singles, CC_vecfunction(RESPONSE), 0, info);
 
         // nothing to restart -> make MP2
         if (not load_doubles) {
@@ -87,13 +98,12 @@ CC2::solve() {
             }
         }
 
-        cc2_energy = solve_cc2(cc2singles, cc2pairs);
+        cc2_energy = solve_cc2(cc2singles, cc2pairs, info);
         output_calc_info_schema("cc2",cc2_energy);
 
         output.section(assign_name(CT_CC2) + " Calculation Ended !");
         if (world.rank() == 0) {
             printf_msg_energy_time("CC2 correlation energy",cc2_energy,wall_time());
-//            std::cout << std::fixed << std::setprecision(10) << " MP2 Correlation Energy =" << mp2_energy << "\n";
             std::cout << std::fixed << std::setprecision(10) << " CC2 Correlation Energy =" << cc2_energy << "\n";
         }
     }
@@ -128,10 +138,10 @@ CC2::solve() {
             CCTimer time_ex(world, "CIS(D) for Excitation " + std::to_string(int(excitation)));
 
             // check the convergence of the cis function (also needed to store the ccs potential) and to recalulate the excitation energy
-            iterate_ccs_singles(ccs);
+            iterate_ccs_singles(ccs, info);
 
             Pairs<CCPair> cispd;
-            initialize_pairs(cispd, EXCITED_STATE, CT_CISPD, CC_vecfunction(PARTICLE), ccs, excitation);
+            initialize_pairs(cispd, EXCITED_STATE, CT_CISPD, CC_vecfunction(PARTICLE), ccs, excitation, info);
 
             const double ccs_omega = ccs.omega;
             const double cispd_omega = solve_cispd(cispd, mp2pairs, ccs);
@@ -175,14 +185,13 @@ CC2::solve() {
             CCTimer time_ex(world, "ADC(2) for Excitation " + std::to_string(int(excitation)));
 
             // check the convergence of the cis function (also needed to store the ccs potential) and to recalulate the excitation energy
-            CC_vecfunction dummy = ccs.copy();
-            iterate_ccs_singles(dummy);
+            CC_vecfunction dummy = copy(ccs);
+            iterate_ccs_singles(dummy, CCOPS.info);
             ccs.omega = dummy.omega; // will be overwritten soon
             output("Changes not stored!");
 
             Pairs<CCPair> xpairs;
-            const bool restart = initialize_pairs(xpairs, EXCITED_STATE, CT_ADC2, CC_vecfunction(PARTICLE), ccs,
-                                                  excitation);
+            const bool restart = initialize_pairs(xpairs, EXCITED_STATE, CT_ADC2, CC_vecfunction(PARTICLE), ccs, excitation, CCOPS.info);
 
             // if no restart: Calculate CIS(D) as first guess
             const double ccs_omega = ccs.omega;
@@ -190,7 +199,7 @@ CC2::solve() {
             if (not restart) {
                 output.section("No Restart-Pairs found: Calculating CIS(D) as first Guess");
                 Pairs<CCPair> cispd;
-                initialize_pairs(cispd, EXCITED_STATE, CT_CISPD, CC_vecfunction(PARTICLE), ccs, excitation);
+                initialize_pairs(cispd, EXCITED_STATE, CT_CISPD, CC_vecfunction(PARTICLE), ccs, excitation, CCOPS.info);
                 cispd_omega = solve_cispd(cispd, mp2pairs, ccs);
                 for (auto& tmp:cispd.allpairs) {
                     const size_t i = tmp.first.first;
@@ -199,10 +208,10 @@ CC2::solve() {
                 }
             }
 
-            iterate_adc2_singles(mp2pairs, ccs, xpairs);
+            iterate_adc2_singles(mp2pairs, ccs, xpairs, CCOPS.info);
             for (size_t iter = 0; iter < 10; iter++) {
                 bool dconv = iterate_adc2_pairs(xpairs, ccs);
-                bool sconv = iterate_adc2_singles(mp2pairs, ccs, xpairs);
+                bool sconv = iterate_adc2_singles(mp2pairs, ccs, xpairs, CCOPS.info);
                 if (sconv and dconv) {
                     output("ADC(2) Converged");
                     break;
@@ -244,66 +253,71 @@ CC2::solve() {
     } else if (ctype == CT_LRCC2) {
         CCTimer time(world, "Whole LRCC2 Calculation");
 
-        std::vector<std::pair<std::string, double> > results;
-        std::vector<std::pair<std::string, std::pair<double, double> > > timings;
-
-        auto vccs=solve_ccs();
-
-        std::vector<std::pair<std::string, std::pair<double, double> > > results_ex;
-        for (size_t xxx = 0; xxx < vccs.size(); xxx++) {
-            const size_t excitation = parameters.excitations()[xxx];
-            CCTimer time_ex(world, "LRCC2 Calculation for Excitation " + std::to_string(int(excitation)));
-            CC_vecfunction lrcc2_s = vccs[xxx];
-            // needed to assign an omega
-            const vector_real_function_3d backup = copy(world, lrcc2_s.get_vecfunction());
-            CC_vecfunction test(backup, RESPONSE, parameters.freeze());
-            iterate_ccs_singles(test);
-            lrcc2_s.omega = test.omega;
-            output("CCS Iteration: Changes are not applied (just omega)!");
-
-
-            Pairs<CCPair> lrcc2_d;
-            bool found_lrcc2d = initialize_pairs(lrcc2_d, EXCITED_STATE, CT_LRCC2, cc2singles, lrcc2_s, excitation);
-
-            if (found_lrcc2d) iterate_lrcc2_singles(cc2singles, cc2pairs, lrcc2_s, lrcc2_d);
-            else iterate_ccs_singles(lrcc2_s);
-            const double omega_cis = lrcc2_s.omega;
-
-            for (size_t iter = 0; iter < parameters.iter_max(); iter++) {
-                output.section("Macroiteration " + std::to_string(int(iter)) + " of LRCC2");
-                bool dconv = iterate_lrcc2_pairs(cc2singles, cc2pairs, lrcc2_s, lrcc2_d);
-                bool sconv = iterate_lrcc2_singles(cc2singles, cc2pairs, lrcc2_s, lrcc2_d);
-                if (dconv and sconv) break;
-            }
-            const double omega_cc2 = lrcc2_s.omega;
-            const std::string msg = "Excitation " + std::to_string(int(excitation));
-            results_ex.push_back(std::make_pair(msg, std::make_pair(omega_cis, omega_cc2)));
-            timings.push_back(std::make_pair(msg, time_ex.current_time(true)));
-
-        }
-
-        timings.push_back(std::make_pair("Whole LRCC2", time.current_time(true)));
-        output.section("LRCC2 Finished");
-        output("Ground State Results:");
-        for (const auto& res:results) {
-            if (world.rank() == 0)
-                std::cout << std::fixed << std::setprecision(10)
-                          << res.first << "=" << res.second << "\n";
-        }
-        output("Response Results:");
-        for (const auto& res:results_ex) {
-            if (world.rank() == 0)
-                std::cout << std::fixed << std::setprecision(10)
-                          << res.first << ": " << res.second.first << " (CIS)*, " << res.second.second << " (CC2)\n";
-        }
-        if (world.rank() == 0) std::cout << "*only if CIS vectors where given in the beginning (not for CC2 restart)\n";
-        output("\nTimings");
-        for (const auto& time:timings) {
-            if (world.rank() == 0)
-                std::cout << std::scientific << std::setprecision(2)
-                          << std::setfill(' ') << std::setw(15) << time.first
-                          << ": " << time.second.first << " (Wall), " << time.second.second << " (CPU)" << "\n";
-        }
+       auto vccs=solve_ccs();
+
+        if (world.rank()==0) print_header3("reiterating CCS");
+        iterate_ccs_singles(vccs[0], info);
+        if (world.rank()==0) print_header3("end reiterating CCS");
+
+       for (size_t iexcitation = 0; iexcitation < vccs.size(); iexcitation++) {
+           if (world.rank()==0) print_header1("Solving LRCC2 for excitation " + std::to_string(iexcitation)
+               + " with omega "+std::to_string(vccs[iexcitation].omega));
+           solve_lrcc2(cc2pairs,cc2singles,vccs[iexcitation],iexcitation,info);
+       }
+        //   const size_t excitation = parameters.excitations()[xxx];
+        //    CCTimer time_ex(world, "LRCC2 Calculation for Excitation " + std::to_string(int(excitation)));
+        //    CC_vecfunction lrcc2_s = vccs[xxx];
+        //    // needed to assign an omega
+        //    const vector_real_function_3d backup = copy(world, lrcc2_s.get_vecfunction());
+        //    CC_vecfunction test(backup, RESPONSE, parameters.freeze());
+        //    iterate_ccs_singles(test, info);
+        //    lrcc2_s.omega = test.omega;
+        //    output("CCS Iteration: Changes are not applied (just omega)!");
+
+
+        //    Pairs<CCPair> lrcc2_d;
+        //    bool found_lrcc2d = initialize_pairs(lrcc2_d, EXCITED_STATE, CT_LRCC2, cc2singles, lrcc2_s, excitation, info);
+
+        //    if (found_lrcc2d) iterate_lrcc2_singles(world, cc2singles, cc2pairs, lrcc2_s, lrcc2_d, info);
+        //    else iterate_ccs_singles(lrcc2_s, info);
+        //    const double omega_cis = lrcc2_s.omega;
+
+        //    for (size_t iter = 0; iter < parameters.iter_max(); iter++) {
+        //        output.section("Macroiteration " + std::to_string(int(iter)) + " of LRCC2");
+        //        bool dconv = iterate_lrcc2_pairs(world, cc2singles, lrcc2_s, lrcc2_d, info);
+        //        bool sconv = iterate_lrcc2_singles(world, cc2singles, cc2pairs, lrcc2_s, lrcc2_d, info);
+        //        update_reg_residues_ex(world, cc2singles, lrcc2_s, lrcc2_d, info);
+        //        if (dconv and sconv) break;
+        //    }
+        //    const double omega_cc2 = lrcc2_s.omega;
+        //    const std::string msg = "Excitation " + std::to_string(int(excitation));
+        //    results_ex.push_back(std::make_pair(msg, std::make_pair(omega_cis, omega_cc2)));
+        //    timings.push_back(std::make_pair(msg, time_ex.current_time(true)));
+
+        //}
+
+//        timings.push_back(std::make_pair("Whole LRCC2", time.current_time(true)));
+//        output.section("LRCC2 Finished");
+//        output("Ground State Results:");
+//        for (const auto& res:results) {
+//            if (world.rank() == 0)
+//                std::cout << std::fixed << std::setprecision(10)
+//                          << res.first << "=" << res.second << "\n";
+//        }
+//        output("Response Results:");
+//        for (const auto& res:results_ex) {
+//            if (world.rank() == 0)
+//                std::cout << std::fixed << std::setprecision(10)
+//                          << res.first << ": " << res.second.first << " (CIS)*, " << res.second.second << " (CC2)\n";
+//        }
+//        if (world.rank() == 0) std::cout << "*only if CIS vectors where given in the beginning (not for CC2 restart)\n";
+//        output("\nTimings");
+//        for (const auto& time:timings) {
+//            if (world.rank() == 0)
+//                std::cout << std::scientific << std::setprecision(2)
+//                          << std::setfill(' ') << std::setw(15) << time.first
+//                          << ": " << time.second.first << " (Wall), " << time.second.second << " (CPU)" << "\n";
+//        }
 
 
     } else MADNESS_EXCEPTION(("Unknown Calculation Type: " + assign_name(ctype)).c_str(), 1);
@@ -375,7 +389,8 @@ Tensor<double> CC2::enforce_core_valence_separation(const Tensor<double>& fmat)
 };
 
 // Solve the CCS equations for the ground state (debug potential and check HF convergence)
-std::vector<CC_vecfunction> CC2::solve_ccs() {
+std::vector<CC_vecfunction> CC2::solve_ccs() const
+{
 //    output.section("SOLVE CCS");
 //    std::vector<CC_vecfunction> excitations;
 //    for (size_t k = 0; k < parameters.excitations().size(); k++) {
@@ -394,16 +409,15 @@ std::vector<CC_vecfunction> CC2::solve_ccs() {
                                                           1);
         result.push_back(excitations[x]);
     }
+    print_header3("Solution of the CCS equations");
+    tdhf->analyze(result);
     return result;
 }
 
-double CC2::solve_mp2_coupled(Pairs<CCPair>& doubles) {
+double CC2::solve_mp2_coupled(Pairs<CCPair>& doubles, Info& info) {
 
     if (world.rank()==0) print_header2(" computing the MP1 wave function");
     double total_energy = 0.0;
-    const std::size_t nfreeze=parameters.freeze();
-    const int nocc=CCOPS.mo_ket().size();
-    auto triangular_map=PairVectorMap::triangular_map(nfreeze,nocc);
 
     // make vector holding CCPairs for partitioner of MacroTask
     std::vector<CCPair> pair_vec=Pairs<CCPair>::pairs2vector(doubles,triangular_map);
@@ -418,33 +432,31 @@ double CC2::solve_mp2_coupled(Pairs<CCPair>& doubles) {
         }
 
     } else {
-        if (world.rank()==0) print_header3("Starting MP2 constant part calculation");
-        // calc constant part via taskq
-        auto taskq = std::shared_ptr<MacroTaskQ>(new MacroTaskQ(world, world.size()));
-        taskq->set_printlevel(3);
-        MacroTaskMp2ConstantPart t;
-        MacroTask task(world, t, taskq);
-        task.set_name("MP2_Constant_Part");
-        std::vector<real_function_6d> result_vec = task(pair_vec, CCOPS.mo_ket().get_vecfunction(),
-                                                        CCOPS.mo_bra().get_vecfunction(), parameters,
-                                                        nemo->R_square, nemo->ncf->U1vec(),std::vector<std::string>({"Ue","KffK"}));
-        taskq->print_taskq();
-        taskq->run_all();
-
-        if (world.rank()==0) std::cout << std::fixed << std::setprecision(1) << "\nFinished constant part at time " << wall_time() << std::endl;
-        if (world.rank()==0) std::cout << std::fixed << std::setprecision(1) << "\nStarting saving pairs and energy calculation at time " << wall_time() << std::endl;
+
+        if (world.rank()==0) {
+            std::cout << std::fixed << std::setprecision(1) << "\nStarting constant part at time " << wall_time() << std::endl;
+        }
+        MacroTaskConstantPart t;
+        MacroTask task(world, t);
+        std::vector<Function<double,3>> gs_singles, ex_singles;         // dummy vectors
+        std::vector<real_function_6d> result_vec = task(pair_vec, gs_singles, ex_singles, info) ;
+
+        if (world.rank()==0) {
+            std::cout << std::fixed << std::setprecision(1) << "\nFinished constant part at time " << wall_time() << std::endl;
+            std::cout << std::fixed << std::setprecision(1) << "\nStarting saving pairs and energy calculation at time " << wall_time() << std::endl;
+        }
 
         // transform vector back to Pairs structure
         for (size_t i = 0; i < pair_vec.size(); i++) {
             pair_vec[i].constant_part = result_vec[i];
-            pair_vec[i].functions[0] = CCPairFunction<double,6>(result_vec[i]);
+            // pair_vec[i].functions[0] = CCPairFunction<double,6>(result_vec[i]);
             pair_vec[i].constant_part.truncate().reduce_rank();
             pair_vec[i].constant_part.print_size("constant_part");
             pair_vec[i].function().truncate().reduce_rank();
             save(pair_vec[i].constant_part, pair_vec[i].name() + "_const");
             // save(pair_vec[i].function(), pair_vec[i].name());
             if (pair_vec[i].type == GROUND_STATE) {
-                double energy = CCOPS.compute_pair_correlation_energy(pair_vec[i]);
+                double energy = CCOPS.compute_pair_correlation_energy(world,info,pair_vec[i]);
                 if (world.rank()==0) printf("pair energy for pair %zu %zu: %12.8f\n", pair_vec[i].i, pair_vec[i].j, energy);
                 total_energy += energy;
             }
@@ -458,9 +470,6 @@ double CC2::solve_mp2_coupled(Pairs<CCPair>& doubles) {
 
     if (world.rank()==0) print_header3("Starting updating MP2 pairs");
 
-    // create new pairs structure
-    Pairs<CCPair> updated_pairs;
-    for (auto& tmp_pair : pair_vec) updated_pairs.insert(tmp_pair.i, tmp_pair.j, tmp_pair);
 
     auto solver= nonlinear_vector_solver<double,6>(world,pair_vec.size());
     solver.set_maxsub(parameters.kain_subspace());
@@ -468,34 +477,36 @@ double CC2::solve_mp2_coupled(Pairs<CCPair>& doubles) {
 
 
     for (size_t iter = 0; iter < parameters.iter_max_6D(); iter++) {
+        if (world.rank()==0) print_header3("Starting iteration " + std::to_string(int(iter)) + " of MP2");
 
         // compute the coupling between the pair functions
-        Pairs<real_function_6d> coupling=compute_local_coupling(updated_pairs);
+        Pairs<real_function_6d> coupling=compute_local_coupling(pair_vec, info);
         auto coupling_vec=Pairs<real_function_6d>::pairs2vector(coupling,triangular_map);
         if (parameters.debug()) print_size(world, coupling_vec, "couplingvector");
 
-        double old_energy = total_energy;
-        total_energy = 0.0;
 
-        // calc update for pairs via macrotask
-        auto taskq = std::shared_ptr<MacroTaskQ>(new MacroTaskQ(world, world.size()));
-        taskq->set_printlevel(3);
-        //taskq->cloud.set_debug(true);
-        MacroTaskMp2UpdatePair t;
-        MacroTask task1(world, t, taskq);
-        std::vector<real_function_6d> u_update = task1(pair_vec, coupling_vec, parameters, nemo->get_calc()->molecule.get_all_coords_vec(),
-                                                      CCOPS.mo_ket().get_vecfunction(), CCOPS.mo_bra().get_vecfunction(),
-                                                      nemo->ncf->U1vec(), nemo->ncf->U2());
-        taskq->print_taskq();
-        taskq->run_all();
+        if (world.rank()==0) {
+            std::cout << std::fixed << std::setprecision(1) << "\nStart updating pairs part at time " << wall_time() << std::endl;
+        }
+
+        MacroTaskIteratePair t;
+        MacroTask task1(world, t);
+        CC_vecfunction dummy_singles1(PARTICLE);
+        const std::size_t maxiter=1;
+        auto unew = task1(pair_vec, coupling_vec, dummy_singles1, dummy_singles1, info, maxiter);
 
+        std::vector<real_function_6d> u;
+        for (auto p : pair_vec) u.push_back(p.function());
+        auto residual=u-unew;
 
+        // residual statistics, reported in the convergence printout below
+        auto [rmsrnorm, maxrnorm]=CCPotentials::residual_stats(residual);
+
+        // update the pair functions
         if (parameters.kain()) {
             if (world.rank()==0) std::cout << "Update with KAIN" << std::endl;
-
-            std::vector<real_function_6d> u;
-            for (auto p : pair_vec) u.push_back(p.function());
-            std::vector<real_function_6d> kain_update = copy(world,solver.update(u, u_update));
+            // std::vector<real_function_6d> kain_update = copy(world,solver.update(u, u_update));
+            std::vector<real_function_6d> kain_update = copy(world,solver.update(u, residual));
             for (size_t i=0; i<pair_vec.size(); ++i) {
                 kain_update[i].truncate().reduce_rank();
                 kain_update[i].print_size("Kain-Update-Function");
@@ -504,39 +515,25 @@ double CC2::solve_mp2_coupled(Pairs<CCPair>& doubles) {
         } else {
             if (world.rank()==0) std::cout << "Update without KAIN" << std::endl;
             for (size_t i=0; i<pair_vec.size(); ++i) {
-                pair_vec[i].update_u(pair_vec[i].function() - u_update[i]);
+                // pair_vec[i].update_u(pair_vec[i].function() - u_update[i]);
+                pair_vec[i].update_u(unew[i]);
             }
         }
 
         // calculate energy and error and update pairs
-        double total_rnorm = 0.0, maxrnorm=0.0;
+        double old_energy = total_energy;
+        total_energy = 0.0;
         for (size_t i = 0; i < pair_vec.size(); i++) {
-            const double error = u_update[i].norm2();
-            if (world.rank()==0) std::cout << "residual " << pair_vec[i].i << " " << pair_vec[i].j << " " << error << std::endl;
-            maxrnorm = std::max(maxrnorm, error);
-            total_rnorm+=error;
-
             save(pair_vec[i].function(), pair_vec[i].name());
-            double energy = 0.0;
-            if (pair_vec[i].type == GROUND_STATE) {
-                double energy = CCOPS.compute_pair_correlation_energy(pair_vec[i]);
-                if (world.rank()==0) printf("pair energy for pair %zu %zu: %12.8f\n", pair_vec[i].i, pair_vec[i].j, energy);
-                total_energy += energy;
-            }
+            double energy = CCOPS.compute_pair_correlation_energy(world,info,pair_vec[i]);
             total_energy += energy;
-        }
-
-        for (auto& tmp_pair : pair_vec) {
-            updated_pairs(tmp_pair.i, tmp_pair.j).update_u(tmp_pair.function());
+            if (world.rank()==0) printf("pair energy for pair %zu %zu: %12.8f\n", pair_vec[i].i, pair_vec[i].j, energy);
         }
 
 		if (world.rank()==0) {
-		    std::cout << "convergence: total/max residual, energy/norm change "
-				<< std::scientific << std::setprecision(1)
-				<< maxrnorm << " " << total_rnorm << " "
-                << std::abs(old_energy - total_energy) << std::endl;
-                // << std::abs(old_norm - total_norm);
-			printf("finished iteration %2d at time %8.1fs with energy  %12.8f\n",
+		    double delta=old_energy - total_energy;
+		    CCPotentials::print_convergence("MP2 doubles",rmsrnorm,maxrnorm,delta,iter);
+			printf("finished MP2 iteration %2d at time %8.1fs with energy  %12.8f\n",
 					int(iter), wall_time(), total_energy);
 		}
 
@@ -546,15 +543,6 @@ double CC2::solve_mp2_coupled(Pairs<CCPair>& doubles) {
         //print pair energies if converged
         if (converged) {
             if (world.rank() == 0) std::cout << "\nPairs converged!\n";
-            if (world.rank() == 0) std::cout << "\nMP2 Pair Correlation Energies:\n";
-            for (auto& pair : updated_pairs.allpairs) {
-                const double pair_energy = CCOPS.compute_pair_correlation_energy(pair.second);
-                if (world.rank() == 0) {
-                    std::cout << std::fixed << std::setprecision(10) << "omega_"
-                              << pair.second.i << pair.second.j << "=" << pair_energy << "\n";
-                }
-            }
-            if (world.rank() == 0) std::cout << "sum     =" << total_energy << "\n";
             break;
         }
     }
@@ -571,15 +559,16 @@ double CC2::solve_mp2_coupled(Pairs<CCPair>& doubles) {
 /// add the coupling terms for local MP2
 
 /// @return \sum_{k\neq i} f_ki |u_kj> + \sum_{l\neq j} f_lj |u_il>
-Pairs<real_function_6d> CC2::compute_local_coupling(const Pairs<real_function_6d>& pairs) const {
+Pairs<real_function_6d> CC2::compute_local_coupling(const Pairs<real_function_6d>& pairs, const Info& info) {
 
-    const int nmo = nemo->get_calc()->amo.size();
+    const int nmo = info.mo_ket.size();
+    World& world=pairs.allpairs.begin()->second.world();
 
     // temporarily make all N^2 pair functions
     typedef std::map<std::pair<int, int>, real_function_6d> pairsT;
     pairsT quadratic;
-    for (int k = parameters.freeze(); k < nmo; ++k) {
-        for (int l = parameters.freeze(); l < nmo; ++l) {
+    for (int k = info.parameters.freeze(); k < nmo; ++k) {
+        for (int l = info.parameters.freeze(); l < nmo; ++l) {
             if (l >= k) {
                 quadratic[std::make_pair(k, l)] = pairs(k, l);
             } else {
@@ -592,35 +581,36 @@ Pairs<real_function_6d> CC2::compute_local_coupling(const Pairs<real_function_6d
     world.gop.fence();
 
     // the coupling matrix is the Fock matrix, skipping diagonal elements
-    Tensor<double> fock1 = nemo->compute_fock_matrix(nemo->get_calc()->amo, nemo->get_calc()->aocc);
+    // Tensor<double> fock1 = nemo->compute_fock_matrix(nemo->get_calc()->amo, nemo->get_calc()->aocc);
+    Tensor<double> fock1 = copy(info.fock);
     for (int k = 0; k < nmo; ++k) {
         if (fock1(k, k) > 0.0) MADNESS_EXCEPTION("positive orbital energies", 1);
         fock1(k, k) = 0.0;
     }
 
     Pairs<real_function_6d> coupling;
-    for (int i = parameters.freeze(); i < nmo; ++i) {
+    for (int i = info.parameters.freeze(); i < nmo; ++i) {
         for (int j = i; j < nmo; ++j) {
             coupling.insert(i, j, real_factory_6d(world).compressed());
         }
     }
 
-    for (int i = parameters.freeze(); i < nmo; ++i) {
+    for (int i = info.parameters.freeze(); i < nmo; ++i) {
         for (int j = i; j < nmo; ++j) {
-            for (int k = parameters.freeze(); k < nmo; ++k) {
+            for (int k = info.parameters.freeze(); k < nmo; ++k) {
                 if (fock1(k, i) != 0.0) {
                     coupling(i, j).gaxpy(1.0, quadratic[std::make_pair(k, j)], fock1(k, i), false);
                 }
             }
 
-            for (int l = parameters.freeze(); l < nmo; ++l) {
+            for (int l = info.parameters.freeze(); l < nmo; ++l) {
                 if (fock1(l, j) != 0.0) {
                     coupling(i, j).gaxpy(1.0, quadratic[std::make_pair(i, l)], fock1(l, j), false);
                 }
             }
             world.gop.fence();
             const double thresh = FunctionDefaults<6>::get_thresh();
-            coupling(i, j).truncate(thresh * 0.1).reduce_rank();
+            coupling(i, j).truncate(thresh * 0.3).reduce_rank();
         }
     }
     world.gop.fence();
@@ -683,95 +673,196 @@ CC2::iterate_adc2_pairs(Pairs<CCPair>& cispd, const CC_vecfunction& ccs) {
 }
 
 bool
-CC2::iterate_lrcc2_pairs(const CC_vecfunction& cc2_s, const Pairs<CCPair>& cc2_d, const CC_vecfunction lrcc2_s,
-                         Pairs<CCPair>& lrcc2_d) {
-    output.section("Solve LRCC2 for Excitation energy " + std::to_string(double(lrcc2_s.omega)));
+CC2::iterate_lrcc2_pairs(World& world, const CC_vecfunction& cc2_s,
+                         const CC_vecfunction lrcc2_s, Pairs<CCPair>& lrcc2_d, const Info& info) {
+    // one LRCC2 doubles macro-iteration: new constant parts, local coupling, pair update; returns true if rms residual < dconv_6D
+    if (world.rank()==0) {
+        print_header3("Solving LRCC2 doubles equations");
+        print("starting at time ",wall_time());
+        print("using macrotasks with redirected output");
+    }
     MADNESS_ASSERT(lrcc2_s.type == RESPONSE);
-    CCOPS.update_intermediates(lrcc2_s);
 
-    bool conv = true;
-    for (auto& tmp:lrcc2_d.allpairs) {
-        CCPair& pair = tmp.second;
-        const size_t i = pair.i;
-        const size_t j = pair.j;
-        // check if singles have significantly changed
-        if (lrcc2_s(i).current_error < 0.1 * parameters.thresh_6D() and
-            lrcc2_s(j).current_error < 0.1 * parameters.thresh_6D())
-            output("Skipping Pair Iteration, No significant Change in Singles");
-        else {
-            pair.bsh_eps = CCOPS.get_epsilon(pair.i, pair.j) + lrcc2_s.omega;
-            update_constant_part_lrcc2(pair, cc2_s, lrcc2_s);
-            conv = iterate_pair(pair, lrcc2_s);
-        }
+    auto triangular_map=PairVectorMap::triangular_map(info.parameters.freeze(),info.mo_ket.size());
+    auto pair_vec=Pairs<CCPair>::pairs2vector(lrcc2_d,triangular_map);
+
+    // make new constant part
+    MacroTaskConstantPart tc;
+    MacroTask task(world, tc);
+    auto cp = task(pair_vec, cc2_s.get_vecfunction(), lrcc2_s.get_vecfunction(), info) ;
+    print_size(world,cp,"constant part in iter");
+
+    for (std::size_t i=0; i<pair_vec.size(); ++i) {
+        pair_vec[i].constant_part=cp[i];
+        save(pair_vec[i].constant_part, pair_vec[i].name() + "_const");
+
     }
 
-    return conv;
+    // if no function has been computed so far use the constant part (first iteration)
+    for (auto& pair : pair_vec) if (not pair.function().is_initialized()) pair.update_u(pair.constant_part);
+
+    for (const auto& p : pair_vec) p.constant_part.print_size("constant_part before iter");
+    for (const auto& p : pair_vec) p.function().print_size("u before iter");
+
+    // compute the coupling between the pair functions
+    if (world.rank()==0) print("computing local coupling in the universe");
+    Pairs<real_function_6d> coupling=compute_local_coupling(pair_vec, info);
+    auto coupling_vec=Pairs<real_function_6d>::pairs2vector(coupling,triangular_map);
+    reconstruct(world,coupling_vec);
+    for (auto& p : pair_vec) {
+        p.constant_part.reconstruct();
+        p.function().reconstruct();
+    }
+
+    if (info.parameters.debug()) print_size(world, coupling_vec, "couplingvector");
+
+    // iterate the pair
+    MacroTaskIteratePair t1;
+    MacroTask task1(world, t1);
+    // disabled temporary fix: create dummy functions so that the cloud is not confused
+    // real_function_6d tmp=real_factory_6d(world).functor([](const coord_6d& r){return 0.0;});
+    // std::vector<real_function_6d> vdummy_6d(pair_vec.size(),tmp);         // dummy vectors
+    const std::size_t maxiter=10;
+    auto unew = task1(pair_vec, coupling_vec, cc2_s, lrcc2_s, info, maxiter);
+
+    for (const auto& u : unew) u.print_size("u after iter");
+    // get some statistics: the residual is the difference between the old and the updated pair functions
+    std::vector<Function<double,6>> uold;
+    for (const auto & p : pair_vec) uold.push_back(p.function());
+    auto residual=uold-unew;
+    double nold=norm2(world,uold);
+    double nnew=norm2(world,unew);
+    if (world.rank()==0) print("norm(old), norm(new) ",nold,nnew);
+    auto [rmsrnorm, rmsrmax] = CCPotentials::residual_stats(residual);
+    if (world.rank()==0) CCPotentials::print_convergence("LRCC2 doubles",rmsrnorm, rmsrmax,0,0);
+
+    // update the pair functions
+    for (std::size_t i=0; i<pair_vec.size(); ++i) pair_vec[i].update_u(unew[i]);
+    lrcc2_d=Pairs<CCPair>::vector2pairs(pair_vec,triangular_map);
+
+    // save latest iteration
+    if (world.rank()==0) print("saving latest iteration of LRCC2 to file");
+    for (const auto& pair : pair_vec) {
+        save(pair.constant_part, pair.name() + "_const");
+        save(pair.function(), pair.name());
+    }
+
+    return (rmsrnorm<info.parameters.dconv_6D());
 }
 
 
 double
-CC2::solve_cc2(CC_vecfunction& singles, Pairs<CCPair>& doubles) {
+CC2::solve_cc2(CC_vecfunction& singles, Pairs<CCPair>& doubles, Info& info) const {
+    // CC2 ground state: alternate doubles updates (via macrotasks) and singles updates; omega is the correlation energy
 
     output.section("Solving CC2 Ground State");
 
     MADNESS_ASSERT(singles.type == PARTICLE);
-    CCOPS.update_intermediates(singles);
-    output.section("Solve CC2 Ground State");
     CCTimer time(world, "CC2 Ground State");
 
-    double omega = CCOPS.compute_cc2_correlation_energy(singles, doubles);
+    double omega = CCPotentials::compute_cc2_correlation_energy(world, singles, doubles, info);
     if (world.rank() == 0)
         std::cout << std::fixed << std::setprecision(10) << "Current Correlation Energy = " << omega << "\n";
 
-    if (not parameters.no_compute_cc2()) {
-        // first singles iteration
-        output.section("Initialize Singles to the Doubles");
-        iterate_cc2_singles(singles, doubles);
-        // update correlation energy
-        omega = CCOPS.compute_cc2_correlation_energy(singles, doubles);
-
-        for (size_t iter = 0; iter < parameters.iter_max(); iter++) {
-            CCTimer time_miter(world, "Macroiteration " + std::to_string(int(iter)) + " of CC2");
-            output.section("Macroiteration " + std::to_string(int(iter)) + " of CC2");
-
-            // iterate doubles
-            bool doubles_converged = true;
-            for (auto& pairs: doubles.allpairs) {
-                CCPair& pair = pairs.second;
-                update_constant_part_cc2_gs(singles, pair);
-                bool pair_converged = iterate_pair(pair, singles);
-                save(pair.function(), pair.name());
-                if (not pair_converged) doubles_converged = false;
-            }
+    if (parameters.no_compute_cc2()) {
+        if (world.rank()==0) print("found no_compute_cc2 key -- recompute singles for the singles-potentials");
+        iterate_cc2_singles(world, singles, doubles, info);
+        return omega;
+    }
 
-            // new omega
-            omega = CCOPS.compute_cc2_correlation_energy(singles, doubles);
+    CC_vecfunction ex_singles_dummy;
 
-            // check if singles converged
-            const bool singles_converged = iterate_cc2_singles(singles, doubles);
+    // first singles iteration
+    output.section("Initialize Singles to the Doubles");
 
-            // check if energy converged
-            const double omega_new = CCOPS.compute_cc2_correlation_energy(singles, doubles);
-            const double delta = omega_new - omega;
-            const bool omega_converged(delta < parameters.econv());
-            omega = omega_new;
-            if (world.rank() == 0)
-                std::cout << std::fixed << std::setprecision(10) << "Current Correlation Energy = " << omega << "\n";
-            if (world.rank() == 0)
-                std::cout << std::fixed << std::setprecision(10) << "Difference                  = " << delta << "\n";
+    // given the doubles, we can solve the singles equations
+    iterate_cc2_singles(world, singles, doubles, info);
+    // the doubles ansatz depends on the singles and must be updated: |\tau_ij> = |u_ij> + Q12 f12 |t_i t_j>
+    update_reg_residues_gs(world, singles, doubles, info);
+    omega = CCPotentials::compute_cc2_correlation_energy(world, singles, doubles, info);
 
-            if (doubles_converged and singles_converged and omega_converged) break;
+    for (size_t iter = 0; iter < parameters.iter_max(); iter++) {
+        CCTimer time_miter(world, "Macroiteration " + std::to_string(int(iter)) + " of CC2");
+        output.section("Macroiteration " + std::to_string(int(iter)) + " of CC2");
 
-            time_miter.info();
+        if (world.rank()==0) print("computing the constant part via macrotasks -- output redirected");
+        timer timer1(world);
+
+        std::vector<CCPair> pair_vec=Pairs<CCPair>::pairs2vector(doubles,triangular_map);
+        MacroTaskConstantPart t;
+        MacroTask task(world, t);
+        std::vector<real_function_6d> constant_part_vec = task(pair_vec, singles.get_vecfunction(),
+            ex_singles_dummy.get_vecfunction(), info) ;
+        for (std::size_t i=0; i<pair_vec.size(); ++i) pair_vec[i].constant_part=constant_part_vec[i];
+
+        if (parameters.debug()) {
+            for (auto& pair: pair_vec) pair.constant_part.print_size("size of constant part macrotask "+pair.name());
         }
-        omega = CCOPS.compute_cc2_correlation_energy(singles, doubles);
-        output.section("CC2 Iterations Eneded");
-    } else {
-        output.section("Found no_compute_cc2 Key: Reiterating Singles to check convergence");
-        // need the singles potential for the constant part of LRCC2 so we recompute it (also good to check if it is converged)
-        bool sconv = iterate_cc2_singles(singles, doubles);
-        if (not sconv) output.warning("Singles not Converged");
+
+        timer1.tag("computing constant part via macrotasks");
+
+
+        // compute the coupling between the pair functions
+        if (world.rank()==0) print("computing local coupling in the universe");
+        Pairs<real_function_6d> coupling=compute_local_coupling(pair_vec, info);
+        auto coupling_vec=Pairs<real_function_6d>::pairs2vector(coupling,triangular_map);
+        timer1.tag("computing local coupling");
+
+        if (world.rank()==0) print("update the pair functions via macrotasks -- output redirected");
+        MacroTaskIteratePair t1;
+        MacroTask task1(world, t1);
+        // ground-state run: the excited-state singles argument is the empty ex_singles_dummy
+        // declared before the loop (no second dummy object is needed)
+        const std::size_t maxiter=3;
+        auto unew = task1(pair_vec, coupling_vec, singles, ex_singles_dummy,
+            info, maxiter);
+
+        // the residual is the difference between the pair functions before and after the update
+        std::vector<real_function_6d> u_old;
+        for (const auto& p : pair_vec) u_old.push_back(p.function());
+
+        auto residual=u_old-unew;
+        timer1.tag("computing pair function update via macrotasks");
+
+        for (std::size_t i=0; i<pair_vec.size(); ++i) pair_vec[i].update_u(unew[i]);
+        doubles=Pairs<CCPair>::vector2pairs(pair_vec,triangular_map);
+
+        // save latest iteration
+        if (world.rank()==0) print("saving latest iteration to file");
+        for (const auto& pair : pair_vec) {
+            save(pair.constant_part, pair.name() + "_const");
+            save(pair.function(), pair.name());
+        }
+        singles.save_restartdata(world,madness::name(singles.type));
+
+        auto [rmsrnorm,maxrnorm]=CCPotentials::residual_stats(residual);
+        bool doubles_converged=rmsrnorm<parameters.dconv_6D();
+
+        // check if singles converged
+        const bool singles_converged = iterate_cc2_singles(world, singles, doubles, info);
+
+        // check if energy converged (two-sided: a large negative change is not convergence)
+        const double omega_new = CCPotentials::compute_cc2_correlation_energy(world, singles, doubles, info);
+        timer1.tag("computing cc2 energy");
+        const double delta = omega_new - omega;
+        const bool omega_converged(delta < parameters.econv() and -delta < parameters.econv());
+        omega = omega_new;
+        if (world.rank() == 0)
+            std::cout << std::fixed << std::setprecision(10) << "Current Correlation Energy = " << omega << "\n";
+        if (world.rank() == 0)
+            std::cout << std::fixed << std::setprecision(10) << "Difference                  = " << delta << "\n";
+
+        if (world.rank()==0) {
+            CCPotentials::print_convergence("CC2 macro",rmsrnorm,maxrnorm,delta,iter);
+            printf("finished CC2 macro iteration %2d at time %8.1fs with energy  %12.8f\n",
+                    int(iter), wall_time(), omega);
+        }
+        if (doubles_converged and singles_converged and omega_converged) break;
+
+        time_miter.info();
     }
+    omega = CCPotentials::compute_cc2_correlation_energy(world, singles, doubles, info);
+    output.section("CC2 Iterations Eneded");
 
     if (world.rank() == 0)
         std::cout << std::fixed << std::setprecision(10) << "Current Correlation Energy = " << omega << "\n";
@@ -781,6 +872,76 @@ CC2::solve_cc2(CC_vecfunction& singles, Pairs<CCPair>& doubles) {
 }
 
 
+/// solve the excited state LR-CC2 equations for a given excitation
+
+/// @param[in,out] gs_doubles: the ground state doubles (handed by non-const reference to the singles iteration)
+/// @param[in] gs_singles: the ground state singles
+/// @param[in] cis: the CIS singles, copied and used as starting point for the excited-state singles
+/// @param[in] excitation: the excitation number
+/// @param[in,out] info: calculation data handed on to all pair and singles routines
+/// @return a tuple with the excited state doubles, the excited state singles and the excitation energy
+std::tuple<Pairs<CCPair>, CC_vecfunction, double>
+CC2::solve_lrcc2(Pairs<CCPair>& gs_doubles, const CC_vecfunction& gs_singles, const CC_vecfunction& cis,
+    const std::size_t excitation, Info& info) const {
+    CCTimer time(world, "Whole LRCC2 Calculation");
+    // NOTE(review): `results` is never filled in this function, so the ground-state section below prints no entries
+    std::vector<std::pair<std::string, double>> results;
+    std::vector<std::pair<std::string, std::pair<double, double>>> timings;
+    std::vector<std::pair<std::string, std::pair<double, double>>> results_ex;
+
+    auto ex_singles = copy(cis);
+
+    Pairs<CCPair> ex_doubles;
+    bool found_lrcc2d = initialize_pairs(ex_doubles, EXCITED_STATE, CT_LRCC2, gs_singles, ex_singles, excitation, info);
+
+    if (found_lrcc2d) iterate_lrcc2_singles(world, gs_singles, gs_doubles, ex_singles, ex_doubles, info);
+    else iterate_ccs_singles(ex_singles, info);
+    const double omega_cis = ex_singles.omega;
+
+    for (size_t iter = 0; iter < parameters.iter_max(); iter++) {
+        if (world.rank()==0) print_header2("Macroiteration " + std::to_string(int(iter)) + " of LRCC2 for excitation energy "+std::to_string(ex_singles.omega));
+        update_reg_residues_ex(world, gs_singles, ex_singles, ex_doubles, info);
+        bool dconv = iterate_lrcc2_pairs(world, gs_singles, ex_singles, ex_doubles, info);
+        bool sconv = iterate_lrcc2_singles(world, gs_singles, gs_doubles, ex_singles, ex_doubles, info);
+        // update_reg_residues_ex(world, gs_singles, ex_singles, ex_doubles, info);
+        if (sconv and dconv) break;
+    }
+
+    const double omega_cc2 = ex_singles.omega;
+    const std::string msg = "Excitation " + std::to_string(int(excitation));
+    results_ex.push_back(std::make_pair(msg, std::make_pair(omega_cis, omega_cc2)));
+    // timings.push_back(std::make_pair(msg, time_ex.current_time(true)));
+
+
+    timings.push_back(std::make_pair("Whole LRCC2", time.current_time(true)));
+    output.section("LRCC2 Finished");
+    output("Ground State Results:");
+    for (const auto& res : results)
+    {
+        if (world.rank() == 0)
+            std::cout << std::fixed << std::setprecision(10)
+                << res.first << "=" << res.second << "\n";
+    }
+    output("Response Results:");
+    for (const auto& res : results_ex)
+    {
+        if (world.rank() == 0)
+            std::cout << std::fixed << std::setprecision(10)
+                << res.first << ": " << res.second.first << " (CIS)*, " << res.second.second << " (CC2)\n";
+    }
+    if (world.rank() == 0) std::cout << "*only if CIS vectors where given in the beginning (not for CC2 restart)\n";
+    output("\nTimings");
+    for (const auto& timing : timings)
+    {
+        if (world.rank() == 0)
+            std::cout << std::scientific << std::setprecision(2)
+                << std::setfill(' ') << std::setw(15) << timing.first
+                << ": " << timing.second.first << " (Wall), " << timing.second.second << " (CPU)" << "\n";
+    }
+
+    return std::make_tuple(ex_doubles, ex_singles, omega_cc2);
+}
+
 bool CC2::iterate_pair(CCPair& pair, const CC_vecfunction& singles) const {
     output.section("Iterate Pair " + pair.name());
     if (pair.ctype == CT_CC2) MADNESS_ASSERT(singles.type == PARTICLE);
@@ -804,8 +965,11 @@ bool CC2::iterate_pair(CCPair& pair, const CC_vecfunction& singles) const {
     bool converged = false;
 
     double omega = 0.0;
-    if (pair.type == GROUND_STATE) omega = CCOPS.compute_pair_correlation_energy(pair, singles);
-    if (pair.type == EXCITED_STATE) omega = CCOPS.compute_excited_pair_energy(pair, singles);
+    Info info;
+    info.mo_bra=CCOPS.mo_bra_.get_vecfunction();
+    info.parameters=parameters;
+    if (pair.type == GROUND_STATE) omega = CCOPS.compute_pair_correlation_energy(world, info,pair, singles);
+    if (pair.type == EXCITED_STATE) omega = CCOPS.compute_excited_pair_energy(world, pair, singles, info);
 
     if (world.rank() == 0)
         std::cout << "Correlation Energy of Pair " << pair.name() << " =" << std::fixed << std::setprecision(10)
@@ -851,8 +1015,8 @@ bool CC2::iterate_pair(CCPair& pair, const CC_vecfunction& singles) const {
 
         double omega_new = 0.0;
         double delta = 0.0;
-        if (pair.type == GROUND_STATE) omega_new = CCOPS.compute_pair_correlation_energy(pair, singles);
-        else if (pair.type == EXCITED_STATE) omega_new = CCOPS.compute_excited_pair_energy(pair, singles);
+        if (pair.type == GROUND_STATE) omega_new = CCOPS.compute_pair_correlation_energy(world, info, pair, singles);
+        else if (pair.type == EXCITED_STATE) omega_new = CCOPS.compute_excited_pair_energy(world, pair, singles, info);
         delta = omega - omega_new;
 
         const double current_norm = pair.function().norm2();
@@ -885,42 +1049,44 @@ bool CC2::iterate_pair(CCPair& pair, const CC_vecfunction& singles) const {
 }
 
 
-bool
-CC2::initialize_singles(CC_vecfunction& singles, const FuncType type, const int ex) const {
-    MADNESS_ASSERT(singles.size() == 0);
-    bool restarted = false;
+CC_vecfunction
+CC2::initialize_singles(const FuncType type, const int ex) const {
+    // return the singles of the given type: loaded from restart data if that succeeds, otherwise zero functions
+    std::string fname=madness::name(type,ex);
+    if (world.rank()==0) print("initializing singles",fname);
+    CC_vecfunction singles(type);
+    try {
+        singles=CC_vecfunction::load_restartdata(world,fname);
+        if (world.rank()==0) print(" .. singles found on file");
+        return singles;
+    } catch (...) {
+        if (world.rank()==0) print(" .. singles not found on file");
+    }
 
-    std::vector<CCFunction<double,3>> vs;
+    if (world.rank()==0) print(" .. initializing singles to zero functions");
     for (size_t i = parameters.freeze(); i < CCOPS.mo_ket().size(); i++) {
-        CCFunction<double,3> single_i;
-        single_i.type = type;
-        single_i.i = i;
-        std::string name;
-        if (ex < 0) name = single_i.name();
-        else name = std::to_string(ex) + "_" + single_i.name();
         real_function_3d tmpi = real_factory_3d(world);
-        const bool found = CCOPS.load_function<double, 3>(tmpi, name);
-        if (found) restarted = true;
-        else output("Initialized " + single_i.name() + " of type " + assign_name(type) + " as zero-function");
-        single_i.function = copy(tmpi);
-        vs.push_back(single_i);
+        CCFunction<double,3> single_i(tmpi, i, type);
+        singles.insert(i,single_i);
     }
-
-    singles = CC_vecfunction(vs, type);
-//    if (type == RESPONSE) singles.excitation = ex;
-
-    return restarted;
+    return singles;
 }
 
+
 bool
 CC2::initialize_pairs(Pairs<CCPair>& pairs, const CCState ftype, const CalcType ctype, const CC_vecfunction& tau,
-                      const CC_vecfunction& x, const size_t excitation) const {
+                      const CC_vecfunction& x, const size_t excitation, const Info& info) const {
     MADNESS_ASSERT(tau.type == PARTICLE);
     MADNESS_ASSERT(x.type == RESPONSE);
     MADNESS_ASSERT(pairs.empty());
-    output("Initialize " + assign_name(ctype) + " Pairs for " + assign_name(ftype));
+    // fill `pairs` for the given state/calculation type, loading pair functions and constant parts from file; returns true if any pair function was found
+    std::string fname=assign_name(ftype);
+    if (world.rank()==0) print("initializing doubles",fname);
+    // output("Initialize " + assign_name(ctype) + " Pairs for " + assign_name(ftype));
 
     bool restarted = false;
+    // std::vector<real_function_6d> vconst_part;
+    // load_function(world,vconst_part,"constant_part");
 
     for (size_t i = parameters.freeze(); i < CCOPS.mo_ket().size(); i++) {
         for (size_t j = i; j < CCOPS.mo_ket().size(); j++) {
@@ -932,65 +1098,70 @@ CC2::initialize_pairs(Pairs<CCPair>& pairs, const CCState ftype, const CalcType
                 if (found) restarted = true; // if a single pair was found then the calculation is not from scratch
                 real_function_6d const_part;
                 CCOPS.load_function(const_part, name + "_const");
-                CCPair tmp = CCOPS.make_pair_gs(utmp, tau, i, j);
+                CCPair tmp;
+                if (ctype==CT_MP2) tmp=CCPotentials::make_pair_mp2(utmp, i, j, info);
+                if (ctype==CT_CC2) tmp=CCPotentials::make_pair_cc2(utmp, tau, i, j, info);
                 tmp.constant_part = const_part;
                 pairs.insert(i, j, tmp);
 
-                //const double omega = CCOPS.compute_pair_correlation_energy(tmp);
-                //if(world.rank()==0) std::cout << "initialized pair " << tmp.name() << " with correlation energy=" << std::fixed << std::setprecision(10) << omega << "\n";
-
             } else if (ftype == EXCITED_STATE) {
-                name = std::to_string(int(excitation)) + "_" + name;
+                // NOTE(review): the "<excitation>_" file-name prefix is disabled here; presumably all excitations now share pair files -- confirm
                 real_function_6d utmp = real_factory_6d(world);
                 const bool found = CCOPS.load_function(utmp, name);
                 if (found) restarted = true;
                 real_function_6d const_part;
                 CCOPS.load_function(const_part, name + "_const");
                 CCPair tmp = CCOPS.make_pair_ex(utmp, tau, x, i, j, ctype);
-//                tmp.excitation = excitation;
+
+                {
+                    CCPair tmp2=CCPotentials::make_pair_lrcc2(world, ctype, utmp, tau, x, i, j, info);
+                    std::swap(tmp,tmp2);
+                    if (world.rank()==0) print("going on with Florian's pair");
+                    // print("going on with Jakob's pair");
+                }
+
                 tmp.constant_part = const_part;
                 pairs.insert(i, j, tmp);
+                // CCPotentials::compute_excited_pair_energy(world, pairs(i, j), x, info);
             } else error("Unknown pairtype");
         }
     }
     return restarted;
 }
 
-void CC2::update_reg_residues_gs(const CC_vecfunction& singles, Pairs<CCPair>& doubles) const {
+void CC2::update_reg_residues_gs(World& world, const CC_vecfunction& singles, Pairs<CCPair>& doubles, const Info& info)
+{
     CCTimer time(world, "Updated Regularization Residues of the Ground State");
     MADNESS_ASSERT(singles.type == PARTICLE);
     Pairs<CCPair> updated_pairs;
-    //    output("Correlation energy with old pairs");
-    //    CCOPS.compute_cc2_correlation_energy(singles,doubles);
     for (auto& tmp:doubles.allpairs) {
         MADNESS_ASSERT(tmp.second.type == GROUND_STATE);
         CCPair& pair = tmp.second;
         const size_t i = pair.i;
         const size_t j = pair.j;
-        const CCPair updated_pair = CCOPS.make_pair_gs(pair.function(), singles, i, j);
+        // rebuild pair (i,j) from its current 6D function and the given singles (formerly CCOPS.make_pair_gs)
+        const CCPair updated_pair = CCPotentials::make_pair_cc2(pair.function(), singles, i, j, info);
         updated_pairs.insert(i, j, updated_pair);
     }
-    //    output("Correlation energy with updated pairs");
-    //    CCOPS.compute_cc2_correlation_energy(singles,updated_pairs);
     doubles.swap(updated_pairs);
-    //    output("Correlation energy with swapped pairs");
-    //    CCOPS.compute_cc2_correlation_energy(singles,updated_pairs);
     time.info();
 }
 
-void CC2::update_reg_residues_ex(const CC_vecfunction& singles, const CC_vecfunction& response,
-                                 Pairs<CCPair>& doubles) const {
+void CC2::update_reg_residues_ex(World& world, const CC_vecfunction& singles,
+                                 const CC_vecfunction& response, Pairs<CCPair>& doubles, const Info& info)
+{
     CCTimer time(world, "Updated Regularization Residues of the Excited State");
     MADNESS_ASSERT(singles.type == PARTICLE);
     MADNESS_ASSERT(response.type == RESPONSE);
+    // each pair is rebuilt with its own ctype; reading it from allpairs.begin() would dereference end() if there are no pairs
     Pairs<CCPair> updated_pairs;
     for (auto& tmp:doubles.allpairs) {
         MADNESS_ASSERT(tmp.second.type == EXCITED_STATE);
         CCPair& pair = tmp.second;
-        const size_t i = pair.i;
-        const size_t j = pair.j;
-        CCPair updated_pair = CCOPS.make_pair_ex(pair.function(), singles, response, i, j, pair.ctype);
-        updated_pairs.insert(i, j, updated_pair);
+        // rebuild the pair from its current 6D function, the ground-state singles and the response singles
+        CCPair updated_pair =
+            CCPotentials::make_pair_lrcc2(world, pair.ctype, pair.function(), singles, response, pair.i, pair.j, info);
+        updated_pairs.insert(pair.i, pair.j, updated_pair);
     }
     doubles.swap(updated_pairs);
     time.info();
diff --git a/src/madness/chem/CC2.h b/src/madness/chem/CC2.h
index c5d13aba5c0..4720e927e76 100644
--- a/src/madness/chem/CC2.h
+++ b/src/madness/chem/CC2.h
@@ -21,6 +21,8 @@
 #include<madness/chem/TDHF.h>
 #include <madness/mra/nonlinsol.h>
 
+#include "BSHApply.h"
+
 namespace madness {
 
 class CC2 : public OptimizationTargetInterface, public QCPropertyInterface {
@@ -128,13 +130,15 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface {
     CCPotentials CCOPS;
     /// Formated Output (same as used in CC2Potentials structure)
     CCMessenger& output;
+    /// map Pair struct to vector
+    PairVectorMap triangular_map;
 
     /// solve the CC2 ground state equations, returns the correlation energy
     void solve();
 
 
     std::vector<CC_vecfunction>
-    solve_ccs();
+    solve_ccs() const;
 
     double compute_mp3(const Pairs<CCPair>& mp2pairs) const {
         MP3 mp3(CCOPS);
@@ -143,45 +147,62 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface {
     }
 
     double
-    solve_cc2(CC_vecfunction& tau, Pairs<CCPair>& u);
+    solve_cc2(CC_vecfunction& tau, Pairs<CCPair>& u, Info& info) const;
+
+    /// solve the excited state LR-CC2 equations for a given excitation
+
+    /// @param[in] gs_doubles: the ground state doubles
+    /// @param[in] gs_singles: the ground state singles
+    /// @param[in] cis: the CIS singles
+    /// @param[in] excitation: the excitation number
+    /// @return a tuple with the excited state doubles, the excited state singles and the excitation energy
+    std::tuple<Pairs<CCPair>, CC_vecfunction, double>
+    solve_lrcc2(Pairs<CCPair>& gs_doubles, const CC_vecfunction& gs_singles, const CC_vecfunction& cis,
+        const std::size_t excitation, Info& info) const;
 
     double
     solve_cispd(Pairs<CCPair>& doubles, const Pairs<CCPair>& mp2_pairs, const CC_vecfunction& cis_singles);
 
     /// convencience function to iterate the CC2 ground state singles,
     /// makes the right call on the iterate_singles functions
-    bool
-    iterate_cc2_singles(CC_vecfunction& singles, Pairs<CCPair>& doubles) {
-        CCOPS.clear_potentials(singles);
+    static bool
+    iterate_cc2_singles(World& world, CC_vecfunction& singles, Pairs<CCPair>& doubles, Info& info) {
+        // clear all intermediate potentials held in info before iterating (replaces CCOPS.clear_potentials(singles))
+        info.intermediate_potentials.clear_all();
         Pairs<CCPair> empty;
-        return iterate_singles(singles, CC_vecfunction(RESPONSE), doubles, empty, CT_CC2, parameters.iter_max_3D());
+        return iterate_singles(world, singles, CC_vecfunction(RESPONSE), doubles,
+            empty, CT_CC2, info.parameters.iter_max_3D(), info);
     }
 
     bool
-    iterate_adc2_singles(Pairs<CCPair>& mp2, CC_vecfunction& singles, Pairs<CCPair>& x) {
+    iterate_adc2_singles(Pairs<CCPair>& mp2, CC_vecfunction& singles, Pairs<CCPair>& x, Info& info) {
         MADNESS_ASSERT(singles.type == RESPONSE);
-        CCOPS.clear_potentials(singles);
-        return iterate_singles(singles, CC_vecfunction(UNDEFINED), mp2, x, CT_ADC2, parameters.iter_max_3D());
+        // clear the response intermediate potentials held in info (replaces CCOPS.clear_potentials(singles))
+        info.intermediate_potentials.clear_response();
+        return iterate_singles(world, singles, CC_vecfunction(UNDEFINED), mp2, x, CT_ADC2, parameters.iter_max_3D(), info);
     }
 
-    bool
-    iterate_lrcc2_singles(CC_vecfunction& cc2_s, Pairs<CCPair>& cc2_d, CC_vecfunction& lrcc2_s, Pairs<CCPair> lrcc2_d) {
+    static bool
+    iterate_lrcc2_singles(World& world, const CC_vecfunction& cc2_s, Pairs<CCPair>& cc2_d, CC_vecfunction& lrcc2_s, Pairs<CCPair> lrcc2_d, Info& info) {
         MADNESS_ASSERT(cc2_s.type == PARTICLE);
         MADNESS_ASSERT(lrcc2_s.type == RESPONSE);
-        CCOPS.clear_potentials(lrcc2_s);
-        return iterate_singles(lrcc2_s, cc2_s, cc2_d, lrcc2_d, CT_LRCC2, parameters.iter_max_3D());
+        info.intermediate_potentials.clear_response();
+        // the call above replaces CCOPS.clear_potentials(lrcc2_s); lrcc2_d is taken by value, so the iteration works on a copy
+        return iterate_singles(world, lrcc2_s, cc2_s, cc2_d, lrcc2_d,
+            CT_LRCC2, info.parameters.iter_max_3D(), info);
     }
 
     /// convencience function to iterate the CCS Response singles,
     /// makes the right call on the iterate_singles functions
     bool
-    iterate_ccs_singles(CC_vecfunction& x) {
+    iterate_ccs_singles(CC_vecfunction& x, Info& info) const {
         Pairs<CCPair> empty;
-        CCOPS.clear_potentials(x);
-        return iterate_singles(x, CC_vecfunction(PARTICLE), empty, empty, CT_LRCCS, 1);
+        // clear the response intermediate potentials held in info; maxiter is now iter_max_3D() instead of the former fixed 1
+        info.intermediate_potentials.clear_response();
+        return iterate_singles(world, x, CC_vecfunction(PARTICLE), empty, empty, CT_LRCCS, info.parameters.iter_max_3D(), info);
     }
 
-    bool
+    static bool
     /// Iterates the singles equations for CCS, CC2, LRCC2
     /// The corresponding regulairzation tails of the doubles are updated in every iteration (therefore doubles are not marked as const)
     /// @param[in] : singles, the singles that are iterated
@@ -191,8 +212,9 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface {
     /// @param[in] : ctype: the calculation type: CCS, CC2, CC2_response_
     /// @param[in] : maxiter: maxmial number of iterations
     /// @param[out]: true if the overall change of the singles is below 10*donv_6D
-    iterate_singles(CC_vecfunction& singles, const CC_vecfunction singles2, Pairs<CCPair>& gs_doubles,
-                    Pairs<CCPair>& ex_doubles, const CalcType ctype, const std::size_t maxiter) {
+    iterate_singles(World& world, CC_vecfunction& singles, const CC_vecfunction singles2, Pairs<CCPair>& gs_doubles,
+                    Pairs<CCPair>& ex_doubles, const CalcType ctype, const std::size_t maxiter, Info& info) {
+        CCMessenger output(world);
         output.subsection("Iterate " + assign_name(ctype) + "-Singles");
         CCTimer time_all(world, "Overall Iteration of " + assign_name(ctype) + "-Singles");
         bool converged = true;
@@ -200,77 +222,85 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface {
         CC_vecfunction old_singles(singles);
         for (auto& tmp : singles.functions)
             old_singles(tmp.first).function = copy(tmp.second.function);
+        double old_omega=0.0;
 
         // KAIN solver
         typedef vector_function_allocator<double, 3> allocT;
         typedef XNonlinearSolver<std::vector<Function<double, 3> >, double, allocT> solverT;
-        allocT alloc(world, singles.size());
         solverT solver(allocT(world, singles.size()));
         solver.do_print = (world.rank() == 0);
 
+        print_size(world, singles.get_vecfunction(), "singles before iteration");
+
         for (size_t iter = 0; iter < maxiter; iter++) {
-            output.subsection("Microiteration " + std::to_string(iter) + " of " + assign_name(ctype) + "-Singles");
-            CCTimer time(world, "Microiteration " + std::to_string(iter) + " of " + assign_name(ctype) + "-Singles");
+            // output.subsection("Microiteration " + std::to_string(iter) + " of " + assign_name(ctype) + "-Singles");
+            // CCTimer time(world, "Microiteration " + std::to_string(iter) + " of " + assign_name(ctype) + "-Singles");
             double omega = 0.0;
             if (ctype == CT_LRCC2) omega = singles.omega;
             else if (ctype == CT_LRCCS) omega = singles.omega;
             else if (ctype == CT_ADC2) omega = singles.omega;
+            if (world.rank()==0 and info.parameters.debug()) print("omega 1" ,omega); // debug output only: guarded by rank 0 and the debug flag, like the omega print after the potential
 
             // consistency check
             switch (ctype) {
-                case CT_CC2:
-                    if (singles.type != PARTICLE)
-                        output.warning("iterate_singles: CC2 demanded but singles are not of type PARTICLE");
-                    break;
-                case CT_MP2: MADNESS_EXCEPTION("Demanded Singles Calculation for MP2 ????", 1);
-                    break;
-                case CT_LRCC2:
-                    if (singles.type != RESPONSE or singles2.type != PARTICLE)
-                        output.warning("iterate_singles: CC2_response_ singles have wrong types");
-                    break;
-                case CT_LRCCS:
-                    if (singles.type != RESPONSE)
-                        output.warning("iterate_singles: CCS_response_ singles have wrong types");
-                    break;
-                case CT_CISPD: MADNESS_EXCEPTION("Demanded Singles Calculation for CIS(D)", 1);
-                    break;
-                case CT_ADC2:
-                    MADNESS_ASSERT(singles.type == RESPONSE);
-                    break;
-                case CT_TEST: MADNESS_EXCEPTION("Iterate Singles not implemented for Experimental calculation", 1);
-                    break;
-                default: MADNESS_EXCEPTION(
-                        ("Unknown calculation type in iterate singles: " + assign_name(ctype)).c_str(), 1);
+            case CT_CC2:
+                if (singles.type != PARTICLE)
+                    output.warning("iterate_singles: CC2 demanded but singles are not of type PARTICLE");
+                break;
+            case CT_MP2: MADNESS_EXCEPTION("Demanded Singles Calculation for MP2 ????", 1);
+                break;
+            case CT_LRCC2:
+                if (singles.type != RESPONSE or singles2.type != PARTICLE)
+                    output.warning("iterate_singles: CC2_response_ singles have wrong types");
+                break;
+            case CT_LRCCS:
+                if (singles.type != RESPONSE)
+                    output.warning("iterate_singles: CCS_response_ singles have wrong types");
+                break;
+            case CT_CISPD: MADNESS_EXCEPTION("Demanded Singles Calculation for CIS(D)", 1);
+                break;
+            case CT_ADC2:
+                MADNESS_ASSERT(singles.type == RESPONSE);
+                break;
+            case CT_TEST: MADNESS_EXCEPTION("Iterate Singles not implemented for Experimental calculation", 1);
+                break;
+            default: MADNESS_EXCEPTION(
+                    ("Unknown calculation type in iterate singles: " + assign_name(ctype)).c_str(), 1);
             }
 
             // get potentials
             CCTimer time_V(world, assign_name(ctype) + "-Singles Potential");
             vector_real_function_3d V;
-            if (ctype == CT_CC2) V = CCOPS.get_CC2_singles_potential_gs(singles, gs_doubles);
+            if (ctype == CT_CC2) V = CCPotentials::get_CC2_singles_potential_gs(world, singles, gs_doubles, info);
             else if (ctype == CT_LRCC2)
-                V = CCOPS.get_CC2_singles_potential_ex(singles2, gs_doubles, singles, ex_doubles);
-            else if (ctype == CT_LRCCS) V = CCOPS.get_CCS_potential_ex(singles);
-            else if (ctype == CT_ADC2) V = CCOPS.get_ADC2_singles_potential(gs_doubles, singles, ex_doubles);
+                V = CCPotentials::get_CC2_singles_potential_ex(world, singles2, gs_doubles, singles, ex_doubles, info);
+            else if (ctype == CT_LRCCS) V = CCPotentials::get_CCS_potential_ex(world,singles,false, info);
+            //            else if (ctype == CT_ADC2) V = CCOPS.get_ADC2_singles_potential(world, gs_doubles, singles, ex_doubles, info);
             else MADNESS_EXCEPTION("iterate singles: unknown type", 1);
+
+            // add local coupling
+            V-=compute_local_coupling(singles.get_vecfunction(),info);
+            truncate(world, V);
             time_V.info(true, norm2(world, V));
 
-            if (ctype == CT_LRCCS or ctype == CT_LRCC2 or ctype == CT_ADC2) {
-                omega = singles.omega; // computed with the potential
+            // update excitation energy
+            if (ctype==CT_LRCC2 or ctype==CT_LRCCS or ctype==CT_ADC2) {
+                old_omega=omega;
+                omega = CCPotentials::compute_cis_expectation_value(world, singles, V, true, info);
+                singles.omega = omega;
             }
-
-            scale(world, V, -2.0);
-            truncate(world, V);
+            if (world.rank()==0 and info.parameters.debug())
+                print("omega entering the update in the singles" ,omega);
 
             // make bsh operators
-            CCTimer time_makebsh(world, "Make G-Operators");
+            scale(world, V, -2.0); // moved to BSHApply
             std::vector<std::shared_ptr<SeparatedConvolution<double, 3> > > G(singles.size());
             for (size_t i = 0; i < G.size(); i++) {
-                const double bsh_eps = CCOPS.get_orbital_energies()[i + parameters.freeze()] + omega;
+                const double bsh_eps = info.orbital_energies[i + info.parameters.freeze()] + omega;
                 G[i] = std::shared_ptr<SeparatedConvolution<double, 3> >(
-                        BSHOperatorPtr3D(world, sqrt(-2.0 * bsh_eps), parameters.lo(), parameters.thresh_bsh_3D()));
+                        BSHOperatorPtr3D(world, sqrt(-2.0 * bsh_eps), info.parameters.lo(), info.parameters.thresh_bsh_3D()));
             }
             world.gop.fence();
-            time_makebsh.info();
 
             // apply bsh operators
             CCTimer time_applyG(world, "Apply G-Operators");
@@ -279,17 +309,13 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface {
             time_applyG.info();
 
             // apply Q-Projector to result
-            GV = CCOPS.apply_Qt(GV, CCOPS.mo_ket());
+            QProjector<double,3> Q(info.mo_bra,info.mo_ket);
+            GV = Q(GV);
 
             // Normalize Singles if it is excited state
             if (ctype == CT_LRCCS or ctype == CT_LRCC2 or ctype == CT_ADC2) {
                 output("Normalizing new singles");
-                const vector_real_function_3d x = GV;
-                const vector_real_function_3d xbra = mul(world, nemo->ncf->square(), GV);
-                const double norm = sqrt(inner(world, xbra, x).sum());
-                if (world.rank() == 0)
-                    std::cout << " Norm was " << std::fixed << std::setprecision(parameters.output_prec()) << norm
-                              << "\n";
+                const double norm=sqrt(inner(GV,info.R_square*GV)); // <GV|R^2|GV> is the squared norm; take the root (as the replaced code did) so the scaling below normalizes GV
                 scale(world, GV, 1.0 / norm);
             } else output("Singles not normalized");
 
@@ -297,24 +323,25 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface {
             const vector_real_function_3d residual = sub(world, singles.get_vecfunction(), GV);
 
             // information
-            const Tensor<double> R2xinnerx = inner(world, mul(world, nemo->ncf->square(), singles.get_vecfunction()),
+            const Tensor<double> R2xinnerx = inner(world, info.R_square*singles.get_vecfunction(),
                                                    singles.get_vecfunction());
-            const Tensor<double> R2GVinnerGV = inner(world, mul(world, nemo->ncf->square(), GV), GV);
-            const Tensor<double> R2rinnerr = inner(world, mul(world, nemo->ncf->square(), residual), residual);
+            const Tensor<double> R2GVinnerGV = inner(world, info.R_square*GV, GV);
+            const Tensor<double> R2rinnerr = inner(world, info.R_square*residual, residual);
             const double R2vector_error = sqrt(R2rinnerr.sum());
+            auto [rmsresidual, maxresidual]=CCPotentials::residual_stats(residual);
 
             // print information
             if (world.rank() == 0) std::cout << "\n\n-----Results of current interation:-----\n";
             if (world.rank() == 0)
                 std::cout << "\nName: ||" << singles.name(0) << "||, ||GV" << singles.name(0) << ", ||residual||" << "\n";
             if (world.rank() == 0)
-                std::cout << singles.name(0) << ": " << std::scientific << std::setprecision(parameters.output_prec())
+                std::cout << singles.name(0) << ": " << std::scientific << std::setprecision(info.parameters.output_prec())
                           << sqrt(R2xinnerx.sum()) << ", " << sqrt(R2GVinnerGV.sum()) << ", " << sqrt(R2rinnerr.sum())
                           << "\n----------------------------------------\n";
             for (size_t i = 0; i < GV.size(); i++) {
                 if (world.rank() == 0)
-                    std::cout << singles(i + parameters.freeze()).name() << ": " << std::scientific
-                              << std::setprecision(parameters.output_prec())
+                    std::cout << singles(i + info.parameters.freeze()).name() << ": " << std::scientific
+                              << std::setprecision(info.parameters.output_prec())
                               << sqrt(R2xinnerx(i)) << ", " << sqrt(R2GVinnerGV(i)) << ", " << sqrt(R2rinnerr(i))
                               << "\n";
             }
@@ -322,80 +349,76 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface {
 
             // make second order update (only for response)
             if (ctype == CT_LRCC2 or ctype == CT_LRCCS) {
-                output("\nMake 2nd order energy update:");
-                // include nuclear factors
-                {
-                    vector_real_function_3d bra_res = mul(world, nemo->ncf->square(), residual);
-                    vector_real_function_3d bra_GV = mul(world, nemo->ncf->square(), GV);
-                    double Rtmp = inner(world, bra_res, V).sum();
-                    double Rtmp2 = inner(world, bra_GV, GV).sum();
-                    const double Rdelta = (0.5 * Rtmp / Rtmp2);
-                    double old_omega = omega;
-                    output("Delta-Update is not used");
-                    if (world.rank() == 0)
-                        std::cout << "omega, old_omega, delta" << std::fixed
-                                  << std::setprecision(parameters.output_prec() + 2) << omega << ", " << old_omega << ", "
-                                  << Rdelta << "\n\n";
-                }
-
+                double Rtmp = inner(world, info.R_square*residual, V).sum();
+                double Rtmp2 = inner(world, info.R_square*GV, GV).sum();
+                const double Rdelta = (0.5 * Rtmp / Rtmp2);
+                if (world.rank() == 0) std::cout << "omega, second-order update (FYI): " << std::fixed
+                              << std::setprecision(info.parameters.output_prec() + 2) << omega << ", " << Rdelta << "\n\n";
             }
 
             // update singles
             singles.omega = omega;
-            vector_real_function_3d new_singles = GV;
-            if (parameters.kain()) new_singles = solver.update(singles.get_vecfunction(), residual);
-            print_size(world, new_singles, "new_singles");
-            truncate(world, new_singles);
-            print_size(world, new_singles, "new_singles");
+            vector_real_function_3d new_singles = truncate(GV);
+            if (info.parameters.kain()) new_singles = solver.update(singles.get_vecfunction(), residual);
+            if (info.parameters.debug()) print_size(world, new_singles, "new_singles");
+            // if (ctype == CT_LRCCS or ctype == CT_LRCC2 or ctype == CT_ADC2) Nemo::normalize(new_singles, info.R);
+            // if (info.parameters.debug()) print_size(world, new_singles, "new_singles normalized");
+
             for (size_t i = 0; i < GV.size(); i++) {
-                singles(i + parameters.freeze()).function = copy(new_singles[i]);
+                singles(i + info.parameters.freeze()).function = copy(new_singles[i]);
             }
 
-            // update intermediates
-            CCOPS.update_intermediates(singles);
-
             // update reg_residues of doubles
-            //if(ctype==CC2_) update_reg_residues_gs(singles,gs_doubles);
-            //else if(ctype==LRCC2_) update_reg_residues_ex(singles2,singles,ex_doubles);
+            if (ctype==CT_CC2) update_reg_residues_gs(world, singles,gs_doubles, info);
+            else if(ctype==CT_LRCC2) update_reg_residues_ex(world, singles2,singles,ex_doubles, info);
 
-            converged = (R2vector_error < parameters.dconv_3D());
+            if (world.rank()==0) CCPotentials::print_convergence(singles.name(0),rmsresidual,
+                maxresidual,omega-old_omega,iter); // pass the max residual from residual_stats instead of the rms residual twice
+            converged = (R2vector_error < info.parameters.dconv_3D());
 
-            time.info();
+            // time.info();
             if (converged) break;
             if (ctype == CT_LRCCS) break; // for CCS just one iteration to check convergence
         }
         time_all.info();
+        print_size(world, singles.get_vecfunction(), "singles after iteration");
 
         // Assign the overall changes
         bool no_change = true;
         if (world.rank() == 0)
-            std::cout << "Change in Singles functions after all the CC2-Single-Microiterations" << std::endl;
+            std::cout << "Change in Singles functions after all the Microiterations" << std::endl;
         for (auto& tmp : singles.functions) {
             const double change = (tmp.second.function - old_singles(tmp.first).function).norm2();
             tmp.second.current_error = change;
-            if (change > parameters.dconv_3D()) no_change = false;
+            if (change > info.parameters.dconv_3D()) no_change = false;
             if (world.rank() == 0)
                 std::cout << "Change of " << tmp.second.name() << "=" << tmp.second.current_error << std::endl;
         }
         // update reg_residues of doubles
-        if (ctype == CT_CC2) update_reg_residues_gs(singles, gs_doubles);
-        else if (ctype == CT_LRCC2) update_reg_residues_ex(singles2, singles, ex_doubles);
+        if (ctype == CT_CC2) update_reg_residues_gs(world, singles, gs_doubles, info);
+        else if (ctype == CT_LRCC2) update_reg_residues_ex(world, singles2, singles, ex_doubles, info);
 
         //CCOPS.plot(singles);
-        if (no_change) output("Change of Singles was below  = " + std::to_string(parameters.dconv_3D()) + "!");
+        if (no_change) output("Change of Singles was below  = " + std::to_string(info.parameters.dconv_3D()) + "!");
         return no_change;
     }
 
+    /// store singles to file
+    void store_singles(const CC_vecfunction& singles, const int ex = -1) const;
 
-    bool initialize_singles(CC_vecfunction& singles, const FuncType type, const int ex = -1) const;
+    /// read singles from file or initialize new ones
+    CC_vecfunction initialize_singles(const FuncType type, const int ex = -1) const;
 
+    /// read pairs from file or initialize new ones
     bool initialize_pairs(Pairs<CCPair>& pairs, const CCState ftype, const CalcType ctype, const CC_vecfunction& tau,
-                          const CC_vecfunction& x, const size_t extitation = 0) const;
+                          const CC_vecfunction& x, const size_t extitation, const Info& info) const;
 
-    void update_reg_residues_gs(const CC_vecfunction& singles, Pairs<CCPair>& doubles) const;
+    static void
+    update_reg_residues_gs(World& world, const CC_vecfunction& singles, Pairs<CCPair>& doubles, const Info& info);
 
-    void
-    update_reg_residues_ex(const CC_vecfunction& singles, const CC_vecfunction& response, Pairs<CCPair>& doubles) const;
+    static void
+    update_reg_residues_ex(World& world, const CC_vecfunction& singles, const CC_vecfunction& response, Pairs<CCPair>& doubles,
+        const Info& info);
 
     /// Iterates a pair of the CC2 doubles equations
     bool
@@ -404,9 +427,9 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface {
     bool
     iterate_adc2_pairs(Pairs<CCPair>& cispd, const CC_vecfunction& ccs);
 
-    bool
-    iterate_lrcc2_pairs(const CC_vecfunction& cc2_s, const Pairs<CCPair>& cc2_d, const CC_vecfunction lrcc2_s,
-                        Pairs<CCPair>& lrcc2_d);
+    static bool
+    iterate_lrcc2_pairs(World& world, const CC_vecfunction& cc2_s, const CC_vecfunction lrcc2_s,
+                        Pairs<CCPair>& lrcc2_d, const Info& info);
 
     bool update_constant_part_cc2_gs(const CC_vecfunction& tau, CCPair& pair) {
         MADNESS_ASSERT(pair.ctype == CT_CC2);
@@ -473,19 +496,36 @@ class CC2 : public OptimizationTargetInterface, public QCPropertyInterface {
     }
 
     /// forward to the other function (converting CCPair to real_function)
-    Pairs<real_function_6d> compute_local_coupling(const Pairs<CCPair> &pairs) const {
+    static Pairs<real_function_6d> compute_local_coupling(const std::vector<CCPair> &vpairs, const Info& info) {
+        // create new pairs structure
+        Pairs<CCPair> pairs;
+        for (auto& tmp_pair : vpairs) pairs.insert(tmp_pair.i, tmp_pair.j, tmp_pair);
         auto ccpair2function = [](const CCPair& a) {return a.function();};
-        return compute_local_coupling(pairs.convert<real_function_6d>(pairs,ccpair2function));
-
+        return compute_local_coupling(pairs.convert<real_function_6d>(pairs,ccpair2function), info);
     };
 
+    /// compute the coupling of singles function if orbitals are localized
+
+    /// @return the coupling terms c_i = \sum_(j\neq i) f_ij |\phi_j>  (for whatever phi is); no minus sign is applied here, the caller subtracts the result
+    static std::vector<real_function_3d> compute_local_coupling(const std::vector<real_function_3d>& singles,
+        const Info& info) {
+
+        MADNESS_CHECK_THROW(singles.size()>0,"compute_local_coupling: singles vector is empty");
+        World& world=singles.front().world();
+        auto active=Slice(info.parameters.freeze(),-1);
+        Tensor<double> Fact=info.fock(active,active);
+        for (int i=0; i<Fact.dim(0); ++i) Fact(i,i)=0.0;
+        vector_real_function_3d fock_coupling = madness::transform(world, singles, Fact);
+        return fock_coupling;
+    }
+
     /// add the coupling terms for local MP2
 
     /// \sum_{k\neq i} f_ki |u_kj> + \sum_{l\neq j} f_lj |u_il>
-    Pairs<real_function_6d> compute_local_coupling(const Pairs<real_function_6d>& pairs) const;
+    static Pairs<real_function_6d> compute_local_coupling(const Pairs<real_function_6d>& pairs, const Info& info);
 
 
-    double solve_mp2_coupled(Pairs<CCPair> &doubles);
+    double solve_mp2_coupled(Pairs<CCPair> &doubles, Info& info);
 
     bool check_core_valence_separation(const Tensor<double>& fmat) const;
 
diff --git a/src/madness/chem/CCPotentials.cc b/src/madness/chem/CCPotentials.cc
index a658f532c30..e524b0ed4bb 100644
--- a/src/madness/chem/CCPotentials.cc
+++ b/src/madness/chem/CCPotentials.cc
@@ -26,16 +26,16 @@ CCPotentials::CCPotentials(World& world_,  std::shared_ptr<Nemo> nemo, const CCP
           //orbital_energies_(init_orbital_energies(nemo))
 //          g12(std::shared_ptr<CCConvolutionOperator(world, OT_G12, param)), f12(world, OT_F12, param),
           corrfac(world, param.gamma(), 1.e-7, nemo->get_calc()->molecule),
-          get_potentials(world, param),
+          get_potentials(param),
           output(world) {
     g12=std::shared_ptr<CCConvolutionOperator<double,3>>(new CCConvolutionOperator<double,3>(world,OpType::OT_G12,param));
     f12=std::shared_ptr<CCConvolutionOperator<double,3>>(new CCConvolutionOperator<double,3>(world,OpType::OT_F12,param));
     output.debug = parameters.debug();
-//    reset_nemo(nemo);
-//    g12.update_elements(mo_bra_, mo_ket_);
-//    g12.sanity();
-//    f12.update_elements(mo_bra_, mo_ket_);
-//    f12.sanity();
+    //    reset_nemo(nemo);
+    //    g12.update_elements(mo_bra_, mo_ket_);
+    //    g12.sanity();
+    //    f12.update_elements(mo_bra_, mo_ket_);
+    //    f12.sanity();
 }
 
 madness::CC_vecfunction
@@ -72,13 +72,108 @@ CCPotentials::init_orbital_energies(const Nemo& nemo) const {
     return eps;
 }
 
+CCPair CCPotentials::make_pair_mp2(const real_function_6d& u, const size_t i, const size_t j, const Info& info) {
+    World& world=u.world();
+
+    // construct Q12 f12 |ij>
+    auto phi=info.mo_ket;
+    auto phi_bra=info.mo_bra;
+    StrongOrthogonalityProjector<double,3> Q12(world);
+    Q12.set_spaces(phi_bra,phi,phi_bra,phi);
+
+    auto f12=CCConvolutionOperatorPtr<double,3>(world,OT_F12,info.parameters);
+    CCPairFunction<double,6> fij(f12, phi[i], phi[j]);
+    std::vector<CCPairFunction<double,6>> tmp=Q12(std::vector<CCPairFunction<double,6>>(1,fij));
+
+    // first term is the 6d function u, then follows Q12 f12 |ij>
+    std::vector<CCPairFunction<double,6>> functions;
+    functions+=CCPairFunction<double,6>(u);
+    functions+=tmp;
+
+    auto pair=CCPair(i,j,GROUND_STATE,CT_MP2,functions);
+    pair.bsh_eps=get_epsilon(i,j,info);
+    return pair;
+}
+
+CCPair CCPotentials::make_pair_cc2(const real_function_6d& u, const CC_vecfunction& gs_singles, const size_t i, const size_t j,
+    const Info& info) {
+    World& world=u.world();
+
+    // construct Q12 f12 |ij>
+    auto phi=info.mo_ket;
+    auto phi_bra=info.mo_bra;
+    auto t=make_full_t_intermediate(gs_singles,info).get_vecfunction();
+    StrongOrthogonalityProjector<double,3> Q12(world);
+    Q12.set_spaces(phi_bra,t,phi_bra,t);
+
+    auto f12=CCConvolutionOperatorPtr<double,3>(world,OT_F12,info.parameters);
+    CCPairFunction<double,6> fij(f12, t[i], t[j]);
+    std::vector<CCPairFunction<double,6>> tmp=Q12(std::vector<CCPairFunction<double,6>>(1,fij));
+
+    // first term is the 6d function u, then follows Q12 f12 |ij>
+    std::vector<CCPairFunction<double,6>> functions;
+    functions+=CCPairFunction<double,6>(u);
+    functions+=tmp;
+
+    auto pair=CCPair(i,j,GROUND_STATE,CT_CC2,functions);
+    pair.bsh_eps=get_epsilon(i,j,info);
+    return pair;
+}
+
+/// follow eq. (23) of Kottmann, JCTC 13, 5956 (2017)
+CCPair CCPotentials::make_pair_lrcc2(World& world, const CalcType& ctype, const real_function_6d& u,
+                                     const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles, const size_t i, const size_t j, const Info& info) {
+    MADNESS_ASSERT(gs_singles.type == PARTICLE || gs_singles.type == HOLE);
+    MADNESS_ASSERT(ex_singles.type == RESPONSE);
+    MADNESS_ASSERT(ctype == CT_CISPD || ctype == CT_LRCC2 || ctype == CT_ADC2);
+    MADNESS_ASSERT(!(i < info.parameters.freeze()));
+    MADNESS_ASSERT(!(j < info.parameters.freeze()));
+
+    // compute the t intermediates for active orbitals only -- they go into the ansatz
+    const auto t = CC_vecfunction(info.get_active_mo_ket()+gs_singles.get_vecfunction(),MIXED,info.parameters.freeze());
+    MADNESS_ASSERT(t.size() == (info.mo_ket.size()-info.parameters.freeze()));
+
+    // compute the t intermediates for all orbitals -- they go into the projector
+    const CC_vecfunction pt = copy(make_full_t_intermediate(gs_singles,info));
+    MADNESS_ASSERT(pt.size() == info.mo_ket.size());
+
+    auto f12=CCConvolutionOperatorPtr<double,3>(world,OT_F12,info.parameters);
+
+    // set up projectors -- they project out the occupied space from the response pair function
+
+    // dQ12t = -(Qt(1) Ox(2) + Ox(1) Qt(2))      eq. (22) of the excited state paper
+    QProjector<double,3> Qt(info.mo_bra,pt.get_vecfunction());
+    Projector<double,3> Ox(info.get_active_mo_bra(),ex_singles.get_vecfunction());  // this works on active orbitals only
+    auto dQt_1 = outer(Qt,Ox);
+    auto dQt_2 = outer(Ox,Qt);
+
+    StrongOrthogonalityProjector<double,3> Q12t(world); // eq. (21) of the ground state paper
+    Q12t.set_spaces(info.mo_bra,pt.get_vecfunction(),info.mo_bra,pt.get_vecfunction());
+
+    typedef CCPairFunction<double,6> cpT;
+    auto functions=std::vector<cpT>(1,cpT(u));
+
+    auto f_xt=std::vector<cpT>(1,cpT(f12, ex_singles(i), t(j)));
+    auto f_tx=std::vector<cpT>(1,cpT(f12, t(i), ex_singles(j)));
+    auto f_tt=std::vector<cpT>(1,cpT(f12, t(i), t(j)));
+
+    functions+=(Q12t(f_xt) + Q12t(f_tx) - dQt_1(f_tt) -dQt_2(f_tt));     // note the sign change in the last two terms
+    functions=consolidate(functions);
+
+    CCPair pair(i, j, EXCITED_STATE, ctype, functions);
+    MADNESS_ASSERT(ex_singles.omega != 0.0);
+    const double bsh_eps = get_epsilon(i, j, info) + ex_singles.omega;
+    pair.bsh_eps = bsh_eps;
+    return pair;
+}
+
 madness::CCPair
 CCPotentials::make_pair_gs(const real_function_6d& u, const CC_vecfunction& tau, const size_t i, const size_t j) const {
     CCTimer time(world, "make pair u" + std::to_string(int(i)) + std::to_string(int(j)));
     MADNESS_ASSERT(tau.type == PARTICLE || tau.type == HOLE);
     // for  MP2: tau is empty or Hole states, the function will give back mo_ket_
     // for freeze!=0 the function will give back (mo0,mo1,...,t_freeze,t_freeze+1,...)
-    const CC_vecfunction t = make_t_intermediate(tau);
+    const CC_vecfunction t = make_t_intermediate(tau,parameters);
     // functions for the projector
     CC_vecfunction pt;
     if (!parameters.QtAnsatz()) pt = mo_ket_;
@@ -222,12 +317,12 @@ CCPotentials::make_pair_ex(const real_function_6d& u, const CC_vecfunction& tau,
     MADNESS_ASSERT(!(j < parameters.freeze()));
     // for  CIS(D): tau is empty or Hole states, the function will give back mo_ket_
     // for freeze!=0 the function will give back (mo0,mo1,...,t_freeze,t_freeze+1,...)
-    const CC_vecfunction t = make_t_intermediate(tau).copy();
+    const CC_vecfunction t = copy(make_t_intermediate(tau,parameters));
     // functions for the projector
     CC_vecfunction pt;
-    if (!parameters.QtAnsatz()) pt = mo_ket_.copy();
+    if (!parameters.QtAnsatz()) pt = copy(mo_ket_);
     else {
-        pt = make_full_t_intermediate(tau).copy();
+        pt = copy(make_full_t_intermediate(tau));
     }
     MADNESS_ASSERT(pt.size() == mo_ket_.size());
     std::vector<CCPairFunction<double,6>> functions;
@@ -302,6 +397,8 @@ CCPotentials::make_pair_ex(const real_function_6d& u, const CC_vecfunction& tau,
             MADNESS_ASSERT(functions.size() == 7);
     } else
         MADNESS_ASSERT(functions.size() == 2);
+    functions=consolidate(functions);
+    MADNESS_ASSERT(functions.size() == 3);
 
     MADNESS_ASSERT(x.omega != 0.0);
     const double bsh_eps = get_epsilon(i, j) + x.omega;
@@ -310,26 +407,31 @@ CCPotentials::make_pair_ex(const real_function_6d& u, const CC_vecfunction& tau,
 }
 
 double
-CCPotentials::compute_pair_correlation_energy(const CCPair& u, const CC_vecfunction& singles) const {
+CCPotentials::compute_pair_correlation_energy(World& world, const Info& info,
+    const CCPair& u, const CC_vecfunction& singles) {
+
     CCTimer timer(world, "Compute Correlation Energy");
     MADNESS_ASSERT(u.type == GROUND_STATE);
     if (singles.functions.empty()) MADNESS_ASSERT(u.ctype == CT_MP2);
 
-    const bool print_details=(world.rank()==0 and parameters.debug());
-    if (parameters.debug()) output("Compute pair-correlation energy of pair " + u.name());
+    const bool print_details=(world.rank()==0 and info.parameters.debug());
     double result = 0.0;
-    const CCFunction<double,3>& mobi = mo_bra_(u.i);
-    const CCFunction<double,3>& mobj = mo_bra_(u.j);
+    const CCFunction<double,3>& mobi = info.mo_bra[u.i];
+    const CCFunction<double,3>& mobj = info.mo_bra[u.j];
     const bool symmetric = (u.i == u.j);
 
+    auto g12=CCConvolutionOperatorPtr<double,3>(world,OpType::OT_G12,info.parameters);
+    CCPairFunction<double,6> ij(mobi.f(),mobj.f());
+    CCPairFunction<double,6> ji(mobj.f(),mobi.f());
 
     for (size_t mm = 0; mm < u.functions.size(); mm++) {
         double tmp = 0.0;
-        const double part1 = make_xy_op_u(mobi, mobj, *g12, u.functions[mm]);
+        // const double part1 = make_xy_op_u(mobi, mobj, *g12, u.functions[mm]);
+        const double part1 = inner(ij,g12*u.functions[mm]);
         if (symmetric) tmp = part1;
-        else     //if(world.rank()==0) std::cout << std::fixed << std::setprecision(10) << part1 << "\n";
-        {
-            const double part2 = make_xy_op_u(mobj, mobi, *g12, u.functions[mm]);
+        else {
+            // const double part2 = make_xy_op_u(mobj, mobi, *g12, u.functions[mm]);
+            const double part2 = inner(ji,g12*u.functions[mm]);
             tmp = 2.0 * (2.0 * part1 - part2);     // non symmetric pairs -> offdiagonal -> count twice
         }
         result += tmp;
@@ -349,20 +451,21 @@ CCPotentials::compute_pair_correlation_energy(const CCPair& u, const CC_vecfunct
     }
     // if (world.rank() == 0) std::cout << "------------\n" << std::fixed << std::setprecision(10) << result << "\n\n";
 
-    timer.info(parameters.debug());
+    timer.info(info.parameters.debug());
     return result;
 }
 
 double
-CCPotentials::compute_cc2_correlation_energy(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const {
+CCPotentials::compute_cc2_correlation_energy(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles, const Info& info)
+{
     MADNESS_ASSERT(singles.type == PARTICLE);
     CCTimer time(world, "Computing CC2 Correlation Energy");
-    output.section("Computing CC2 Correlation Energy");
+    // output.section("Computing CC2 Correlation Energy");
     double result = 0.0;
     for (const auto& tmp : doubles.allpairs) {
         const size_t i = tmp.second.i;
         const size_t j = tmp.second.j;
-        const double omega = compute_pair_correlation_energy(tmp.second, singles);
+        const double omega = compute_pair_correlation_energy(world, info, tmp.second, singles);
         result += omega;
         if (world.rank() == 0)
             std::cout << std::fixed << "omega  " << i << j << " =" << std::setprecision(10) << omega << "\n";
@@ -374,7 +477,8 @@ CCPotentials::compute_cc2_correlation_energy(const CC_vecfunction& singles, cons
 }
 
 double
-CCPotentials::compute_kinetic_energy(const vector_real_function_3d& xbra, const vector_real_function_3d& xket) const {
+CCPotentials::compute_kinetic_energy(World& world, const vector_real_function_3d& xbra, const vector_real_function_3d& xket)
+{
     Kinetic<double, 3> T(world);
     double kinetic = 0.0;
     for (size_t k = 0; k < xket.size(); k++)
@@ -382,16 +486,23 @@ CCPotentials::compute_kinetic_energy(const vector_real_function_3d& xbra, const
     return kinetic;
 }
 
+
 double
-CCPotentials::compute_cis_expectation_value(const CC_vecfunction& x, const vector_real_function_3d& V,
-                                            const bool print) const {
-    const vector_real_function_3d xbra = make_bra(x);
+CCPotentials::compute_cis_expectation_value(World& world, const CC_vecfunction& x,
+                                            const vector_real_function_3d& V, const bool print, const Info& info)
+{
+    // following eq. (34) of the CIS paper Kottmann et al, PCCP, 17, 31453, (2015)
+    // doi: https://doi.org/10.1039/C5CP00345H
+    // the expectation value of the CIS wave function is computed by projecting the
+    // CIS wave function onto eq. (22)
+    // the potential V must contain the coupling term when using localized orbitals
+    const vector_real_function_3d xbra = info.R_square*(x.get_vecfunction());
     const vector_real_function_3d xket = x.get_vecfunction();
-    const double kinetic = compute_kinetic_energy(xbra, xket);
+    const double kinetic = compute_kinetic_energy(world, xbra, xket);
     const double norm = sqrt(inner(world, xbra, xket).sum());
     double eps = 0.0;
     for (size_t k = 0; k < xket.size(); k++)
-        eps -= get_orbital_energies()[k + parameters.freeze()] * xbra[k].inner(xket[k]);
+        eps -= info.orbital_energies[k + info.parameters.freeze()] * xbra[k].inner(xket[k]);
     double potential = inner(world, xbra, V).sum();
     const double result = 1.0 / (norm * norm) * (potential + kinetic + eps);
     if (world.rank() == 0 && print) {
@@ -406,16 +517,22 @@ CCPotentials::compute_cis_expectation_value(const CC_vecfunction& x, const vecto
 }
 
 double
-CCPotentials::compute_excited_pair_energy(const CCPair& d, const CC_vecfunction& x) const {
-    const CC_vecfunction xbra(make_bra(x), RESPONSE, parameters.freeze());
+CCPotentials::compute_excited_pair_energy(World& world, const CCPair& d, const CC_vecfunction& x, const Info& info) {
+    // const CC_vecfunction xbra(make_bra(x), RESPONSE, info.parameters.freeze());
+    // for (const auto& f: d.functions) f.print_size("doubles functions in ex pair energy");
+    MADNESS_CHECK_THROW(x.type == RESPONSE, "x must be of type RESPONSE");
+    MADNESS_CHECK_THROW(x.size()==info.get_active_mo_bra().size(), "x must have the same size as the active space");
+    const CC_vecfunction xbra(info.R_square*x.get_vecfunction(), RESPONSE, info.parameters.freeze());
     const CCFunction<double,3>& xbi = xbra(d.i);
-    const CCFunction<double,3>& mobj = mo_bra_(d.j);
+    const CCFunction<double,3>& mobj = info.mo_bra[d.j];
+    auto g12=CCConvolutionOperatorPtr<double,3>(world,OT_G12,info.parameters);
     double result = 0.0;
     double s2b = 2.0 * make_xy_op_u(xbi, mobj, *g12, d.functions) - make_xy_op_u(mobj, xbi, *g12, d.functions);
     double s2c = 0.0;
     for (const auto& ktmp : x.functions) {
         const size_t k = ktmp.first;
-        const real_function_3d j_igk = (*g12)(mo_bra_(d.i), mo_ket_(k)) * mo_bra_(d.j).function;
+        // const real_function_3d j_igk = (*g12)(info.mo_bra[d.i], info.mo_ket[k]) * info.mo_bra[d.j].function;
+        const real_function_3d j_igk = (*g12)(info.mo_bra[d.i]* info.mo_ket[k]) * info.mo_bra[d.j];
         s2c -= 2.0 * make_xy_u(xbra(k), j_igk, d.functions) - make_xy_u(j_igk, xbra(k), d.functions);
     }
     result = s2b + s2c;
@@ -483,17 +600,17 @@ CCPotentials::compute_cc2_excitation_energy(const CC_vecfunction& stau, const CC
     truncate(world, tmp);
     CC_vecfunction xbra(tmp, RESPONSE, parameters.freeze());
     const double xbrax = inner(world, xbra.get_vecfunction(), sx.get_vecfunction()).sum();
-    double result = potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s3a_);
-    result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s3b_);
-    result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s3c_);
-    result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s5b_);
-    result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s5c_);
-    result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s6_);
-    result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s2b_);
-    result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s2c_);
-    result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s4a_);
-    result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s4b_);
-    result += potential_energy_ex(xbra, stau, dtau, sx, dx, POT_s4c_);
+    double result = potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s3a_);
+    result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s3b_);
+    result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s3c_);
+    result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s5b_);
+    result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s5c_);
+    result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s6_);
+    result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s2b_);
+    result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s2c_);
+    result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s4a_);
+    result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s4b_);
+    result += potential_energy_ex(world, xbra, stau, dtau, sx, dx, POT_s4c_);
     return 1.0 / xbrax * result;
 }
 
@@ -661,7 +778,7 @@ CCPotentials::fock_residue_6d_macrotask(World& world, const CCPair& u, const CCP
             double tight_thresh = parameters.thresh_6D();
             real_function_6d x = CompositeFactory<double, 6, 3>(world).ket(copy(Du)).V_for_particle2(
                     copy(U1_axis)).thresh(tight_thresh).special_points(sp6d);
-             x.fill_nuclear_cuspy_tree(op_mod, 2);
+            x.fill_nuclear_cuspy_tree(op_mod, 2);
             if (parameters.debug()) x.print_size("Un_axis_" + stringify(axis));
             Un2 += x;
         }
@@ -679,6 +796,180 @@ CCPotentials::fock_residue_6d_macrotask(World& world, const CCPair& u, const CCP
     return vphi;
 }
 
+/// compute the constant part, i.e. the contribution to the doubles that is independent of the doubles
+
+/// CC-equations from Kottmann et al., JCTC 13, 5956 (2017)
+/// MP2:
+///    cp = G Q g~ |ij>
+///    g~ = Ue - KffK
+/// GS-CC2: eqs. (6,7)
+///   cp  = G Qt g~ |t_i t_j>
+///    g~ = Ue - KffK - Fock_commutator - reduced_Fock
+/// LRCC2: eqs. (24-29)
+///   cp  = G d(Qt g~ d|t_i t_j>)
+///       = G (Qt g~ d|t_i t_j> + Qt dg~ |t_i t_j> + dQt g~ |t_i t_j>)
+madness::real_function_6d
+CCPotentials::make_constant_part_macrotask(World& world, const CCPair& pair,        // pair: supplies i, j, ctype and bsh_eps
+            const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles,    // ground-state / excited-state singles
+            const Info& info) {                                                    // info: reference orbitals and parameters
+    const CalcType targetstate=pair.ctype;      // selects the MP2 / CC2 / LRCC2 branch below
+    const auto& parameters=info.parameters;
+    std::string msg="compute constant part of pair "+std::to_string(pair.i) + " " + std::to_string(pair.j);
+    print_header3(msg);
+    timer t1(world);
+    // construct the projectors
+    // Q12 = (1-|i><i|)  (1-|j><j|)
+    StrongOrthogonalityProjector<double, 3> Q12(world);
+    Q12.set_spaces(info.mo_bra,info.mo_ket,info.mo_bra,info.mo_ket);
+
+    // Q12t = (1-|t_i><i|)(1-|t_j><j|)
+    StrongOrthogonalityProjector<double, 3> Q12t(world);
+
+    // t1-transformed orbitals; only filled for CC2 and LRCC2, for MP2 neither t nor Q12t is set up
+    CC_vecfunction t(MIXED);
+    if (targetstate==CT_CC2 or targetstate==CT_LRCC2) {
+        t=CCPotentials::make_full_t_intermediate(gs_singles,info);
+        Q12t.set_spaces(info.mo_bra,t.get_vecfunction(),info.mo_bra,t.get_vecfunction());
+    }
+
+    // Qt and Ox only receive their spaces for LRCC2; dQt_1/dQt_2 are nevertheless constructed for every target state
+    // dQ12t = -(Qt(1) Ox(2) + Ox(1) Qt(2))      eq. (22)
+    QProjector<double,3> Qt;
+    Projector<double,3> Ox;
+    if (targetstate==CT_LRCC2) {
+        Qt.set_spaces(info.mo_bra,t.get_vecfunction());
+        Ox.set_spaces(info.get_active_mo_bra(),ex_singles.get_vecfunction());
+    }
+    auto dQt_1 = outer(Qt,Ox);
+    auto dQt_2 = outer(Ox,Qt);
+
+    std::size_t i=pair.i;
+    std::size_t j=pair.j;
+    auto phi = [&](size_t i) { return CCFunction<double,3>(info.mo_ket[i],i,HOLE); };      // reference orbital i as HOLE-type CCFunction
+    // auto t = [&](size_t i) { return CCFunction<double,3>(info.mo_ket[i]+gs_singles(i).function); };
+    auto x = [&](size_t i) { return ex_singles(i); };      // excited-state single i
+
+    // save memory:
+    // split application of the BSH operator into high-rank, local part U|ij>, and
+    // low-rank, delocalized part (-O1 -O2 +O1O2) U|ij> by splitting the SO operator
+    auto apply_in_separated_form = [](const StrongOrthogonalityProjector<double,3>& Q,
+        const std::vector<CCPairFunction<double,6>>& ccp) {
+        // NOTE(review): terms that are neither pure nor decomposed are not copied into the result
+        std::vector<CCPairFunction<double,6>> result;
+        for (const auto& cc : ccp) {
+            if (cc.is_pure()) {
+                auto [left,right]=Q.get_vectors_for_outer_product(cc.get_function());
+                result.push_back(cc);                                       // the pure term itself is kept unprojected
+                result.push_back(CCPairFunction<double,6>(left,right));     // plus the term built from the vectors returned by Q
+            } else if (cc.is_decomposed()) {
+                result.push_back(Q(cc));                                    // decomposed terms are projected directly
+            }
+        }
+        return result;
+    };
+    // NOTE(review): GG and apply_G_and_print look like debugging aids; every call to apply_G_and_print below is commented out
+    auto GG = BSHOperator<6>(world, sqrt(-2.0 * pair.bsh_eps), parameters.lo(), parameters.thresh_bsh_6D());
+    GG.destructive() = true;
+    GG.print_timings=false;
+    auto apply_G_and_print = [&](const std::vector<CCPairFunction<double,6>>& cc, std::string name) {
+        std::vector<CCPairFunction<double,6>> tmp1;
+        print("cc in apply_G_and_print:",name,cc.size());
+        for (const auto& tt : cc) {
+            print(tt.name());
+            tt.print_size();
+        }
+        for (const auto& tt : cc) tmp1 += GG(copy(tt));      // GG acts on copies of the input terms
+        print("tmp1 after apply G");
+        for (const auto& tt : tmp1) {
+            print(tt.name());
+            tt.print_size();
+        }
+        tmp1=consolidate(tmp1);
+        tmp1=-2.0*tmp1;
+        MADNESS_CHECK(tmp1.size()==1);
+        tmp1[0].get_function().print_size(name);      // the result is only printed, nothing is returned
+    };
+
+    // compute all 6d potentials without applying the SO projector
+    std::vector<CCPairFunction<double,6>> V;
+    if (targetstate==CT_MP2) {
+        std::vector<std::string> argument={"Ue","KffK"};
+        auto Vreg=apply_Vreg(world,phi(i),phi(j),gs_singles,ex_singles,info,argument,pair.bsh_eps);
+        V=consolidate(apply_in_separated_form(Q12,Vreg));
+    } else if (targetstate==CT_CC2) {       // Eq. (42) of Kottmann, JCTC 13, 5945 (2017)
+        std::vector<std::string> argument={"Ue","KffK","comm_F_Qt_f12","reduced_Fock"};
+        auto Vreg=apply_Vreg(world,t(i),t(j),gs_singles,ex_singles,info,argument,pair.bsh_eps);
+        V=consolidate(Q12t(Vreg));
+    } else if (targetstate==CT_LRCC2) {
+        // Eq. (25) of Kottmann, JCTC 13, 5956 (2017)
+        // eq. (25) Q12t (g~ - omega f12) (|x_i t_j> + |t_i x_j> )
+        // note the term omega f12 is included in the reduced_Fock term, see eq. (34)
+        if (1)
+        {
+            print_header3("Q12t g~ |x_i t_j + t_i x_j>");
+            std::vector<std::string> argument={"Ue","KffK","comm_F_Qt_f12","reduced_Fock"};
+            auto Vreg=apply_Vreg(world,x(i),t(j),gs_singles,ex_singles,info,argument,pair.bsh_eps);
+            Vreg+=apply_Vreg(world,t(i),x(j),gs_singles,ex_singles,info,argument,pair.bsh_eps);
+            V=consolidate(apply_in_separated_form(Q12t,Vreg));
+            // apply_G_and_print(V,"functional response");
+        }
+
+        if (0) {      // never executed; Vreg computed here would not enter V
+            print_header3("[F12,Qt] f12 |x_i t_j + t_i x_j>");
+            std::vector<std::string> argument={"comm_F_Qt_f12"};
+            auto Vreg=apply_Vreg(world,x(i),t(j),gs_singles,ex_singles,info,argument,pair.bsh_eps);
+            Vreg+=apply_Vreg(world,t(i),x(j),gs_singles,ex_singles,info,argument,pair.bsh_eps);
+            // auto Q12V=Q12t(Vreg);
+            // apply_G_and_print(Q12V,"commutator response in old terminology: Q12V direct");
+        }
+
+        // eq. (29) first term: dQt g~ |t_i t_j>
+        if (1) {
+            print_header3("dQt g~ |t_i t_j> ");
+            const std::vector<std::string> argument={"Ue","KffK","comm_F_Qt_f12","reduced_Fock"};
+            // const std::vector<std::string> argument={"Ue","KffK","reduced_Fock"};
+            auto Vreg1=apply_Vreg(world,t(i),t(j),gs_singles,ex_singles,info,argument,pair.bsh_eps);
+
+            auto tmp=consolidate(dQt_1(Vreg1) + dQt_2(Vreg1));
+            V-=tmp;      // subtracted, cf. the minus sign in the definition of dQ12t above
+
+            // MADNESS_CHECK_THROW(tmp.size()==1,"tmp size is incorrect");
+            // for (auto& t : tmp) t.print_size("dQt g~ |t_i t_j>");
+            // apply_G_and_print(tmp,"projector response");
+        }
+
+
+        // eq. (29) second term = eq. (31): [F12, dQt] f12 |t_i t_j> + omega dQ12t f12 |t_i t_j>
+        if (1) {
+            print_header3("[F12, dQt] f12 |t_i t_j>");
+            const std::vector<std::string> argument={"comm_F_dQt_f12"};
+            auto tmp=apply_Vreg(world,t(i),t(j),gs_singles,ex_singles,info,argument,pair.bsh_eps);
+            tmp=consolidate(tmp);
+            V+=tmp;
+            // apply_G_and_print(tmp,"commutator projector response");
+        }
+    }
+
+    V=consolidate(V);
+    MADNESS_CHECK(V.size()==2);     // term 1: 6d, hi-rank, local; term 2: 3d, low-rank, delocalized
+    t1.end("finished computing potential for constant part");
+
+    // the Green's function
+    auto G = BSHOperator<6>(world, sqrt(-2.0 * pair.bsh_eps), parameters.lo(), parameters.thresh_bsh_6D());
+    G.destructive() = true;
+
+    real_function_6d GV=real_factory_6d(world).empty();
+    for (const auto& vv : V) GV+= (G(vv)).get_function();      // note V is destroyed here
+    GV=-2.0*Q12(GV).truncate().reduce_rank();      // apply Q12, truncate and reduce the rank, then scale by -2
+
+    GV.print_size("GVreg");
+    t1.end("finished applying G on potential for constant part");
+    return GV;
+}
+
+
+
+
 madness::real_function_6d
 CCPotentials::make_constant_part_mp2_macrotask(World& world, const CCPair& pair,
                                                const std::vector<real_function_3d>& mo_ket,
@@ -719,13 +1010,13 @@ CCPotentials::make_constant_part_mp2_macrotask(World& world, const CCPair& pair,
     StrongOrthogonalityProjector<double, 3> Q(world);
     Q.set_spaces(mo_bra, mo_ket, mo_bra, mo_ket);
 
-//    V = Q(V);
-//
-//    V.print_size("QVreg");
+    //    V = Q(V);
+    //
+    //    V.print_size("QVreg");
     real_convolution_6d G = BSHOperator<6>(world, sqrt(-2.0 * epsilon), parameters.lo(),
                                            parameters.thresh_bsh_6D());
     G.destructive() = true;
-//    real_function_6d GV = -2.0 * G(V);
+    //    real_function_6d GV = -2.0 * G(V);
 
     // save memory:
     // split application of the BSH operator into high-rank, local part U|ij>, and
@@ -783,6 +1074,7 @@ CCPotentials::update_pair_mp2_macrotask(World& world, const CCPair& pair, const
 
     CCTimer timer_G(world, "Apply Greens Operator on MP2-Potential of pair " + pair.name());
     const real_function_6d GVmp2 = G(mp2_potential);
+    if (parameters.debug()) GVmp2.print_size("GVmp2");
     timer_G.info(true, GVmp2.norm2());
 
     //CCTimer timer_addup(world, "Add constant parts and update pair " + pair.name());
@@ -793,7 +1085,7 @@ CCPotentials::update_pair_mp2_macrotask(World& world, const CCPair& pair, const
     Q.set_spaces(mo_bra, mo_ket, mo_bra, mo_ket);
     unew = Q(unew);
 
-    if (parameters.debug())unew.print_size("truncated-unew");
+    if (parameters.debug())unew.print_size("Q12(unew)");
     timer_mp2.info();
 
     real_function_6d residue = (pair.function() - unew);
@@ -801,9 +1093,111 @@ CCPotentials::update_pair_mp2_macrotask(World& world, const CCPair& pair, const
     residue.truncate(FunctionDefaults<6>::get_thresh()*0.1);
     if (parameters.debug()) residue.print_size("bsh residual, truncated");
 
-    return residue;
+    // return residue;
+    return unew;
 }
 
+
+CCPair CCPotentials::iterate_pair_macrotask(World& world,
+                                            const CCPair& pair,                  // pair to iterate; an updated copy is returned
+                                            const CC_vecfunction& gs_singles,    // only inspected by the assertions below
+                                            const CC_vecfunction& ex_singles,    // used for the excited-state pair energy
+                                            const real_function_6d& coupling,    // added to the potential with a factor of 2
+                                            const Info& info,                    // reference orbitals, U1/U2 and parameters
+                                            const long maxiter) {                // upper bound on the number of microiterations
+    if (world.rank()==0) print_header2("Iterate Pair " + pair.name());
+    if (pair.ctype == CT_CC2) MADNESS_ASSERT(gs_singles.type == PARTICLE);
+    if (pair.ctype == CT_CISPD) MADNESS_ASSERT(ex_singles.type == RESPONSE);
+    if (pair.ctype == CT_MP2) MADNESS_ASSERT(gs_singles.get_vecfunction().empty());
+    if (pair.ctype == CT_MP2) MADNESS_ASSERT(ex_singles.get_vecfunction().empty());
+    if (pair.ctype == CT_ADC2)MADNESS_ASSERT(ex_singles.type == RESPONSE);
+    // NOTE(review): the next lines call truncate()/reduce_rank() on pair.function() although pair is const -- confirm intended
+    real_function_6d constant_part = pair.constant_part;
+    constant_part.truncate().reduce_rank();
+    pair.function().truncate().reduce_rank();
+
+    StrongOrthogonalityProjector<double,3> Q12(world);
+    Q12.set_spaces(info.mo_bra,info.mo_ket,info.mo_bra,info.mo_ket);
+
+    double bsh_eps = pair.bsh_eps; //CCOPS.get_epsilon(pair.i,pair.j)+omega;
+    real_convolution_6d G = BSHOperator<6>(world, sqrt(-2.0 * bsh_eps), info.parameters.lo(), info.parameters.thresh_bsh_6D());
+    G.destructive() = true;
+
+    NonlinearSolverND<6> solver(info.parameters.kain_subspace());
+    solver.do_print = (world.rank() == 0);
+
+    CCPair result=pair;
+
+    // only the u-part of omega
+    double omega_partial=0.0;
+    if (result.ctype == CT_MP2) omega_partial = CCPotentials::compute_pair_correlation_energy(world, info, result);
+    else if (result.type == EXCITED_STATE) omega_partial = CCPotentials::compute_excited_pair_energy(world, result, ex_singles, info);
+    // iter is signed like maxiter, so a non-positive maxiter skips the loop instead of wrapping to a huge bound
+    for (long iter = 0; iter < maxiter; iter++) {
+        if (world.rank()==0) print_header3(assign_name(result.ctype) + "-Microiteration");
+        CCTimer timer_mp2(world, "MP2-Microiteration of pair " + result.name());
+
+
+        CCTimer timer_mp2_potential(world, "MP2-Potential of pair " + result.name());
+        // real_function_6d mp2_potential = -2.0 * CCOPS.fock_residue_6d(result);
+        real_function_6d mp2_potential = -2.0 * fock_residue_6d_macrotask(world,result,info.parameters,
+                                                                           info.molecular_coordinates,info.mo_ket,info.mo_bra,
+                                                                           info.U1,info.U2);
+        mp2_potential += 2.0 * coupling;
+
+        if (info.parameters.debug()) mp2_potential.print_size(assign_name(result.ctype) + " Potential");
+        mp2_potential.truncate().reduce_rank();
+        timer_mp2_potential.info(true, mp2_potential.norm2());
+
+        CCTimer timer_G(world, "Apply Greens Operator on MP2-Potential of pair " + result.name());
+        const real_function_6d GVmp2 = G(mp2_potential);
+        if (info.parameters.debug()) GVmp2.print_size("GVmp2");
+        timer_G.info(true, GVmp2.norm2());
+
+        CCTimer timer_addup(world, "Add constant parts and update pair " + result.name());
+        real_function_6d unew = Q12(GVmp2 + constant_part);
+        if (info.parameters.debug()) unew.print_size("Q12(unew)");
+
+        const real_function_6d residual =  result.function() - unew;
+        double rmsresidual=residual.norm2();
+
+        if (info.parameters.kain()) {
+            // KAIN: let the solver propose the update, then project it with Q12 again
+            real_function_6d kain_update = copy(solver.update(result.function(), residual));
+            // kain_update = CCOPS.apply_Q12t(kain_update, CCOPS.mo_ket());
+            kain_update = Q12(kain_update);
+            if (info.parameters.debug()) kain_update.print_size("Kain-Update-Function");
+            result.update_u(copy(kain_update));
+        } else {
+            result.update_u(unew);
+        }
+
+        timer_addup.info(true, result.function().norm2());
+
+        double omega_new = 0.0;      // stays 0 for pairs that are neither MP2 nor EXCITED_STATE, so delta is 0 for those
+        if (result.ctype == CT_MP2) omega_new = CCPotentials::compute_pair_correlation_energy(world, info, result);
+        else if (result.type == EXCITED_STATE) omega_new = CCPotentials::compute_excited_pair_energy(world, result, ex_singles, info);
+        double delta = omega_partial - omega_new;
+        omega_partial = omega_new;
+
+        if (world.rank()==0)
+            print_convergence(pair.name(),rmsresidual,rmsresidual,delta,iter);      // the rms residual is passed for both residual arguments
+
+        // output("\n--Iteration " + stringify(iter) + " ended--");
+        // save(result.function(), result.name());
+        // timer_mp2.info();
+        bool converged=(rmsresidual < info.parameters.dconv_6D())  and (fabs(delta) < info.parameters.econv_pairs());
+        if (converged) {
+            if (world.rank()==0) print("Iteration converged after",iter+1,"iterations");      // iter is 0-based
+            break;
+        } else {
+            if (world.rank()==0) print("Iteration not converged after",iter+1,"iterations");  // iter is 0-based
+        }
+    }
+    return result;
+}
+
+
 madness::real_function_6d
 CCPotentials::make_constant_part_cc2_gs(const CCPair& u, const CC_vecfunction& tau,
                                         const real_convolution_6d *Gscreen) const {
@@ -904,6 +1298,8 @@ CCPotentials::make_constant_part_cc2_Qt_gs(const CCPair& u, const CC_vecfunction
     real_convolution_6d G = BSHOperator<6>(world, sqrt(-2.0 * get_epsilon(ti.i, tj.i)), parameters.lo(),
                                            parameters.thresh_bsh_6D());
     G.destructive() = true;
+    G.particle_=1;
+    // G.particle_=-1;
     // calculate [F,Qt] commutator which is [F1,Q1t]Q2t + Q1t [F2,Q2t]
     // and [F1,Q1t] = - [F1,O1t] = - (F-e_k) |tk><k| = - (F-e_k) |tauk><k| = |Vk><k|
     // commutator is applied to f12|titj>
@@ -1294,12 +1690,14 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction
     CCTimer time_cpr(world, "Commutator-Projector Response");
     // Make functional response part: G(QtVreg|xitj + tixj>)
     real_function_6d functional_response;
-    {
+    if (1) {
         time_fr.start();
         const real_function_6d Vxt = (apply_Vreg(xi, tj, Gscreen)).truncate().reduce_rank();
         if (symmetric) {
             real_function_6d V = apply_Q12t(Vxt, t);
+            V.print_size("Q12tVreg");
             const real_function_6d tmp = -2.0 * G(V);
+            tmp.print_size("G(Q12tVreg)");
             functional_response = tmp + swap_particles(tmp);
         } else {
             const real_function_6d Vtx = apply_Vreg(ti, xj, Gscreen);
@@ -1308,22 +1706,28 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction
             functional_response = -2.0 * G(V);
         }
         time_fr.stop();
-    }     // make Projector Response: -G(OxQt+QtOx)Vreg|titj>
+    }
+    functional_response.print_size("G functional response");
+
+    // make Projector Response: -G(OxQt+QtOx)Vreg|titj>
     real_function_6d projector_response;
-    {
+    if (1) {
         time_pr.start();
         // here is an inconsistency: The Vreg potential will apply (F12-eij) to the |titj> state but we have here (F12-eij-omega)
         // in the future this part here is supposed to be entirely 3D and not use the 6D apply_Vreg function, so right now this is a workaround
         // however, we have to add the missing -omega|titj>
         real_function_6d Vtt_tmp = apply_Vreg(ti, tj, Gscreen);
         real_function_6d titj = make_f_xy(ti, tj);
-        Vtt_tmp = Vtt_tmp - x.omega * titj;
+        print("skipping omega term 1");
+        // Vtt_tmp = Vtt_tmp - x.omega * titj;
         CCPairFunction<double,6> Vtt(Vtt_tmp);
         real_function_6d tmp1;
         real_function_6d tmp2;
         {
             CCPairFunction<double,6> Ox = apply_Ot(Vtt, x, 1);
             CCPairFunction<double,6> OxQt = apply_Qt(Ox, t, 2);
+            OxQt.convert_to_pure_no_op_inplace();
+            OxQt.get_function().print_size("Q12t_FQtQtF_f12");
             tmp1 = -2.0 * apply_G(OxQt, G);
         }
         if (symmetric) tmp2 = swap_particles(tmp1);
@@ -1335,9 +1739,12 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction
         projector_response = tmp1 + tmp2;
         time_pr.stop();
     }
+    projector_response.print_size("G projector response");
+
     // make commutator response: [F12,Qt12]f12|xitj+tixj> = (O1VQ2t + Q1tO2V)f12|xitj+tixj>
     real_function_6d commutator_response;
     {
+        print_header3("[F12,Qt12]f12|xitj+tixj> = (Ov Qt + Qt Ov) f12 |xitj+tixj>");
         time_cr.start();
         real_function_6d part1;     // the xt parts
         const vector_real_function_3d Vtmp = get_potentials(tau, POT_singles_);
@@ -1367,6 +1774,8 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction
         commutator_response = part1 + part2;
         time_cr.stop();
     }
+    commutator_response.print_size("G commutator response");
+
     // make Commutator Projector response Response: [F,d/dtau(Qt)] part of d/dtau{([F,Qt])f12|xitj + tixj>}
     // {-O1x[F,Q2t] - Q1t[F,O2x] - [F,O1x]Q2t - [F,Q1t]O2x , used d/dtau(Qt) = -Ox
     //  O1x[F,O2t] - Q1t[F,O2x] - [F,O1x]Q2t + [F,O1t]O2x ,  used [F,Qt] = -[F,Ot]
@@ -1377,10 +1786,14 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction
     // }f12|titj>
     real_function_6d commutator_projector_response;
     {
+        print_header3("[F12,dQt] f12 |t_i t_j> = (Ox OVt + Qt OVx) f12 |t_i t_j>");
         time_cpr.start();
-        const vector_real_function_3d Vxtmp = sub(world, get_potentials(x, POT_singles_),
-                                                  x.omega * x.get_vecfunction());
+        print("skipping omega term 2");
+        // const vector_real_function_3d Vxtmp = sub(world, get_potentials(x, POT_singles_),
+                                                  // x.omega * x.get_vecfunction());
+        const vector_real_function_3d Vxtmp = get_potentials(x, POT_singles_);
         const vector_real_function_3d Vttmp = get_potentials(tau, POT_singles_);
+
         const CC_vecfunction Vx(Vxtmp, UNDEFINED, parameters.freeze());
         const CC_vecfunction Vt(Vttmp, UNDEFINED, parameters.freeze());
         CCPairFunction<double,6> ftt(f12, ti, tj);
@@ -1408,9 +1821,13 @@ CCPotentials::make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction
         commutator_projector_response = tmp1 + tmp2;
         time_cpr.stop();
     }
+    commutator_projector_response.print_size("G commutator projector response");
+    print_header3("add all up");
     real_function_6d result =
             functional_response - projector_response + commutator_response + commutator_projector_response;
+    result.print_size("result");
     result = apply_Q12t(result, mo_ket_);
+    result.print_size("Q12t result");
     output.section("Constant Term for Pair " + u.name() + " ended");
     time_fr.info(true, functional_response.norm2());
     time_pr.info(true, projector_response.norm2());
@@ -1425,7 +1842,7 @@ CCPotentials::apply_Vreg(const CCFunction<double,3>& ti, const CCFunction<double
     output("Applying Vreg to |" + ti.name() + tj.name() + ">");
     CCTimer timer(world, "Vreg|" + ti.name() + tj.name() + ">");
     CCTimer time_f(world, "F-Part");
-    const real_function_6d F_part = apply_reduced_F(ti, tj, Gscreen);
+    const real_function_6d F_part = apply_reduced_F1(ti, tj, Gscreen);
     time_f.stop();
     CCTimer time_u(world, "U-Part");
     const real_function_6d U_part = apply_transformed_Ue(ti, tj, Gscreen);
@@ -1449,6 +1866,69 @@ CCPotentials::apply_Vreg(const CCFunction<double,3>& ti, const CCFunction<double
     return result;
 }
 
+
+/// Apply the Regularization potential
+
+/// four terms can be calculated
+/// \f$ V_{reg} = [ U_e - [K,f12] + f12(F12-eij) + [F,Qt] ]|titj> \f$
+///   - Ue = [T,f12]
+///   - [K,f12]
+///   - [F12,Q12t] f12 or  [F12,dQ12t] f12
+///   - f12 (F - e_ij - omega) or f12 (F - e_ij)
+///  the last terms are computed using the converged singles potential, i.e. we assume that the following equations hold
+///  (see Kottmann et al., JCTC 13, 5945 (2017), eqs (30), (31), (44), and JCTC 13, 5956 (2017), eqs (17), (19), (32))
+///  CC2:   (F - e_i ) |t_i t_j> = | Vtau >
+///  LRCC2: (F - e_i - omega) |x_i> = | Vx >
+/// @param[in] world the world used to construct the functions and the screening operator
+/// @param[in] ti first function in the ket, for MP2 it is the Orbital, for CC2 the relaxed Orbital t_i=\phi_i + \tau_i
+/// @param[in] tj second function in the ket ...
+/// @param[in] gs_singles the converged ground state singles: with   (F - e_i ) |t_i t_j> = | Vtau >
+/// @param[in] ex_singles the converged excited state singles: with (F - e_i - omega) |x_i> = | Vx >
+/// @param[in] info Info structure holding the applied singles potentials Vtau and Vx and reference orbitals
+/// @param[in] argument names of the terms to compute: "Ue", "KffK", "reduced_Fock", "comm_F_Qt_f12", "comm_F_dQt_f12"
+/// @param[in] bsh_eps energy from which the screening BSH operator is built as sqrt(-2*bsh_eps)
+/// @return the regularization potential (unprojected), see equation above: the 6d part (if non-empty) followed by the low-rank terms
+std::vector<CCPairFunction<double,6>>
+    CCPotentials::apply_Vreg(World& world, const CCFunction<double,3>& ti, const CCFunction<double,3>& tj,
+                          const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles,
+                          const Info& info, const std::vector<std::string>& argument, const double bsh_eps) {
+    const auto& parameters=info.parameters;      // bind by reference: no copy of the parameter set per call
+    if (parameters.debug() and (world.rank()==0)) {
+        print("computing the following terms in constant_part for pair: (",ti.name(),",", tj.name(),"):" , argument);
+    }
+
+    real_convolution_6d Gscreen = BSHOperator<6>(world, sqrt(-2.0 * bsh_eps),
+                                                 parameters.lo(), parameters.thresh_bsh_6D());
+    Gscreen.modified() = true;
+
+    auto exists=[&](const std::string& term) {      // true if the requested term is listed in argument
+        return std::find(argument.begin(), argument.end(), term) != argument.end();
+    };
+
+    // calculate the regularized potential
+    real_function_6d V=real_factory_6d(world);              // accumulates the 6d (pure) contributions
+    std::vector<CCPairFunction<double,6>> V_lowrank;        // collects the commutator contributions
+    if (exists("Ue")) V += apply_Ue(world,ti,tj,info,&Gscreen);
+    if (exists("KffK")) V -= apply_KffK(world,ti,tj,info,&Gscreen);
+    if (exists("reduced_Fock")) V += apply_reduced_F(world,ti,tj,info,&Gscreen);
+    if (exists("comm_F_Qt_f12")) {
+        V_lowrank += apply_commutator_F_Qt_f12(world,ti,tj,gs_singles,ex_singles,info,&Gscreen);
+    }
+    if (exists("comm_F_dQt_f12")) {
+        V_lowrank += apply_commutator_F_dQt_f12(world,ti,tj,gs_singles,ex_singles,info,&Gscreen);
+    }
+    V.truncate().reduce_rank();
+    if (parameters.debug()) {
+        V.print_size("Vreg -- pure component");
+        print("V_lowrank.size()",V_lowrank.size());
+    }
+
+    std::vector<CCPairFunction<double, 6>> result;
+    if (V.tree_size()>0) result+=CCPairFunction<double,6>(V);      // skip the 6d part if no 6d term was requested
+    result+=V_lowrank;
+    return result;
+}
+
 madness::real_function_6d
 CCPotentials::apply_Vreg_macrotask(World& world, const std::vector<real_function_3d>& mo_ket,
                                    const std::vector<real_function_3d>& mo_bra,
@@ -1509,7 +1989,7 @@ CCPotentials::apply_Vreg_macrotask(World& world, const std::vector<real_function
 }
 
 madness::real_function_6d
-CCPotentials::apply_reduced_F(const CCFunction<double,3>& ti, const CCFunction<double,3>& tj, const real_convolution_6d *Gscreen) const {
+CCPotentials::apply_reduced_F1(const CCFunction<double,3>& ti, const CCFunction<double,3>& tj, const real_convolution_6d *Gscreen) const {
     //CC_Timer time(world,"(F-eij)|"+ti.name()+tj.name()+">");
     // get singles potential
     const bool symmetric = (ti.type == tj.type && ti.i == tj.i);
@@ -1526,6 +2006,26 @@ CCPotentials::apply_reduced_F(const CCFunction<double,3>& ti, const CCFunction<d
     return result;
 }
 
+/// Reduced Fock term for a pair of singles-type functions, either with or without the omega term,
+/// using Eqs (33) and (34) of Kottmann et al., JCTC 13, 5956 (2017)
+/// f12 (F12 - e_ij) |ti tj>
+/// f12 (F12 - e_ij - omega) |ti xj>
+/// evaluated here as -( f12|V(ti),tj> + f12|ti,V(tj)> ) with V looked up as POT_singles_ in info
+/// @param Gscreen  passed through unchanged to make_f_xy
+madness::real_function_6d
+CCPotentials::apply_reduced_F(World& world, const CCFunction<double,3>& ti, const CCFunction<double,3>& tj,
+                              const Info& info, const real_convolution_6d *Gscreen) {
+    // stored singles potentials belonging to ti and tj
+    const real_function_3d pot_i = info.intermediate_potentials(ti, POT_singles_);
+    const real_function_3d pot_j = info.intermediate_potentials(tj, POT_singles_);
+    // potential acting on particle 1
+    const real_function_6d f_Vi_j = make_f_xy(world, pot_i, tj, info, Gscreen);
+    // potential acting on particle 2; if ti equals tj it is obtained by swapping the particles of the term above
+    const bool same_function = (ti == tj);
+    const real_function_6d f_i_Vj = same_function ? madness::swap_particles(f_Vi_j)
+                                                  : make_f_xy(world, ti, pot_j, info, Gscreen);
+    return -1.0 * (f_Vi_j + f_i_Vj);
+}
 
 madness::real_function_6d
 CCPotentials::apply_transformed_Ue(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const real_convolution_6d *Gscreen) const {
@@ -1610,6 +2110,101 @@ CCPotentials::apply_transformed_Ue(const CCFunction<double,3>& x, const CCFuncti
 }
 
 
+/// Apply the regularized electronic potential Ue to the pair function |phi_i phi_j>.
+/// The untransformed U potential of the correlation factor is applied first; then, axis by axis, the
+/// double-commutator contribution built from info.U1[axis] and corrfac.U1(axis) is added.
+/// Afterwards <xy|R2 U|xy> is compared with <xy|R2 g12|xy> and the outcome is reported on rank 0.
+/// @param Gscreen  screening operator in modified NS form; it is dereferenced unconditionally, so null is rejected
+madness::real_function_6d
+CCPotentials::apply_Ue(World& world, const CCFunction<double,3>& phi_i, const CCFunction<double,3>& phi_j,
+        const Info& info, const real_convolution_6d *Gscreen) {
+
+    const std::string x_name = phi_i.name();
+    const std::string y_name = phi_j.name();
+    const auto& parameters=info.parameters;
+
+    if (parameters.debug()) print("Computing Ue|" + x_name + y_name + ">");
+
+    real_function_3d x_function=phi_i.function;
+    real_function_3d y_function=phi_j.function;
+    CorrelationFactor corrfac(world, parameters.gamma(), 1.e-7, parameters.lo());
+
+    const bool symmetric = (phi_i.type == phi_j.type && phi_i.i == phi_j.i);
+    CCTimer time_Ue(world, "Ue|" + x_name + y_name + ">");
+    double tight_thresh = parameters.thresh_6D();     // right now this is the std. thresh
+    // every use of Gscreen below dereferences it, so reject a null pointer instead of crashing later
+    MADNESS_CHECK_THROW(Gscreen != nullptr, "apply_Ue: no screening operator (Gscreen) given");
+    // the screening operator must be in modified NS form
+    if (!Gscreen->modified()) error("Demanded Screening for Ue but given BSH Operator is not in modified NS form");
+    if (parameters.debug()) print("Applying transformed Ue to \n" + x_name + y_name);
+
+    if (parameters.debug() && symmetric) print("Exploiting Pair Symmetry\n");
+
+    real_function_6d Uxy = real_factory_6d(world);
+    Uxy.set_thresh(tight_thresh);
+    // Apply the untransformed U Potential
+    Uxy = corrfac.apply_U(x_function, y_function, *Gscreen, symmetric);
+    Uxy.set_thresh(tight_thresh);
+    // Apply the double commutator R^{-1}[[T,f],R]
+    for (size_t axis = 0; axis < 3; axis++) {
+        // Make the local parts of the Nuclear and electronic U potentials
+        const real_function_3d Un_local = info.U1[axis];
+        const real_function_3d Un_local_x = (Un_local * x_function).truncate();
+        real_function_3d Un_local_y;
+        if (symmetric) Un_local_y = copy(Un_local_x);
+        else Un_local_y = (Un_local * y_function).truncate();
+
+        const real_function_6d Ue_local = corrfac.U1(axis);
+        // Now add the Un_local_x part to the first particle of the Ue_local potential
+        real_function_6d UeUnx = CompositeFactory<double, 6, 3>(world).g12(Ue_local).particle1(Un_local_x).particle2(
+                copy(y_function)).thresh(tight_thresh);
+        // Fill the Tree where it will be necessary
+        UeUnx.fill_cuspy_tree(*Gscreen);
+        // Set back the thresh
+        UeUnx.set_thresh(FunctionDefaults<6>::get_thresh());
+        // Now add the Un_local_y part to the second particle of the Ue_local potential
+        real_function_6d UeUny;
+        if (symmetric) UeUny = -1.0 * madness::swap_particles(UeUnx);     // Ue_local is antisymmetric
+        else {
+            UeUny = CompositeFactory<double, 6, 3>(world).g12(Ue_local).particle1(copy(x_function)).particle2(
+                    Un_local_y).thresh(tight_thresh);
+            // Fill the Tree where it will be necessary
+            UeUny.fill_cuspy_tree(*Gscreen);
+            // Set back the thresh
+            UeUny.set_thresh(FunctionDefaults<6>::get_thresh());
+        }
+        // Construct the double commutator part and add it to the Ue part
+        real_function_6d diff = (UeUnx - UeUny).scale(-1.0);
+        diff.truncate();
+        Uxy = (Uxy + diff).truncate();
+    }
+    if (parameters.debug()) time_Ue.info();
+
+    // sanity check: <xy|R2 [T,g12] |xy> = <xy |R2 U |xy> - <xy|R2 g12 | xy> = 0
+    CCTimer time_sane(world, "Ue-Sanity-Check");
+    real_function_6d tmp = CompositeFactory<double, 6, 3>(world).particle1(
+            copy(x_function * info.R_square)).particle2(copy(y_function * info.R_square));
+    const double a = inner(Uxy, tmp);
+    const real_function_3d xx = (x_function * x_function * info.R_square);
+    const real_function_3d yy = (y_function * y_function * info.R_square);
+    real_convolution_3d poisson= CoulombOperator(world,parameters.lo(),parameters.thresh_3D());
+    const real_function_3d gxx= poisson(xx);
+
+    const double aa = inner(yy, gxx);
+    const double error = std::fabs(a - aa);
+    const double diff = a - aa;
+    time_sane.info(parameters.debug(), error);
+    if (world.rank() == 0) {
+        std::cout << std::fixed << std::setprecision(10) << "<" << x_name + y_name << "|U_R|" << x_name + y_name
+                  << "> =" << a << ", <" << x_name + y_name << "|g12|" << x_name + y_name
+                  << "> =" << aa << ", diff=" << error << "\n";
+        if (error > FunctionDefaults<6>::get_thresh() * 10.0) std::cout << ("Ue Potential plain wrong!\n");
+        else if (error > FunctionDefaults<6>::get_thresh()) std::cout << ("Ue Potential wrong!!!!\n");
+        else std::cout << ("Ue seems to be sane, diff=" + std::to_string(diff)) << std::endl;
+    }
+    return Uxy;
+}
+
 madness::real_function_6d
 CCPotentials::apply_transformed_Ue_macrotask(World& world, const std::vector<real_function_3d>& mo_ket,
                                              const CCParameters& parameters, const real_function_3d& Rsquare,
@@ -1705,6 +2300,161 @@ CCPotentials::apply_transformed_Ue_macrotask(World& world, const std::vector<rea
     return Uxy;
 }
 
+
+/// calculate [F,Qt] f12 |rhs>
+
+/// From Eqs. (42) - (44) of Kottmann et al. JCTC 13, 5945 (2017)
+/// and eq. (30) of Kottmann et al. JCTC 13, 5956 (2017)
+/// [F,Qt] = [F1,Q1t]Q2t + Q1t [F2,Q2t]
+/// and [F1,Q1t] = - [F1,O1t] = - (F-e_k) |tk><k| = - (F-e_k) |tauk><k| = |Vk><k|
+/// commutator is applied to f12|titj>
+/// @param ex_singles  not used in this function
+/// @param Gscreen     not used in this function
+/// @return the commutator [F,Qt] f12 |phi_i phi_j>
+madness::CCPairFunction<double,6>
+CCPotentials::apply_commutator_F_Qt_f12(World& world, const CCFunction<double,3>& phi_i, const CCFunction<double,3>& phi_j,
+                                                  const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles,
+                                                  const Info& info, const real_convolution_6d *Gscreen) {
+    const auto& parameters=info.parameters;
+
+    // if ground-state use Eqs (43)-(44) of Kottmann et al. JCTC 13, 5945 (2017)
+    auto f12=CCConvolutionOperatorPtr<double,3>(world,OT_F12,parameters);
+    auto ftt=std::vector<CCPairFunction<double,6>>({CCPairFunction<double,6>(f12, phi_i.function, phi_j.function)});
+
+    const vector_real_function_3d Vtau=info.intermediate_potentials(gs_singles, POT_singles_);
+    Projector<double,3> OVtau(info.get_active_mo_bra(),Vtau);
+    // NOTE(review): Qt uses the bare singles here, apply_commutator_F_dQt_f12 uses make_active_t_intermediate -- confirm
+    QProjector<double,3> Qt(info.get_active_mo_bra(),gs_singles.get_vecfunction());
+    auto p1=outer(OVtau,Qt);
+    auto p2=outer(Qt,OVtau);
+
+    auto result=p1(ftt) + p2(ftt);     // result=Qt2(Ov1(ftt)) + Qt1(Ov2(ftt));
+    result=consolidate(result,{});     // will collect similar terms only
+    MADNESS_CHECK_THROW(result.size()==1 and result[0].is_decomposed(),"apply_commutator_F_Qt_f12 should return a single CCPairFunction");
+    return result[0];
+}
+
+/// calculate [F,dQt] f12 |rhs>
+
+/// Using eq. (31) of Kottmann et al. JCTC 13, 5956 (2017)
+/// note that we leave the omega dQ12t term out, as it cancels with eq. (29)
+/// @param Gscreen  not used in this function
+/// @return [F,Qt] f12 |rhs> - omega dQ12 f12 |phi_i phi_j>
+madness::CCPairFunction<double,6>
+CCPotentials::apply_commutator_F_dQt_f12(World& world, const CCFunction<double,3>& phi_i, const CCFunction<double,3>& phi_j,
+                                                  const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles,
+                                                  const Info& info, const real_convolution_6d *Gscreen) {
+    const auto& parameters=info.parameters;
+
+    auto f12=CCConvolutionOperatorPtr<double,3>(world,OT_F12,parameters);
+    auto ftt=std::vector<CCPairFunction<double,6>>({CCPairFunction<double,6>(f12, phi_i.function, phi_j.function)});
+
+    auto t=CCPotentials::make_active_t_intermediate(gs_singles,info);
+    const vector_real_function_3d Vtau=info.intermediate_potentials(gs_singles, POT_singles_);
+    const vector_real_function_3d Vx=info.intermediate_potentials(ex_singles, POT_singles_);
+    auto bra=info.get_active_mo_bra();
+
+    // one-particle projectors sharing the active bra space, then their two-particle outer products
+    Projector<double,3> OVtau(bra,Vtau);
+    Projector<double,3> Ox(bra,ex_singles.get_vecfunction());
+    Projector<double,3> OVx(bra,Vx);
+    QProjector<double,3> Qt(bra,t.get_vecfunction());
+    auto OvxQt=outer(OVx,Qt);
+    auto QtOvx=outer(Qt,OVx);
+    auto OxOvt=outer(Ox,OVtau);
+    auto OvtOx=outer(OVtau,Ox);
+    auto result=OvxQt(ftt) + QtOvx(ftt) - OxOvt(ftt) - OvtOx(ftt);
+    result=consolidate(result);     // will collect similar terms only
+    MADNESS_CHECK_THROW(result.size()==1 and result[0].is_decomposed(),"apply_commutator_F_dQt_f12 should return a single CCPairFunction");
+    return result[0];
+}
+
+
+/// Apply the exchange commutator [K,f12] to the pair |phi_i phi_j>, computed as K(f12|xy>) - f12 K|xy>.
+/// Prints sizes and the sanity value <xy|R2 [K,f]|xy>, which is compared against thresh_6D on rank 0.
+/// @param Gscreen  screening operator, forwarded to make_f_xy_macrotask
+madness::real_function_6d
+CCPotentials::apply_KffK(World& world, const CCFunction<double,3>& phi_i, const CCFunction<double,3>& phi_j,
+                                                  const Info& info, const real_convolution_6d *Gscreen) {
+    real_function_3d x_ket = phi_i.function;
+    real_function_3d y_ket = phi_j.function;
+    real_function_3d x_bra = (info.R_square*phi_i.function).truncate();
+    real_function_3d y_bra = (info.R_square*phi_j.function).truncate();
+    const std::string x_name = phi_i.name();
+    const std::string y_name = phi_j.name();
+    const auto& parameters=info.parameters;
+
+    //apply Kf
+    if (parameters.debug()) print("\nComputing [K,f]|" + x_name + y_name + ">\n");
+
+    CCTimer time(world, "[K,f]|" + x_name + y_name + ">");
+    CCTimer part1_time(world, "Kf" + x_name + y_name + ">");
+    bool symmetric_kf = false;
+    if ((phi_i.type == phi_j.type) && (phi_i.i == phi_j.i)) symmetric_kf = true;
+
+    // First make the 6D function f12|x,y>
+    real_function_6d f12xy = make_f_xy_macrotask(world, x_ket, y_ket, x_bra, y_bra, phi_i.i, phi_j.i,
+        parameters, phi_i.type, phi_j.type, Gscreen);
+    f12xy.truncate().reduce_rank();
+    // Apply the Exchange Operator
+    real_function_6d Kfxy = K_macrotask(world, info.mo_ket, info.mo_bra, f12xy, symmetric_kf, parameters);
+
+    if (parameters.debug()) part1_time.info();
+
+    //apply fk
+    CCTimer part2_time(world, "fK" + x_name + y_name + ">");
+
+    const bool symmetric_fk = (phi_i==phi_j);
+    const real_function_3d Kx = K_macrotask(world, info.mo_ket, info.mo_bra, x_ket, parameters);
+    const FuncType Kx_type = UNDEFINED;
+    const real_function_6d fKphi0b = make_f_xy_macrotask(world, Kx, y_ket, x_bra, y_bra, phi_i.i, phi_j.i,
+        parameters, Kx_type, phi_j.type, Gscreen);
+    real_function_6d fKphi0a;
+    if (symmetric_fk) fKphi0a = madness::swap_particles(fKphi0b);
+    else {
+        real_function_3d Ky = K_macrotask(world, info.mo_ket, info.mo_bra, y_ket, parameters);
+        const FuncType Ky_type = UNDEFINED;
+        fKphi0a = make_f_xy_macrotask(world, x_ket, Ky, x_bra, y_bra, phi_i.i, phi_j.i,
+            parameters, phi_i.type, Ky_type, Gscreen);
+    }
+    const real_function_6d fKxy = (fKphi0a + fKphi0b);
+
+    if (parameters.debug()) part2_time.info();
+
+    //final result
+    Kfxy.print_size("Kf" + x_name + y_name);
+    Kfxy.set_thresh(parameters.thresh_6D());
+    Kfxy.truncate().reduce_rank();
+    Kfxy.print_size("Kf after truncation" + x_name + y_name);
+    fKxy.print_size("fK" + x_name + y_name);
+    real_function_6d result = (Kfxy - fKxy);
+    result.set_thresh(parameters.thresh_6D());
+    result.print_size("[K,f]" + x_name + y_name);
+    result.truncate().reduce_rank();
+    result.print_size("[K,f]" + x_name + y_name);
+
+    //sanity check
+    CCTimer sanity(world, "[K,f] sanity check");
+    // make the <xy| bra state which is <xy|R2
+    const real_function_3d brax = (x_ket * info.R_square);
+    const real_function_3d bray = (y_ket * info.R_square);
+    real_function_3d xres = result.project_out(brax, 0);
+    const double test = bray.inner(xres);
+    const double diff = test;
+    // the verdict is printed by rank 0 only, so no other rank reports "sane" without testing the threshold
+    if (world.rank() == 0) {
+        std::cout << std::fixed << std::setprecision(10)
+                  << "<" << x_name << y_name << "[K,f]" << x_name << y_name << "> =" << test << "\n";
+        if (fabs(diff) > parameters.thresh_6D()) print("Exchange Commutator Plain Wrong");
+        else print("Exchange Commutator seems to be sane, diff=" + std::to_string(diff));
+    }
+    if (parameters.debug()) sanity.info(diff);
+    if (parameters.debug()) print("\n");
+
+    return result;
+}
+
+
 madness::real_function_6d
 CCPotentials::apply_exchange_commutator_macrotask(World& world, const std::vector<real_function_3d>& mo_ket,
                                                   const std::vector<real_function_3d>& mo_bra, const real_function_3d& Rsquare,
@@ -1845,22 +2595,25 @@ CCPotentials::apply_exchange_commutator1(const CCFunction<double,3>& x, const CC
 double
 CCPotentials::make_xy_gf_ab(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const CCFunction<double,3>& a, const CCFunction<double,3>& b) const {
     const real_function_3d xa = (x.function * a.function).truncate();
-    const real_function_3d x_gf_a = apply_gf(xa);
+    const real_function_3d x_gf_a = apply_gf(world, xa, info);     // apply_gf now receives world and info explicitly
     const double result = y.function.inner(x_gf_a * b.function);
     return result;
 }
 
 madness::real_function_3d
-CCPotentials::apply_gf(const real_function_3d& f) const {
-    std::shared_ptr<real_convolution_3d> fBSH = std::shared_ptr<real_convolution_3d>(
-            BSHOperatorPtr3D(world, parameters.gamma(), parameters.lo(), parameters.thresh_poisson()));
-    double bsh_prefactor = 4.0 * constants::pi;
-    double prefactor = 1.0 / (2.0 * parameters.gamma());
-    return prefactor * ((*g12)(f) - bsh_prefactor * (*fBSH)(f)).truncate();
+CCPotentials::apply_gf(World& world, const real_function_3d& f, const Info& info) {
+    // std::shared_ptr<real_convolution_3d> fBSH = std::shared_ptr<real_convolution_3d>(
+            // BSHOperatorPtr3D(world, info.parameters.gamma(), info.parameters.lo(), info.parameters.thresh_poisson()));
+    auto fg=CCConvolutionOperator<double,3>(world,OpType::OT_FG12,info.parameters);     // one OT_FG12 operator is applied to f
+    // the commented-out lines are the former hand-built combination of g12 and a BSH operator, kept for reference
+    // double bsh_prefactor = 4.0 * constants::pi;
+    // double prefactor = 1.0 / (2.0 * info.parameters.gamma());
+    return fg(f).truncate();
+    // return prefactor * ((*g12)(f) - bsh_prefactor * (*fBSH)(f)).truncate();
 }
 
 double
-CCPotentials::make_xy_u(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const std::vector<CCPairFunction<double,6>>& u) const {
+CCPotentials::make_xy_u(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const std::vector<CCPairFunction<double,6>>& u) {
     double result = 0.0;
     for (size_t mm = 0; mm < u.size(); mm++) {
         result += u[mm].make_xy_u(x, y);
@@ -1870,33 +2623,36 @@ CCPotentials::make_xy_u(const CCFunction<double,3>& x, const CCFunction<double,3
 
 double
 CCPotentials::make_xy_op_u(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const CCConvolutionOperator<double,3>& op,
-                           const CCPairFunction<double,6>& u) const {
-    double result = 0.0;
-    if (u.component->is_pure()) {
-        real_function_6d xy_op = CompositeFactory<double, 6, 3>(world).particle1(copy(x.function)).particle2(
-                copy(y.function)).g12(op.get_kernel());
-        result = inner(u.get_function(), xy_op);
-    } else if (u.component->is_decomposed()) {
-        if (u.component->has_operator()) {
-            if (op.type() == OpType::OT_G12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_F12)
-                result = make_xy_gf_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]);
-            else if (op.type() == OpType::OT_F12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_G12)
-                result = make_xy_gf_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]);
-            else if (op.type() == OpType::OT_F12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_F12)
-                result = make_xy_ff_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]);
-            else MADNESS_EXCEPTION(("xy_" + op.name() + u.name() + " not implemented").c_str(), 1);
-        } else {
-            for (size_t i = 0; i < u.decomposed().get_a().size(); i++)
-                result += (x.function * u.decomposed().get_a()[i]).inner(op(y, u.decomposed().get_b()[i]));
-        }
-    } else error("Unknown CCPairFunction type in make_xy_op_u");
-
-    return result;
+                           const CCPairFunction<double,6>& u) {
+    auto ket=CCPairFunction<double,6>(x.f(),y.f());     // pair function built from x and y
+    auto bra=std::make_shared<CCConvolutionOperator<double,3>>(op)*u;     // a shared copy of op multiplied onto u
+    return inner(bra,ket);     // the case distinction of the old code below is left to inner()
+//    double result = 0.0;
+//    if (u.component->is_pure()) {
+//        real_function_6d xy_op = CompositeFactory<double, 6, 3>(world).particle1(copy(x.function)).particle2(
+//                copy(y.function)).g12(op.get_kernel());
+//        result = inner(u.get_function(), xy_op);
+//    } else if (u.component->is_decomposed()) {
+//        if (u.component->has_operator()) {
+//            if (op.type() == OpType::OT_G12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_F12)
+//                result = make_xy_gf_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]);
+//            else if (op.type() == OpType::OT_F12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_G12)
+//                result = make_xy_gf_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]);
+//            else if (op.type() == OpType::OT_F12 and u.decomposed().get_operator_ptr()->type() == OpType::OT_F12)
+//                result = make_xy_ff_ab(x, y, u.decomposed().get_a()[0], u.decomposed().get_b()[0]);
+//            else MADNESS_EXCEPTION(("xy_" + op.name() + u.name() + " not implemented").c_str(), 1);
+//        } else {
+//            for (size_t i = 0; i < u.decomposed().get_a().size(); i++)
+//                result += (x.function * u.decomposed().get_a()[i]).inner(op(y, u.decomposed().get_b()[i]));
+//        }
+//    } else error("Unknown CCPairFunction type in make_xy_op_u");
+//
+//    return result;
 }
 
 double
 CCPotentials::make_xy_op_u(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const CCConvolutionOperator<double,3>& op,
-                           const std::vector<CCPairFunction<double,6>>& u) const {
+                           const std::vector<CCPairFunction<double,6>>& u) {
     double result = 0.0;
     for (size_t mm = 0; mm < u.size(); mm++) {
         const double tmp = make_xy_op_u(x, y, op, u[mm]);
@@ -1920,7 +2676,7 @@ CCPotentials::make_xy_op_ab(const CCFunction<double,3>& x, const CCFunction<doub
 }
 
 std::vector<CCPairFunction<double,6>>
-CCPotentials::get_pair_function(const Pairs<CCPair>& pairs, const size_t i, const size_t j) const {
+CCPotentials::get_pair_function(const Pairs<CCPair>& pairs, const size_t i, const size_t j) {
     if (i > j) {
         return swap_particles(pairs(j, i).functions);
     } else {
@@ -1929,8 +2685,11 @@ CCPotentials::get_pair_function(const Pairs<CCPair>& pairs, const size_t i, cons
 }
 
 madness::real_function_3d
-CCPotentials::apply_s2b_operation(const CCFunction<double,3>& bra, const CCPairFunction<double,6>& u, const size_t particle) const {
+CCPotentials::apply_s2b_operation(World& world, const CCFunction<double,3>& bra, const CCPairFunction<double,6>& u,
+    const size_t particle, const Info& info) {
     real_function_3d result;
+    auto g12=std::shared_ptr<CCConvolutionOperator<double,3>>(new CCConvolutionOperator<double,3>(world,OpType::OT_G12,info.parameters));
+
     MADNESS_ASSERT(particle == 1 || particle == 2);
     if (u.is_pure()) {
         result = u.dirac_convolution(bra, *g12, particle);
@@ -1938,20 +2697,24 @@ CCPotentials::apply_s2b_operation(const CCFunction<double,3>& bra, const CCPairF
         result = u.dirac_convolution(bra, *g12, particle);
     } else if (u.is_op_decomposed()) {
         // retunrns <x|g12f12|x(1)y(2)>_particle
-        CCFunction<double,3> a;
-        CCFunction<double,3> b;
-        if (particle == 1) {
-            a = u.get_a()[0];
-            b = u.get_b()[0];
-        } else {
-            a = u.get_b()[0];
-            b = u.get_a()[0];
-        }
-        const real_function_3d tmp = (bra.function * a.function).truncate();
-        const real_function_3d tmp2 = apply_gf(tmp);
-        real_function_3d tmp3 = tmp2 * b.function;
-        tmp3.truncate();
-        result = tmp3;
+        std::array<int,3> p1={0,1,2};
+        std::array<int,3> p2={3,4,5};
+        auto p = (particle == 1) ? p1 : p2;
+        result=inner(g12*u,bra.f(),p,p1);
+//        CCFunction<double,3> a;
+//        CCFunction<double,3> b;
+//        if (particle == 1) {
+//            a = u.get_a()[0];
+//            b = u.get_b()[0];
+//        } else {
+//            a = u.get_b()[0];
+//            b = u.get_a()[0];
+//        }
+//        const real_function_3d tmp = (bra.function * a.function).truncate();
+//        const real_function_3d tmp2 = apply_gf(world, tmp, info);
+//        real_function_3d tmp3 = tmp2 * b.function;
+//        tmp3.truncate();
+//        result = tmp3;
     } else MADNESS_EXCEPTION("apply_s2b_operation: unknown type", 1)
 
     ;
@@ -2032,6 +2795,9 @@ CCPotentials::apply_Ot(const CCPairFunction<double,6>& f, const CC_vecfunction&
     CC_vecfunction mbra;
     if (t.size() == mo_bra_.size()) mbra = CC_vecfunction(copy(world, mo_bra_.get_vecfunction()), HOLE);
     else mbra = CC_vecfunction(copy(world, get_active_mo_bra()), HOLE, parameters.freeze());
+    Projector<double,3> O(mbra.get_vecfunction(), t.get_vecfunction());
+    O.set_particle(particle-1); // shift particle index
+    return O(f);
 
     MADNESS_ASSERT(mbra.size() == t.size());
     if (f.is_pure()) {
@@ -2091,107 +2857,118 @@ CCPotentials::apply_G(const CCPairFunction<double,6>& u, const real_convolution_
 }
 
 madness::vector_real_function_3d
-CCPotentials::get_CC2_singles_potential_gs(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const {
+CCPotentials::get_CC2_singles_potential_gs(World& world, const CC_vecfunction& singles,
+                                           const Pairs<CCPair>& doubles, Info& info)
+{
     CCTimer time(world, "CC2 Singles potential");
-    vector_real_function_3d fock_residue = potential_singles_gs(singles, doubles, POT_F3D_);
+    vector_real_function_3d fock_residue = potential_singles_gs(world, singles, doubles, POT_F3D_, info);
+    Projector<double,3> Otau(info.get_active_mo_bra(), singles.get_vecfunction());     // built from active MO bras and the singles
+    QProjector<double,3> Q(info.mo_bra, info.mo_ket);     // built from all MO bras and kets
     // CC2 Singles potential: Q(S4c) + Qt(ccs+s2b+s2c)
-    vector_real_function_3d Vccs = potential_singles_gs(singles, doubles, POT_ccs_);
-    vector_real_function_3d Vs2b = potential_singles_gs(singles, doubles, POT_s2b_);
-    vector_real_function_3d Vs2c = potential_singles_gs(singles, doubles, POT_s2c_);
-    vector_real_function_3d Vs4b = potential_singles_gs(singles, doubles, POT_s4b_);
-    vector_real_function_3d Vs4c = potential_singles_gs(singles, doubles, POT_s4c_);
-    vector_real_function_3d Vs4a = apply_projector(Vs2b, singles);     // need to subtract
+    vector_real_function_3d Vccs = potential_singles_gs(world, singles, doubles, POT_ccs_, info);
+    vector_real_function_3d Vs2b = potential_singles_gs(world, singles, doubles, POT_s2b_, info);
+    vector_real_function_3d Vs2c = potential_singles_gs(world, singles, doubles, POT_s2c_, info);
+    vector_real_function_3d Vs4b = potential_singles_gs(world, singles, doubles, POT_s4b_, info);
+    vector_real_function_3d Vs4c = potential_singles_gs(world, singles, doubles, POT_s4c_, info);
+    // vector_real_function_3d Vs4a = apply_projector(Vs2b, singles);     // need to subtract
+    vector_real_function_3d Vs4a = Otau(Vs2b);     // need to subtract
     vector_real_function_3d unprojected = add(world, Vccs, add(world, Vs2b, add(world, Vs2c, add(world, Vs4b,
                                                                                                   sub(world, Vs4c,
                                                                                                       Vs4a)))));
-    vector_real_function_3d potential = apply_Qt(unprojected, mo_ket_);
+    // vector_real_function_3d potential = apply_Qt(unprojected, mo_ket_);
+    vector_real_function_3d potential = Q(unprojected);     // Q replaces the former apply_Qt(unprojected, mo_ket_)
     truncate(world, potential);
-    get_potentials.insert(copy(world, potential), singles, POT_singles_);
+    info.intermediate_potentials.insert(copy(world, potential), singles, POT_singles_);     // a copy is stored in info under POT_singles_
     time.info(true, norm2(world, potential));
     const vector_real_function_3d result = add(world, potential, fock_residue);
     return result;
 }
 
 madness::vector_real_function_3d
-CCPotentials::get_CCS_potential_ex(CC_vecfunction& x, const bool print) const {
+CCPotentials::get_CCS_potential_ex(World& world, const CC_vecfunction& x, const bool print, Info& info) {     // NOTE(review): 'print' is not read in this body any more -- confirm it is still needed
     if (x.type != RESPONSE) error("get_CCS_response_potential: Wrong type of input singles");
 
     Pairs<CCPair> empty_doubles;
     CC_vecfunction empty_singles(PARTICLE);
-    const vector_real_function_3d fock_residue = potential_singles_ex(empty_singles, empty_doubles, x, empty_doubles,
-                                                                      POT_F3D_);
-    vector_real_function_3d potential = potential_singles_ex(empty_singles, empty_doubles, x, empty_doubles, POT_cis_);
+    const vector_real_function_3d fock_residue = potential_singles_ex(world, empty_singles, empty_doubles, x,
+                                                                      empty_doubles, POT_F3D_, info);
+    vector_real_function_3d potential = potential_singles_ex(world, empty_singles, empty_doubles, x, empty_doubles, POT_cis_, info);
     // the fock residue does not get projected, but all the rest
-    potential = apply_Qt(potential, mo_ket_);
+    QProjector<double,3> Q(info.mo_bra, info.mo_ket);     // built from all MO bras and kets
+    // potential = apply_Qt(potential, mo_ket_);
+    potential=Q(potential);     // Q replaces the former apply_Qt(potential, mo_ket_)
     truncate(world, potential);
-    get_potentials.insert(copy(world, potential), x, POT_singles_);
+    info.intermediate_potentials.insert(copy(world, potential), x, POT_singles_);     // a copy is stored in info under POT_singles_
     vector_real_function_3d result = add(world, fock_residue, potential);
     truncate(world, result);
-    const double omega = compute_cis_expectation_value(x, result, print);
-    x.omega = omega;
     return result;
 }
 
 madness::vector_real_function_3d
-CCPotentials::get_CC2_singles_potential_ex(const CC_vecfunction& gs_singles, const Pairs<CCPair>& gs_doubles,
-                                           CC_vecfunction& ex_singles, const Pairs<CCPair>& response_doubles) const {
+CCPotentials::get_CC2_singles_potential_ex(World& world, const CC_vecfunction& gs_singles,
+                                           const Pairs<CCPair>& gs_doubles, const CC_vecfunction& ex_singles,
+                                           const Pairs<CCPair>& response_doubles, Info& info)
+{
     MADNESS_ASSERT(gs_singles.type == PARTICLE);
     MADNESS_ASSERT(ex_singles.type == RESPONSE);
-    const vector_real_function_3d fock_residue = potential_singles_ex(gs_singles, gs_doubles, ex_singles,
-                                                                      response_doubles, POT_F3D_);
-    vector_real_function_3d Vccs = potential_singles_ex(gs_singles, gs_doubles, ex_singles, response_doubles, POT_ccs_);
-    vector_real_function_3d Vs2b = potential_singles_ex(gs_singles, gs_doubles, ex_singles, response_doubles, POT_s2b_);
-    vector_real_function_3d Vs2c = potential_singles_ex(gs_singles, gs_doubles, ex_singles, response_doubles, POT_s2c_);
-    vector_real_function_3d Vs4b = potential_singles_ex(gs_singles, gs_doubles, ex_singles, response_doubles, POT_s4b_);
-    vector_real_function_3d Vs4c = potential_singles_ex(gs_singles, gs_doubles, ex_singles, response_doubles, POT_s4c_);
+    Projector<double,3> Ox(info.get_active_mo_bra(),ex_singles.get_vecfunction());     // built from active MO bras and the response singles
+    Projector<double,3> Ot(info.get_active_mo_bra(),gs_singles.get_vecfunction());     // built from active MO bras and the ground-state singles
+    const vector_real_function_3d fock_residue = potential_singles_ex(world, gs_singles, gs_doubles,
+                                                                      ex_singles, response_doubles, POT_F3D_, info);
+    vector_real_function_3d Vccs = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, response_doubles,POT_ccs_, info);
+    vector_real_function_3d Vs2b = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, response_doubles,POT_s2b_, info);
+    vector_real_function_3d Vs2c = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, response_doubles,POT_s2c_, info);
+    vector_real_function_3d Vs4b = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, response_doubles,POT_s4b_, info);
+    vector_real_function_3d Vs4c = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles, response_doubles,POT_s4c_, info);
     // make low scaling s4a potential
     // -Otau(s2b_response) + -Ox(s2b_gs)
     // maybe store full s2b potential of gs
     // both need to be subtracted
-    vector_real_function_3d s2b_gs = potential_singles_gs(gs_singles, gs_doubles, POT_s2b_);
-    vector_real_function_3d Vs4a =
-            -1.0 * add(world, apply_projector(s2b_gs, ex_singles), apply_projector(Vs2b, gs_singles));
+    vector_real_function_3d s2b_gs = potential_singles_gs(world, gs_singles, gs_doubles, POT_s2b_, info);
+    // vector_real_function_3d Vs4a =
+            // -1.0 * add(world, apply_projector(s2b_gs, ex_singles), apply_projector(Vs2b, gs_singles));
+    vector_real_function_3d Vs4a = -1.0 * (Ox(s2b_gs)+ Ot(Vs2b));     // projector objects replace the former apply_projector calls
     //add up
     vector_real_function_3d unprojected = add(world, Vccs, add(world, Vs2b, add(world, Vs2c, add(world, Vs4a,
                                                                                                   add(world, Vs4b,
                                                                                                       Vs4c)))));
-    vector_real_function_3d potential = apply_Qt(unprojected, mo_ket_);
-    if (parameters.debug()) {
+    QProjector<double,3> Q(info.mo_bra, info.mo_ket);     // built from all MO bras and kets
+    // vector_real_function_3d potential = apply_Qt(unprojected, mo_ket_);
+    vector_real_function_3d potential = Q(unprojected);
+    if (info.parameters.debug()) {     // the individual contributions are printed by rank 0 only
         // debug
-        vector_real_function_3d xbra = mul(world, nemo_->ncf->square(), ex_singles.get_vecfunction());
+        vector_real_function_3d xbra = info.R_square* ex_singles.get_vecfunction();     // info.R_square replaces nemo_->ncf->square()
         const double ccs = inner(world, xbra, Vccs).sum();
         const double s2b = inner(world, xbra, Vs2b).sum();
         const double s2c = inner(world, xbra, Vs2c).sum();
         const double s4a = inner(world, xbra, Vs4a).sum();
         const double s4b = inner(world, xbra, Vs4b).sum();
         const double s4c = inner(world, xbra, Vs4c).sum();
-        std::cout << std::fixed << std::setprecision(10) << "functional response energies:" << "\n<x|ccs>=" << ccs
+        if (world.rank()==0) std::cout << std::fixed << std::setprecision(10) << "functional response energies:" << "\n<x|ccs>=" << ccs
                   << "\n<x|S2b>=" << s2b << "\n<x|S2c>=" << s2c << "\n<x|s4a>=" << s4a << "\n<x|s4b>="
                   << s4b << "\n<x|s4c>=" << s4c << "\n";
         // debug end
     }
     // storing potential
-    get_potentials.insert(copy(world, potential), ex_singles, POT_singles_);
+    info.intermediate_potentials.insert(copy(world, potential), ex_singles, POT_singles_);     // a copy is stored in info under POT_singles_
     vector_real_function_3d result = add(world, fock_residue, potential);
     truncate(world, result);
-    const double omega = compute_cis_expectation_value(ex_singles, result);
-    ex_singles.omega = omega;
     return result;
 }
 
 madness::vector_real_function_3d
-CCPotentials::get_ADC2_singles_potential(const Pairs<CCPair>& gs_doubles, CC_vecfunction& ex_singles,
-                                         const Pairs<CCPair>& response_doubles) const {
+CCPotentials::get_ADC2_singles_potential(World& world, const Pairs<CCPair>& gs_doubles,
+                                         CC_vecfunction& ex_singles, const Pairs<CCPair>& response_doubles, Info& info) const {
     MADNESS_ASSERT(ex_singles.type == RESPONSE);
     vector_real_function_3d zero = zero_functions<double, 3>(world, get_active_mo_ket().size());
     CC_vecfunction tau(zero, PARTICLE, parameters.freeze());
-    const vector_real_function_3d result = get_CC2_singles_potential_ex(tau, gs_doubles, ex_singles, response_doubles);
+    const vector_real_function_3d result = get_CC2_singles_potential_ex(world, tau, gs_doubles, ex_singles, response_doubles, info);     // CC2 response potential evaluated with all-zero ground-state singles
     return result;
 }
 
 double
-CCPotentials::potential_energy_gs(const CC_vecfunction& bra, const CC_vecfunction& singles,
-                                  const Pairs<CCPair>& doubles, const PotentialType& name) const {
+CCPotentials::potential_energy_gs(World& world, const CC_vecfunction& bra,
+                                  const CC_vecfunction& singles, const Pairs<CCPair>& doubles, const PotentialType& name) const {
     // sanity check
     MADNESS_ASSERT(singles.type == PARTICLE);
     CCTimer timer(world, "potential energy of " + assign_name(name));
@@ -2209,7 +2986,7 @@ CCPotentials::potential_energy_gs(const CC_vecfunction& bra, const CC_vecfunctio
     } else if (name == POT_s6_) {
         result = x_s6(bra, singles, singles, singles);
     } else if (name == POT_F3D_) {
-        result = x_s3a(bra, singles) - compute_kinetic_energy(bra.get_vecfunction(), singles.get_vecfunction());
+        result = x_s3a(bra, singles) - compute_kinetic_energy(world, bra.get_vecfunction(), singles.get_vecfunction());
     } else if (name == POT_ccs_) {
         result = x_s3c(bra, singles) + x_s5b(bra, singles, singles) + x_s5c(bra, singles, singles) +
                  x_s6(bra, singles, singles, singles);
@@ -2236,17 +3013,20 @@ CCPotentials::potential_energy_gs(const CC_vecfunction& bra, const CC_vecfunctio
 }
 
 madness::vector_real_function_3d
-CCPotentials::potential_singles_gs(const CC_vecfunction& singles, const Pairs<CCPair>& doubles,
-                                   const PotentialType& name) const {
+CCPotentials::potential_singles_gs(World& world, const CC_vecfunction& singles,
+                                   const Pairs<CCPair>& doubles, const PotentialType& name, Info& info)
+{
     MADNESS_ASSERT(singles.type == PARTICLE);
     vector_real_function_3d result;
     CCTimer timer(world, "Singles-Potential:" + assign_name(name));
     if (name == POT_F3D_) {
-        result = fock_residue_closed_shell(singles);
+        result = fock_residue_closed_shell(world, singles, info);
     } else if (name == POT_ccs_) {
-        const CC_vecfunction t = make_t_intermediate(singles);
-        result = apply_Qt(ccs_unprojected(t, singles),
-                          t);     // this is not the full t projector, but the potential will be projeted afterwards and this will unclude th frozen mos
+        const CC_vecfunction t = make_active_t_intermediate(singles,info);
+        QProjector<double,3> Qt(info.get_active_mo_bra(),t.get_vecfunction());
+        result = Qt(ccs_unprojected(world, t, singles, info));
+        // result = apply_Qt(ccs_unprojected(world, t, singles, info), t);
+        // this is not the full t projector, but the potential will be projected afterwards and this will include the frozen mos
     } else if (name == POT_s2b_) {
         //	// calculate the s2b potential and afterwards the s4a potential from the s2b potential
         //	// because:  Qt(S2b) = S2b + S4a
@@ -2261,15 +3041,15 @@ CCPotentials::potential_singles_gs(const CC_vecfunction& singles, const Pairs<CC
         //	    << get_size(world,result_s4a) << " (GB), " << timer.current_time().first << "s (wall), " << timer.current_time().second << "s (cpu)\n";
         //	result = add(world,result_s2b,result_s4a);
         // returns the s2b potential (unprojected)
-        result = s2b(singles, doubles);
+        result = s2b(world, singles, doubles, info);
     } else if (name == POT_s2c_) {
-        result = s2c(singles, doubles);
+        result = s2c(world, singles, doubles, info);
     } else if (name == POT_s4a_) {
         error("potential_singles: Demanded s4a potential -> this is calculated along with the s2b potential");
     } else if (name == POT_s4b_) {
-        result = s4b(singles, doubles);
+        result = s4b(world, singles, doubles, info);
     } else if (name == POT_s4c_) {
-        result = s4c(singles, doubles);
+        result = s4c(world, singles, doubles, info);
     } else MADNESS_EXCEPTION(("potential_singles: Unknown potential " + assign_name(name)).c_str(), 1)
 
     ;
@@ -2286,10 +3066,10 @@ CCPotentials::potential_singles_gs(const CC_vecfunction& singles, const Pairs<CC
 }
 
 double
-CCPotentials::potential_energy_ex(const CC_vecfunction& bra, const CC_vecfunction& singles_gs,
-                                  const Pairs<CCPair>& doubles_gs, const CC_vecfunction& singles_ex,
-                                  const Pairs<CCPair>& doubles_ex,
-                                  const PotentialType& name) const {
+CCPotentials::potential_energy_ex(World& world, const CC_vecfunction& bra,
+                                  const CC_vecfunction& singles_gs, const Pairs<CCPair>& doubles_gs,
+                                  const CC_vecfunction& singles_ex,
+                                  const Pairs<CCPair>& doubles_ex, const PotentialType& name) const {
     // sanity check
     MADNESS_ASSERT(singles_gs.type == PARTICLE);
     MADNESS_ASSERT(singles_ex.type == RESPONSE);
@@ -2309,7 +3089,7 @@ CCPotentials::potential_energy_ex(const CC_vecfunction& bra, const CC_vecfunctio
         result = x_s6(bra, singles_ex, singles_gs, singles_gs) + x_s6(bra, singles_gs, singles_ex, singles_gs) +
                  x_s6(bra, singles_gs, singles_gs, singles_ex);
     } else if (name == POT_F3D_) {
-        result = x_s3a(bra, singles_ex) - compute_kinetic_energy(bra.get_vecfunction(), singles_ex.get_vecfunction());
+        result = x_s3a(bra, singles_ex) - compute_kinetic_energy(world, bra.get_vecfunction(), singles_ex.get_vecfunction());
     } else if (name == POT_ccs_) {
         result = x_s3c(bra, singles_ex) + x_s5b(bra, singles_ex, singles_gs) + x_s5c(bra, singles_ex, singles_gs) +
                  x_s6(bra, singles_ex, singles_gs, singles_gs) + x_s5b(bra, singles_gs, singles_ex)
@@ -2338,40 +3118,49 @@ CCPotentials::potential_energy_ex(const CC_vecfunction& bra, const CC_vecfunctio
 }
 
 madness::vector_real_function_3d
-CCPotentials::potential_singles_ex(const CC_vecfunction& singles_gs, const Pairs<CCPair>& doubles_gs,
-                                   const CC_vecfunction& singles_ex, const Pairs<CCPair>& doubles_ex,
-                                   const PotentialType& name) const {
+CCPotentials::potential_singles_ex(World& world, const CC_vecfunction& singles_gs,
+                                   const Pairs<CCPair>& doubles_gs, const CC_vecfunction& singles_ex,
+                                   const Pairs<CCPair>& doubles_ex, const PotentialType& name, Info& info)
+{
     //if(mo_ket_.size()>1) output.warning("Potential for ExSingles is not ready for more than one orbital");
     // sanity check
     MADNESS_ASSERT(singles_gs.type == PARTICLE);
     MADNESS_ASSERT(singles_ex.type == RESPONSE);
+
+    Projector<double,3> Ox(info.get_active_mo_bra(),singles_ex.get_vecfunction());
+
     vector_real_function_3d result;
     CCTimer timer(world, "timer-ex-potential");
     if (name == POT_F3D_) {
-        result = fock_residue_closed_shell(singles_ex);
+        result = fock_residue_closed_shell(world, singles_ex, info);
     } else if (name == POT_ccs_) {
-        const CC_vecfunction t = make_t_intermediate(singles_gs);
-        vector_real_function_3d part1 = apply_Qt(ccs_unprojected(t, singles_ex), t);
-        vector_real_function_3d part2 = apply_Qt(ccs_unprojected(singles_ex, singles_gs), t);
-        vector_real_function_3d part3 = apply_projector(ccs_unprojected(t, singles_gs), singles_ex);
+        // const CC_vecfunction t = make_t_intermediate(singles_gs,info.parameters);
+        const CC_vecfunction t = make_active_t_intermediate(singles_gs,info);
+        QProjector<double,3> Qt(info.get_active_mo_bra(),t.get_vecfunction());
+        // vector_real_function_3d part1 = apply_Qt(ccs_unprojected(world, t, singles_ex, info), t);
+        // vector_real_function_3d part2 = apply_Qt(ccs_unprojected(world, singles_ex, singles_gs, info), t);
+        vector_real_function_3d part1 = Qt(ccs_unprojected(world, t, singles_ex, info));
+        vector_real_function_3d part2 = Qt(ccs_unprojected(world, singles_ex, singles_gs, info));
+        // vector_real_function_3d part3 = apply_projector(ccs_unprojected(world, t, singles_gs, info), singles_ex);
+        vector_real_function_3d part3 = Ox(ccs_unprojected(world, t, singles_gs, info));
         vector_real_function_3d tmp = add(world, part1, part2);
         result = sub(world, tmp, part3);
     } else if (name == POT_s2b_) {
-        result = s2b(singles_ex, doubles_ex);
+        result = s2b(world, singles_ex, doubles_ex, info);
     } else if (name == POT_s2c_) {
-        result = s2c(singles_ex, doubles_ex);
+        result = s2c(world, singles_ex, doubles_ex, info);
     } else if (name == POT_s4a_) {
         error("potential_singles: Demanded s4a potential -> this is calculated from the s2b potential");
     } else if (name == POT_s4b_) {
-        vector_real_function_3d s4b_part1 = s4b(singles_gs, doubles_ex);
-        vector_real_function_3d s4b_part2 = s4b(singles_ex, doubles_gs);
+        vector_real_function_3d s4b_part1 = s4b(world, singles_gs, doubles_ex, info);
+        vector_real_function_3d s4b_part2 = s4b(world, singles_ex, doubles_gs, info);
         result = add(world, s4b_part1, s4b_part2);
     } else if (name == POT_s4c_) {
-        vector_real_function_3d s4c_part1 = s4c(singles_gs, doubles_ex);
-        vector_real_function_3d s4c_part2 = s4c(singles_ex, doubles_gs);
+        vector_real_function_3d s4c_part1 = s4c(world, singles_gs, doubles_ex, info);
+        vector_real_function_3d s4c_part2 = s4c(world, singles_ex, doubles_gs, info);
         result = add(world, s4c_part1, s4c_part2);
     } else if (name == POT_cis_) {
-        result = ccs_unprojected(CC_vecfunction(get_active_mo_ket(), HOLE, parameters.freeze()), singles_ex);
+        result = ccs_unprojected(world, CC_vecfunction(info.get_active_mo_ket(), HOLE, info.parameters.freeze()), singles_ex, info);
     } else MADNESS_EXCEPTION(("potential_singles: Unknown potential " + assign_name(name)).c_str(), 1)
 
     ;
@@ -2389,19 +3178,24 @@ CCPotentials::potential_singles_ex(const CC_vecfunction& singles_gs, const Pairs
 }
 
 madness::vector_real_function_3d
-CCPotentials::fock_residue_closed_shell(const CC_vecfunction& singles) const {
+CCPotentials::fock_residue_closed_shell(World& world, const CC_vecfunction& singles, const Info& info)
+{
     //	vecfuncT tau = singles.get_vecfunction();
+    auto g12=CCConvolutionOperator<double,3>(world,OT_G12,info.parameters);
     CCTimer timer_J(world, "J");
     //	vecfuncT J = mul(world, intermediates_.get_hartree_potential(), tau);
-    vector_real_function_3d J;
-    for (const auto& tmpi : singles.functions) {
-        const CCFunction<double,3>& taui = tmpi.second;
-        real_function_3d hartree_potential = real_function_3d(world);
-        for (const auto& tmpk : mo_ket_.functions)
-            hartree_potential += (*g12)(mo_bra_(tmpk.first), tmpk.second);
-        const real_function_3d Ji = hartree_potential * taui.function;
-        J.push_back(Ji);
-    }
+    // vector_real_function_3d J;
+    real_function_3d density=dot(world, info.mo_bra,info.mo_ket);
+    real_function_3d hartree_potential=g12(density);
+    // for (const auto& tmpi : singles.functions) {
+        // const CCFunction<double,3>& taui = tmpi.second;
+        // real_function_3d hartree_potential = real_function_3d(world);
+        // for (const auto& tmpk : mo_ket_.functions)
+            // hartree_potential += (g12)(info.mo_bra[tmpk.first], tmpk.second);
+        // const real_function_3d Ji = hartree_potential * taui.function;
+        // J.push_back(Ji);
+    // }
+    vector_real_function_3d J = hartree_potential* singles.get_vecfunction();
     truncate(world, J);
     scale(world, J, 2.0);
     timer_J.info(true, norm2(world, J));
@@ -2409,13 +3203,14 @@ CCPotentials::fock_residue_closed_shell(const CC_vecfunction& singles) const {
     vector_real_function_3d vK;
     for (const auto& tmpi : singles.functions) {
         const CCFunction<double,3>& taui = tmpi.second;
-        const real_function_3d Ki = K(taui);
+        const real_function_3d Ki = K(world, taui, info);
         vK.push_back(Ki);
     }
     scale(world, vK, -1.0);
     timer_K.info(true, norm2(world, vK));
     // apply nuclear potential
-    Nuclear<double, 3> Uop(world, nemo_.get());
+    auto ncf=std::shared_ptr<AdhocNuclearCorrelationFactor>(new AdhocNuclearCorrelationFactor(world, info.U2, info.U1));
+    Nuclear<double, 3> Uop(world, ncf);
     vector_real_function_3d Upot = Uop(singles.get_vecfunction());
     vector_real_function_3d KU = add(world, vK, Upot);
     return add(world, J, KU);
@@ -2452,10 +3247,11 @@ CCPotentials::K_macrotask(World& world, const std::vector<real_function_3d>& mo_
 }
 
 madness::real_function_3d
-CCPotentials::K(const CCFunction<double,3>& f) const {
+CCPotentials::K(World& world, const CCFunction<double,3>& f, const Info& info) {  // orbitals and operator parameters are read from info
+    auto g12=CCConvolutionOperator<double,3>(world,OT_G12,info.parameters);  // local OT_G12 operator, constructed on every call
     real_function_3d result = real_factory_3d(world);
-    for (const auto& k_iterator : mo_ket_.functions) {
-        result += (*g12)(mo_bra_(k_iterator.first), f) * mo_ket_(k_iterator.first).function;
+    for (size_t k = 0; k < info.mo_ket.size(); k++) {  // runs over every entry of info.mo_ket
+        result += ((g12)(info.mo_bra[k] * f.f()).truncate()) *info.mo_ket[k];  // g12 applied to the product mo_bra[k]*f, truncated, then multiplied by mo_ket[k]
     }
     return result;
 }
@@ -2526,12 +3322,12 @@ CCPotentials::apply_Kf(const CCFunction<double,3>& x, const CCFunction<double,3>
 madness::real_function_6d
 CCPotentials::apply_fK(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const real_convolution_6d *Gscreen) const {
     const bool symmetric = (x.type == y.type && x.i == y.i);
-    const real_function_3d Kx = K(x);
+    const real_function_3d Kx = K(world, x, info);
     const real_function_6d fKphi0b = make_f_xy(CCFunction<double,3>(Kx, x.i, UNDEFINED), y, Gscreen);
     real_function_6d fKphi0a;
     if (symmetric) fKphi0a = swap_particles(fKphi0b);
     else {
-        real_function_3d Ky = K(y);
+        real_function_3d Ky = K(world, y, info);
         fKphi0a = make_f_xy(x, CCFunction<double,3>(Ky, y.i, UNDEFINED), Gscreen);
     }
     const real_function_6d fKphi0 = (fKphi0a + fKphi0b);
@@ -2567,6 +3363,20 @@ CCPotentials::make_f_xy(const CCFunction<double,3>& x, const CCFunction<double,3
     return fxy;
 }
 
+/// Builds a 6D function from a CompositeFactory: corrfac.f() as g12 term, copies of phi_i / phi_j as particle 1 / 2.
+/// The correlation factor uses gamma() and lo() from info.parameters; a non-null Gscreen selects fill_cuspy_tree.
+madness::real_function_6d
+CCPotentials::make_f_xy(World& world, const CCFunction<double,3>& phi_i, const CCFunction<double,3>& phi_j,
+                        const Info& info, const real_convolution_6d *Gscreen) {
+    CorrelationFactor corrfac(world, info.parameters.gamma(), 1.e-7, info.parameters.lo());
+    real_function_6d f_ij = CompositeFactory<double, 6, 3>(world).g12(corrfac.f())
+                                    .particle1(copy(phi_i.function)).particle2(copy(phi_j.function));
+    if (Gscreen != nullptr) f_ij.fill_cuspy_tree(*Gscreen).truncate().reduce_rank();
+    else f_ij.fill_tree().truncate().reduce_rank();
+    return f_ij;
+}
+
+
 madness::real_function_6d
 CCPotentials::make_f_xy_macrotask(World& world, const real_function_3d& x_ket, const real_function_3d& y_ket,
                                   const real_function_3d& x_bra, const real_function_3d& y_bra,
@@ -2609,33 +3419,23 @@ CCPotentials::make_f_xy_macrotask(World& world, const real_function_3d& x_ket, c
 }
 
 madness::vector_real_function_3d
-CCPotentials::ccs_unprojected(const CC_vecfunction& ti, const CC_vecfunction& tk) const {
+CCPotentials::ccs_unprojected(World& world, const CC_vecfunction& ti, const CC_vecfunction& tk, const Info& info) {  // returns one function per entry of ti: 2*kgtk_ti - kgti_tk
+    auto g12=CCConvolutionOperator<double,3>(world,OT_G12,info.parameters);  // local OT_G12 operator, constructed on every call
     vector_real_function_3d result;
     for (const auto& itmp : ti.functions) {
         real_function_3d kgtk = real_factory_3d(world);
         for (const auto& ktmp : tk.functions)
-            kgtk += (*g12)(mo_bra_(ktmp.first), ktmp.second);
+            kgtk += (g12)(info.mo_bra[ktmp.first], ktmp.second);  // accumulates g12(mo_bra[k], tk_k) over all k in tk
         const real_function_3d kgtk_ti = kgtk * ti(itmp.first).function;
         real_function_3d kgti_tk = real_factory_3d(world);
         for (const auto& ktmp : tk.functions)
-            kgti_tk += (*g12)(mo_bra_(ktmp.first), ti(itmp.first)) * tk(ktmp.first).function;
+            kgti_tk += (g12)(info.mo_bra[ktmp.first], ti(itmp.first)) * tk(ktmp.first).function;  // accumulates g12(mo_bra[k], ti_i) * tk_k over all k in tk
         const real_function_3d resulti = 2.0 * kgtk_ti - kgti_tk;
         result.push_back(resulti);
     }
     return result;
 }
 
-madness::real_function_3d
-CCPotentials::make_density(const CC_vecfunction& x) const {
-    real_function_3d result = real_factory_3d(world);
-    for (const auto& ktmp : x.functions) {
-        const size_t k = ktmp.first;
-        result += 2.0 * mo_bra_(k).function * (x(k).function);
-    }
-    result.truncate();
-    return result;
-}
-
 double
 CCPotentials::x_s3a(const CC_vecfunction& x, const CC_vecfunction& t) const {
     MADNESS_ASSERT(x.size() == t.size());
@@ -2653,7 +3453,7 @@ CCPotentials::x_s3a(const CC_vecfunction& x, const CC_vecfunction& t) const {
             pot += (2.0 * gpart - xpart);
         }
     }
-    double kinetic = compute_kinetic_energy(x.get_vecfunction(), t.get_vecfunction());
+    double kinetic = compute_kinetic_energy(world, x.get_vecfunction(), t.get_vecfunction());
     return kinetic + pot + nuc;
 }
 
@@ -2838,10 +3638,14 @@ CCPotentials::x_s4c(const CC_vecfunction& x, const CC_vecfunction& t, const Pair
 }
 
 madness::vector_real_function_3d
-CCPotentials::s2b(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const {
+CCPotentials::s2b(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles, Info& info)
+{
     vector_real_function_3d result;
+    // madness::print_size(world,singles.get_vecfunction(),"singles upon entry");
+    // auto functions=doubles.allpairs.begin()->second.functions;
+    // for (const auto& f : functions) f.print_size("functions");
     // see if we can skip the recalculation of the pure 6D part since this does not change during the singles iteration
-    vector_real_function_3d result_u = get_potentials(singles, POT_s2b_);
+    vector_real_function_3d result_u = info.intermediate_potentials(singles, POT_s2b_);
     bool recalc_u_part = false;
     if (result_u.empty()) recalc_u_part = true;
 
@@ -2855,35 +3659,32 @@ CCPotentials::s2b(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) c
             // check if the first function in the vector is really the pure 6D part
             MADNESS_ASSERT(uik[0].is_pure());
             if (recalc_u_part) {
-                resulti_u += 2.0 * apply_s2b_operation(mo_bra_(k), uik[0],
-                                                       2);     //2.0*uik[0].dirac_convolution(mo_bra_(k),g12,2);
-                resulti_u -= apply_s2b_operation(mo_bra_(k), uik[0],
-                                                 1);     //uik[0].dirac_convolution(mo_bra_(k),g12,1);
+                resulti_u += 2.0 * apply_s2b_operation(world, info.mo_bra[k], uik[0], 2, info);     //2.0*uik[0].dirac_convolution(mo_bra_(k),g12,2);
+                resulti_u -= apply_s2b_operation(world, info.mo_bra[k], uik[0], 1, info);     //uik[0].dirac_convolution(mo_bra_(k),g12,1);
             } else {
-                resulti_u = result_u[i - parameters.freeze()];
+                resulti_u = result_u[i - info.parameters.freeze()];
             }
             for (size_t mm = 1; mm < uik.size(); mm++) {
-                resulti_r += 2.0 * apply_s2b_operation(mo_bra_(k), uik[mm],
-                                                       2);     //2.0*uik[mm].dirac_convolution(mo_bra_(k),g12,2);
-                resulti_r -= apply_s2b_operation(mo_bra_(k), uik[mm],
-                                                 1);     //uik[mm].dirac_convolution(mo_bra_(k),g12,1);
+                resulti_r += 2.0 * apply_s2b_operation(world, info.mo_bra[k], uik[mm], 2, info);     //2.0*uik[mm].dirac_convolution(mo_bra_(k),g12,2);
+                resulti_r -= apply_s2b_operation(world, info.mo_bra[k], uik[mm], 1, info);     //uik[mm].dirac_convolution(mo_bra_(k),g12,1);
             }
         }
         result.push_back(resulti_r + resulti_u);
         if (recalc_u_part) result_u.push_back(resulti_u);
     }
-    if (recalc_u_part) get_potentials.insert(result_u, singles, POT_s2b_);
+    if (recalc_u_part) info.intermediate_potentials.insert(result_u, singles, POT_s2b_);
 
     return result;
 }
 
 madness::vector_real_function_3d
-CCPotentials::s2c(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const {
+CCPotentials::s2c(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles, Info& info) {
     vector_real_function_3d result;
     // see if we can skip the recalculation of the pure 6D part since this does not change during the singles iteration
-    vector_real_function_3d result_u = get_potentials(singles, POT_s2c_);
+    vector_real_function_3d result_u = info.intermediate_potentials(singles, POT_s2c_);
     bool recalc_u_part = false;
     if (result_u.empty()) recalc_u_part = true;
+    auto g12=CCConvolutionOperator<double,3>(world,OT_G12,info.parameters);
 
     for (const auto& itmp : singles.functions) {
         const size_t i = itmp.first;
@@ -2891,10 +3692,10 @@ CCPotentials::s2c(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) c
         real_function_3d resulti_r = real_factory_3d(world);
         for (const auto& ktmp : singles.functions) {
             const size_t k = ktmp.first;
-            const real_function_3d kgi = (*g12)(mo_bra_(k), mo_ket_(i));
+            const real_function_3d kgi = (g12)(info.mo_bra[k], info.mo_ket[i]);
             for (const auto& ltmp : singles.functions) {
                 const size_t l = ltmp.first;
-                const real_function_3d l_kgi = mo_bra_(l).function * kgi;
+                const real_function_3d l_kgi = info.mo_bra[l] * kgi;
                 std::vector<CCPairFunction<double,6>> ukl = get_pair_function(doubles, k, l);
                 // check if the first function in the vector is really the pure 6D part
                 MADNESS_ASSERT(ukl[0].is_pure());
@@ -2902,7 +3703,7 @@ CCPotentials::s2c(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) c
                     resulti_u += -2.0 * ukl[0].project_out(l_kgi, 2);
                     resulti_u += ukl[0].project_out(l_kgi, 1);
                 } else {
-                    resulti_u = result_u[i - parameters.freeze()];
+                    resulti_u = result_u[i - info.parameters.freeze()];
                 }
                 for (size_t mm = 1; mm < ukl.size(); mm++) {
                     resulti_r += -2.0 * ukl[mm].project_out(l_kgi, 2);
@@ -2913,7 +3714,7 @@ CCPotentials::s2c(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) c
         result.push_back(resulti_r + resulti_u);
         if (recalc_u_part) result_u.push_back(resulti_u);
     }
-    if (recalc_u_part) get_potentials.insert(result_u, singles, POT_s2c_);
+    if (recalc_u_part) info.intermediate_potentials.insert(result_u, singles, POT_s2c_);
 
     return result;
 }
@@ -2947,23 +3748,25 @@ CCPotentials::s4a_from_s2b(const vector_real_function_3d& s2b, const CC_vecfunct
 }
 
 madness::vector_real_function_3d
-CCPotentials::s4b(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const {
+CCPotentials::s4b(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles, const Info& info)
+{
+    auto g12=CCConvolutionOperator<double,3>(world,OT_G12,info.parameters);
     vector_real_function_3d result;
-    const vector_real_function_3d active_mo_bra = get_active_mo_bra();
+    const vector_real_function_3d active_mo_bra = info.get_active_mo_bra();
     for (const auto& itmp : singles.functions) {
         const size_t i = itmp.first;
         real_function_3d resulti = real_factory_3d(world);
         for (const auto& ktmp : singles.functions) {
             const size_t k = ktmp.first;
-            const real_function_3d kgi = (*g12)(mo_bra_(k), singles(i));
-            vector_real_function_3d l_kgi = mul_sparse(world, kgi, active_mo_bra, parameters.thresh_3D());
+            const real_function_3d kgi = (g12)(info.mo_bra[k], singles(i));
+            vector_real_function_3d l_kgi = mul_sparse(world, kgi, active_mo_bra, info.parameters.thresh_3D());
             truncate(world, l_kgi);
             for (const auto& ltmp : singles.functions) {
                 const size_t l = ltmp.first;
                 const std::vector<CCPairFunction<double,6>> ukl = get_pair_function(doubles, k, l);
                 for (size_t mm = 0; mm < ukl.size(); mm++) {
-                    resulti += -2.0 * ukl[mm].project_out(l_kgi[l - parameters.freeze()], 2);
-                    resulti += ukl[mm].project_out(l_kgi[l - parameters.freeze()], 1);
+                    resulti += -2.0 * ukl[mm].project_out(l_kgi[l - info.parameters.freeze()], 2);
+                    resulti += ukl[mm].project_out(l_kgi[l - info.parameters.freeze()], 1);
                 }
             }
         }
@@ -2973,9 +3776,11 @@ CCPotentials::s4b(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) c
 }
 
 madness::vector_real_function_3d
-CCPotentials::s4c(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const {
+CCPotentials::s4c(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles, const Info& info)
+{
     vector_real_function_3d result;
-    const vector_real_function_3d active_mo_bra = get_active_mo_bra();
+    auto g12=CCConvolutionOperator<double,3>(world,OT_G12,info.parameters);
+    const vector_real_function_3d active_mo_bra = info.get_active_mo_bra();
     for (const auto& itmp : singles.functions) {
         const size_t i = itmp.first;
         real_function_3d resulti = real_factory_3d(world);
@@ -2986,7 +3791,7 @@ CCPotentials::s4c(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) c
         real_function_3d kgtauk = real_factory_3d(world);
         for (const auto& ktmp : singles.functions) {
             const size_t k = ktmp.first;
-            kgtauk += (*g12)(mo_bra_(k), singles(k));
+            kgtauk += (g12)(info.mo_bra[k], singles(k));
         }
         vector_real_function_3d l_kgtauk = mul(world, kgtauk, active_mo_bra);
         truncate(world, l_kgtauk);
@@ -2994,12 +3799,12 @@ CCPotentials::s4c(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) c
             const size_t l = ltmp.first;
             const std::vector<CCPairFunction<double,6>> uil = get_pair_function(doubles, i, l);
             for (size_t mm = 0; mm < uil.size(); mm++) {
-                part1 += uil[mm].project_out(l_kgtauk[l - parameters.freeze()], 2);
-                part2 += uil[mm].project_out(l_kgtauk[l - parameters.freeze()], 1);
+                part1 += uil[mm].project_out(l_kgtauk[l - info.parameters.freeze()], 2);
+                part2 += uil[mm].project_out(l_kgtauk[l - info.parameters.freeze()], 1);
             }
             for (const auto& ktmp : singles.functions) {
                 const size_t k = ktmp.first;
-                const real_function_3d k_lgtauk = (mo_bra_(k).function * (*g12)(mo_bra_(l), singles(k))).truncate();
+                const real_function_3d k_lgtauk = (info.mo_bra[k] * (g12)(info.mo_bra[l], singles(k))).truncate();
                 for (size_t mm = 0; mm < uil.size(); mm++) {
                     part3 += uil[mm].project_out(k_lgtauk, 2);
                     part4 += uil[mm].project_out(k_lgtauk, 1);
@@ -3029,12 +3834,12 @@ void CCPotentials::plot(const real_function_3d& f, const std::string& msg, const
 /// makes the t intermediates
 /// t_i = mo_ket_(i) + factor*tau(i)
 /// if factor!=1 then we can not use intermediates and set the type to UNDEFINED
-CC_vecfunction CCPotentials::make_t_intermediate(const CC_vecfunction& tau, const double factor) const {
+CC_vecfunction CCPotentials::make_t_intermediate(const CC_vecfunction& tau, const CCParameters& parameters) const {
+
     FuncType returntype = MIXED;
-    if (factor != 1.0) returntype = UNDEFINED;
 
     if (tau.type == HOLE) {
-        output("make_t_intermediate: returning hole states");
+        // output("make_t_intermediate: returning hole states");
         return CC_vecfunction(get_active_mo_ket(), HOLE, parameters.freeze());
     }
     if (tau.size() == 0) {
@@ -3045,7 +3850,7 @@ CC_vecfunction CCPotentials::make_t_intermediate(const CC_vecfunction& tau, cons
     CC_vecfunction result(returntype);
     for (const auto& itmp:tau.functions) {
         const size_t i = itmp.first;
-        CCFunction<double,3> t(mo_ket_(i).function + factor * tau(i).function, i, MIXED);
+        CCFunction<double,3> t(mo_ket_(i).function + tau(i).function, i, MIXED);
         result.insert(i, t);
 
     }
@@ -3079,6 +3884,43 @@ CC_vecfunction CCPotentials::make_full_t_intermediate(const CC_vecfunction& tau)
     return result;
 }
 
+/// Builds the t intermediates for every orbital in info.mo_ket, frozen ones included.
+///
+/// For i >= freeze(): t_i = info.mo_ket[i] + tau(i); for i < freeze(): t_i = info.mo_ket[i].
+/// If tau is of type HOLE or has size zero, info.mo_ket is returned as a HOLE vecfunction.
+CC_vecfunction CCPotentials::make_full_t_intermediate(const CC_vecfunction& tau, const Info& info) {
+
+    // no singles to add: return the reference orbitals
+    const bool no_singles = (tau.type == HOLE) or (tau.size() == 0);
+    if (no_singles) return CC_vecfunction(info.mo_ket, HOLE);
+
+    CC_vecfunction t_all(MIXED);
+    for (size_t idx = 0; idx < info.mo_ket.size(); ++idx) {
+        // frozen orbitals are inserted without a tau contribution
+        const bool frozen = int(idx) < info.parameters.freeze();
+        if (frozen) t_all.insert(idx, CCFunction<double,3>(info.mo_ket[idx], idx, MIXED));
+        else t_all.insert(idx, CCFunction<double,3>(info.mo_ket[idx] + tau(idx).function, idx, MIXED));
+    }
+    return t_all;
+}
+
+/// Builds the t intermediates for the active (non-frozen) orbitals only: t_i = info.mo_ket[i] + tau(i), i >= freeze().
+///
+/// If tau is of type HOLE or has size zero, the active reference orbitals are returned as a HOLE
+/// vecfunction carrying the freeze() offset, so the result never contains frozen-core orbitals.
+CC_vecfunction CCPotentials::make_active_t_intermediate(const CC_vecfunction& tau, const Info& info) {
+    // fallback must stay active-only: the full info.mo_ket would include the frozen core and drop the index offset
+    if (tau.type == HOLE or tau.size()==0) return CC_vecfunction(info.get_active_mo_ket(), HOLE, info.parameters.freeze());
+
+    CC_vecfunction result(MIXED);
+    for (size_t i = info.parameters.freeze(); i < info.mo_ket.size(); i++) {
+        CCFunction<double,3> t(info.mo_ket[i] + tau(i).function, i, MIXED);
+        result.insert(i, t);
+    }
+    return result;
+}
+
+
 /// makes the t intermediates
 /// t_i = mo_ket_(i) + tau
 /// i = tau.i
@@ -3287,7 +4129,7 @@ void CCPotentials::test_pairs() {
 
 }
 
-void CCPotentials::test_singles_potential() const {
+void CCPotentials::test_singles_potential(Info& info) const {
 
     output("Test LRCC2 Singles Potential with empty doubles and compare to CIS");
     {
@@ -3298,10 +4140,10 @@ void CCPotentials::test_singles_potential() const {
         Pairs<CCPair> gs_doubles;
         Pairs<CCPair> ex_doubles;
 
-        vector_real_function_3d cis_potential = potential_singles_ex(gs_singles, gs_doubles, ex_singles, ex_doubles,
-                                                                     POT_cis_);
-        vector_real_function_3d ccs_potential = potential_singles_ex(gs_singles, gs_doubles, ex_singles, ex_doubles,
-                                                                     POT_ccs_);
+        vector_real_function_3d cis_potential = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles,
+                                                                     ex_doubles, POT_cis_, info);
+        vector_real_function_3d ccs_potential = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles,
+                                                                     ex_doubles, POT_ccs_, info);
         vector_real_function_3d diff = sub(world, cis_potential, ccs_potential);
         const double d = norm2(world, diff);
         madness::print_size<double, 3>(world, diff, "difference in potentials");
@@ -3338,9 +4180,9 @@ void CCPotentials::test_singles_potential() const {
     const CC_vecfunction xbra(tmp, RESPONSE, parameters.freeze());
 
     for (const auto pot:pots) {
-        const vector_real_function_3d potential = potential_singles_gs(gs_singles, gs_doubles, pot);
+        const vector_real_function_3d potential = potential_singles_gs(world, gs_singles, gs_doubles, pot, info);
         const double xpot1 = inner(world, xbra.get_vecfunction(), potential).sum();
-        const double xpot2 = potential_energy_gs(xbra, gs_singles, gs_doubles, pot);
+        const double xpot2 = potential_energy_gs(world, xbra, gs_singles, gs_doubles, pot);
         const double diff = xpot1 - xpot2;
         if (world.rank() == 0)
             std::cout << std::fixed << std::setprecision(10) <<
@@ -3353,7 +4195,7 @@ void CCPotentials::test_singles_potential() const {
         if (pot == POT_s2b_) {
             const vector_real_function_3d pot_s4a = -1.0 * apply_projector(potential, gs_singles);
             const double xxpot1 = inner(world, xbra.get_vecfunction(), pot_s4a).sum();
-            const double xxpot2 = potential_energy_gs(xbra, gs_singles, gs_doubles, POT_s4a_);
+            const double xxpot2 = potential_energy_gs(world, xbra, gs_singles, gs_doubles, POT_s4a_);
             const double xdiff = xxpot1 - xxpot2;
             if (world.rank() == 0)
                 std::cout <<
@@ -3371,10 +4213,10 @@ void CCPotentials::test_singles_potential() const {
     CCTimer time_ex(world, "CC2 Singles Response Test");
 
     for (const auto pot:pots) {
-        const vector_real_function_3d potential = potential_singles_ex(gs_singles, gs_doubles, ex_singles, ex_doubles,
-                                                                       pot);
+        const vector_real_function_3d potential = potential_singles_ex(world, gs_singles, gs_doubles, ex_singles,
+                                                                       ex_doubles, pot, info);
         const double xpot1 = inner(world, xbra.get_vecfunction(), potential).sum();
-        const double xpot2 = potential_energy_ex(xbra, gs_singles, gs_doubles, ex_singles, ex_doubles, pot);
+        const double xpot2 = potential_energy_ex(world, xbra, gs_singles, gs_doubles, ex_singles, ex_doubles, pot);
         const double diff = xpot1 - xpot2;
         if (world.rank() == 0)
             std::cout << std::fixed << std::setprecision(10) <<
@@ -3385,11 +4227,11 @@ void CCPotentials::test_singles_potential() const {
         if (fabs(diff) > parameters.thresh_6D()) output.warning("Test Failed");
         else output("Test Passed");
         if (pot == POT_s2b_) {
-            const vector_real_function_3d potential_gs = potential_singles_gs(gs_singles, gs_doubles, pot);
+            const vector_real_function_3d potential_gs = potential_singles_gs(world, gs_singles, gs_doubles, pot, info);
             const vector_real_function_3d pot_s4a = -1.0 * add(world, apply_projector(potential, gs_singles),
                                                                apply_projector(potential_gs, ex_singles));
             const double xxpot1 = inner(world, xbra.get_vecfunction(), pot_s4a).sum();
-            const double xxpot2 = potential_energy_ex(xbra, gs_singles, gs_doubles, ex_singles, ex_doubles, POT_s4a_);
+            const double xxpot2 = potential_energy_ex(world, xbra, gs_singles, gs_doubles, ex_singles, ex_doubles, POT_s4a_);
             const double xdiff = xxpot1 - xxpot2;
             if (world.rank() == 0)
                 std::cout <<
@@ -3415,7 +4257,7 @@ void CCPotentials::test() {
     assign_name(test5);
     assign_name(test6);
 
-    test_singles_potential();
+    test_singles_potential(info);
     output.section("Testing Scalar Multiplication");
     {
         CC_vecfunction test = mo_ket_ * 2.0;
diff --git a/src/madness/chem/CCPotentials.h b/src/madness/chem/CCPotentials.h
index 05a1fe202d7..5abf3043af1 100644
--- a/src/madness/chem/CCPotentials.h
+++ b/src/madness/chem/CCPotentials.h
@@ -16,30 +16,46 @@
 #include<madness/chem/electronic_correlation_factor.h>
 
 namespace madness {
-
 /// Class which calculates all types of CC2 Potentials
 class CCPotentials {
 public:
     CCPotentials(World& world_, const std::shared_ptr<Nemo> nemo, const CCParameters& param);
 
-    void reset_nemo(const std::shared_ptr<Nemo> nemo){
-        nemo_=nemo;
-        mo_ket_=(make_mo_ket(*nemo));
-        mo_bra_=(make_mo_bra(*nemo));
-        orbital_energies_=init_orbital_energies(*nemo);
+    void reset_nemo(const std::shared_ptr<Nemo> nemo) {
+        nemo_ = nemo;
+        mo_ket_ = (make_mo_ket(*nemo));
+        mo_bra_ = (make_mo_bra(*nemo));
+        orbital_energies_ = init_orbital_energies(*nemo);
     };
 
+    Info update_info(const CCParameters& parameters, const std::shared_ptr<Nemo> nemo) const {  // nemo is dereferenced unconditionally, so it must be non-null
+        Info info;  // assembled from this object's members and from nemo, then returned by value
+        info.mo_bra = mo_bra().get_vecfunction();
+        info.mo_ket = mo_ket().get_vecfunction();
+        info.molecular_coordinates = nemo->get_calc()->molecule.get_all_coords_vec();
+        info.parameters = parameters;  // taken from the function argument
+        info.R_square = nemo->R_square;
+        info.R = nemo->R;
+        info.U1 = nemo->ncf->U1vec();
+        info.U2 = nemo->ncf->U2();
+        info.intermediate_potentials = get_potentials;
+        info.orbital_energies = orbital_energies_;
+        info.fock=nemo->compute_fock_matrix(nemo->get_calc()->amo, nemo->get_calc()->aocc);  // Fock matrix is recomputed on every call
+        return info;
+    }
+
     virtual
     ~CCPotentials() {};
 
     /// forms the regularized functions from Q and Qt Ansatz for CIS(D) where tau=0 and t=mo so that Qt=Q
-    void test_pair_consistency(const CCPairFunction<double,6>& u, const size_t i, const size_t j, const CC_vecfunction& x) const;
+    void test_pair_consistency(const CCPairFunction<double, 6>& u, const size_t i, const size_t j,
+                               const CC_vecfunction& x) const;
 
     bool test_compare_pairs(const CCPair& pair1, const CCPair& pair2) const;
 
     void test_pairs();
 
-    void test_singles_potential() const;
+    void test_singles_potential(Info& info) const;
 
     void test();
 
@@ -47,12 +63,13 @@ class CCPotentials {
     real_function_6d make_6D_pair(const CCPair& pair) const;
 
     /// Function to load a function from disc
-    /// @param[in] the function which will be loaded
+    /// @param[in] f the function which will be loaded
     /// @param[in] name of the file in which the function was stored
     /// @return true or false depending on if the data was found on disc
-    template<typename T, size_t NDIM>
+    template <typename T, size_t NDIM>
     bool load_function(Function<T, NDIM>& f, const std::string name) const {
-        bool exists = archive::ParallelInputArchive<archive::BinaryFstreamInputArchive>::exists(world, name.c_str());
+        bool exists = archive::ParallelInputArchive<
+            archive::BinaryFstreamInputArchive>::exists(world, name.c_str());
         if (exists) {
             if (world.rank() == 0) print("loading function", name);
             archive::ParallelInputArchive<archive::BinaryFstreamInputArchive> ar(world, name.c_str());
@@ -62,7 +79,10 @@ class CCPotentials {
             f.truncate();
             f.print_size(name);
             return true;
-        } else return false;
+        } else {
+            if (world.rank()==0) print("could not find function",name);
+        }
+        return false;
     }
 
     /// Plotting (convenience)
@@ -72,7 +92,7 @@ class CCPotentials {
     void plot(const real_function_3d& f, const std::string& msg, const bool doprint = true) const;
 
     /// print size of a function
-    template<size_t NDIM>
+    template <size_t NDIM>
     void print_size(const Function<double, NDIM>& f, const std::string& msg, const bool print = true) const {
         if (print) f.print_size(msg);
     }
@@ -85,6 +105,11 @@ class CCPotentials {
         return orbital_energies_[i] + orbital_energies_[j];
     }
 
+    /// returns epsilon_i + epsilon_j, read from info.orbital_energies (needed for bsh operator of pairs)
+    static double get_epsilon(const size_t i, const size_t j, const Info& info) {
+        return info.orbital_energies[i] + info.orbital_energies[j];
+    }
+
     /// returns a vector of all active mos without nuclear correlation factor (nemos)
     vector_real_function_3d get_active_mo_ket() const {
         vector_real_function_3d result;
@@ -102,14 +127,14 @@ class CCPotentials {
     /// get the corresponding mo bra vectors to a ket vector
     vector_real_function_3d get_mo_bra(const CC_vecfunction& ket) const {
         vector_real_function_3d result;
-        for (const auto& ktmp:ket.functions) {
+        for (const auto& ktmp : ket.functions) {
             result.push_back(mo_bra_(ktmp.first).function);
         }
         return result;
     }
 
     /// returns a specific mo
-    CCFunction<double,3> mo_ket(const size_t& i) const {
+    CCFunction<double, 3> mo_ket(const size_t& i) const {
         return mo_ket_(i);
     }
 
@@ -119,7 +144,7 @@ class CCPotentials {
     }
 
     /// returns a specific mo multiplied with the squared nuclear correlation factor
-    CCFunction<double,3> mo_bra(const size_t& i) const {
+    CCFunction<double, 3> mo_bra(const size_t& i) const {
         return mo_bra_(i);
     }
 
@@ -142,17 +167,28 @@ class CCPotentials {
     /// makes the t intermediates
     /// t_i = mo_ket_(i) + factor*tau(i)
     /// if factor!=1 then we can not use intermediates and set the type to UNDEFINED
-    CC_vecfunction make_t_intermediate(const CC_vecfunction& tau, const double factor = 1.0) const;
+    CC_vecfunction make_t_intermediate(const CC_vecfunction& tau, const CCParameters& parameters) const;
 
     /// makes the t intermediates
     /// t_i = mo_ket_(i) + factor*tau(i)
     /// if the core is frozen the core ti will just be mo_ket_
     CC_vecfunction make_full_t_intermediate(const CC_vecfunction& tau) const;
 
+    /// makes the t intermediates
+    /// t_i = mo_ket_(i) + tau(i)
+    /// if the core is frozen the core ti will just be mo_ket_
+    static CC_vecfunction make_full_t_intermediate(const CC_vecfunction& tau, const Info& info);
+
+    /// makes the t intermediates
+    ///
+    /// t_i = mo_ket_(i) + tau(i)
+    /// skip frozen orbitals
+    static CC_vecfunction make_active_t_intermediate(const CC_vecfunction& tau, const Info& info);
+
     /// makes the t intermediates
     /// t_i = mo_ket_(i) + tau
     /// i = tau.i
-    CCFunction<double,3> make_t_intermediate(const CCFunction<double,3>& tau) const;
+    CCFunction<double, 3> make_t_intermediate(const CCFunction<double, 3>& tau) const;
 
 private:
     /// Helper function to initialize the const mo_bra and ket elements adn orbital energies
@@ -168,6 +204,17 @@ class CCPotentials {
     init_orbital_energies(const Nemo& nemo) const;
 
 public:
+    /// return the regularized MP2 ansatz: |\tau_ij> = |u_ij> + Q12 f12 |ij>
+    static CCPair make_pair_mp2(const real_function_6d& u, const size_t i, const size_t j, const Info& info);
+
+    /// return the regularized CC2 ansatz: |\tau_ij> = |u_ij> + Q12t f12 |t_i t_j>
+    static CCPair make_pair_cc2(const real_function_6d& u, const CC_vecfunction& gs_singles,
+                                const size_t i, const size_t j, const Info& info);
+
+    /// return the regularized LR-CC2 (response) ansatz: |x_ij> = |u_ij> + Q12t f12 |t_i t_j> + ... (TODO: spell out the remaining response terms)
+    static CCPair make_pair_lrcc2(World& world, const CalcType& ctype, const real_function_6d& u,
+                                  const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles,
+                                  const size_t i, const size_t j, const Info& info);
 
     // Pair functions
 
@@ -207,33 +254,39 @@ class CCPotentials {
 
     /// Compute pair correlation energies of MP2 or CC2 Ground State
     // Off diagonal pair energies are multiplied with 2.0 to acount for their permuted partners
-    /// @param[in] The Pair_function
-    /// @param[in] The Singles (for MP2 give empty function) for the energy contribution over disconnected doubles
+    /// @param[in] u the Pair_function
+    /// @param[in] singles the Singles (for MP2 give empty function) for the energy contribution over disconnected doubles
     /// @param[out] 2*<ij|g|u> - <ji|g|u> , where i and j are determined by u (see CC_Pair class)
-    double
-    compute_pair_correlation_energy(const CCPair& u, const CC_vecfunction& singles = CC_vecfunction(PARTICLE)) const;
+    static double
+    compute_pair_correlation_energy(World& world,
+                                    const Info& info,
+                                    const CCPair& u,
+                                    const CC_vecfunction& singles = CC_vecfunction(PARTICLE));
 
     /// Compute CC2 correlation energy
     /// @param[in] The Pair_function
     /// @param[out] \sum_{ij} 2*<ij|g|u> - <ji|g|u> + 2*<ij|g|\tau_i\tau_j> - <ji|g|\tau_i\tau_j> , where i and j are determined by u (see CC_Pair class)
     /// since we do not compute all pairs (symmetry reasons) the off diagonal pair energies are conted twice
     /// the cc2 pair functions are dependent on the doubles (see CC_Pair structure, and make_pair function) so make shure they are updated
-    double
-    compute_cc2_correlation_energy(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const;
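+    /// @param[in] world the World object passed to this static function
+    /// @param[in] info the Info object, containing some basic quantities (MOs, parameters, etc)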
+    static double
+    compute_cc2_correlation_energy(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles,
+                                   const Info& info);
 
 
-    double
-    compute_kinetic_energy(const vector_real_function_3d& xbra, const vector_real_function_3d& xket) const;
+    static double
+    compute_kinetic_energy(World& world, const vector_real_function_3d& xbra, const vector_real_function_3d& xket);
 
-    /// returns \f$  <x|T|x> + <x|V|x>  \f$
-    double
-    compute_cis_expectation_value(const CC_vecfunction& x, const vector_real_function_3d& V,
-                                  const bool print = true) const;
+    /// compute the expectation value excitation energy using the CIS/CCS/CC2 singles
+    static double
+    compute_cis_expectation_value(World& world, const CC_vecfunction& x,
+                                  const vector_real_function_3d& V, const bool print, const Info& info);
 
     /// Something like a pair energy for CIS(D)/LRCC2 to estimate energy convergence
     /// calculates the response part of s2b and s2c which are independent of the mp2 amplitudes
-    double
-    compute_excited_pair_energy(const CCPair& d, const CC_vecfunction& x) const;
+    static double
+    compute_excited_pair_energy(World& world, const CCPair& d, const CC_vecfunction& x, const Info& info);
 
     /// Compute the CIS(D) Energy Correction to CIS
     double
@@ -253,7 +306,7 @@ class CCPotentials {
     /// Static function for the 6D Fock residue for use in macrotask
     static madness::real_function_6d
     fock_residue_6d_macrotask(World& world, const CCPair& u, const CCParameters& parameters,
-                              const std::vector< madness::Vector<double,3> >& all_coords_vec,
+                              const std::vector<madness::Vector<double, 3>>& all_coords_vec,
                               const std::vector<real_function_3d>& mo_ket,
                               const std::vector<real_function_3d>& mo_bra,
                               const std::vector<real_function_3d>& U1,
@@ -262,19 +315,40 @@ class CCPotentials {
     /// Static version of make_constant_part_mp2 to be called from macrotask.
     static madness::real_function_6d
     make_constant_part_mp2_macrotask(World& world, const CCPair& pair, const std::vector<real_function_3d>& mo_ket,
-                                                   const std::vector<real_function_3d>& mo_bra,
-                                                   const CCParameters& parameters, const real_function_3d& Rsquare,
-                                                   const std::vector<real_function_3d>& U1,
-                                                   const std::vector<std::string> argument);
+                                     const std::vector<real_function_3d>& mo_bra,
+                                     const CCParameters& parameters, const real_function_3d& Rsquare,
+                                     const std::vector<real_function_3d>& U1,
+                                     const std::vector<std::string> argument);
+
+    /// Compute the constant part of MP2, CC2 or LR-CC2
+    ///
+    /// depending on pair.calc_type different terms are included in the constant part.
+    /// @param[in] pair         the (empty) pair function, determines the terms in the constant part, contains some bookkeeping information (bsh_eps, i, j)
+    /// @param[in] gs_singles   the ground-state singles for CC2 (used for the T1-transformed SO projector), may be left empty for MP2
+    /// @param[in] ex_singles   the excited-state singles for CC2 (used for the T1-transformed SO projector), may be left empty for MP2 and GS-CC2
+    /// @param[in] info         the Info object, containing some basic quantities (MOs, parameters, etc)
+    /// @return            the constant part of the MP2, CC2 or LR-CC2: G(Q12(g~|titj>))
+    static madness::real_function_6d
+    make_constant_part_macrotask(World& world, const CCPair& pair,
+                                 const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles,
+                                 const Info& info);
+
 
     /// Static function to iterate the mp2 pairs from macrotask
     static madness::real_function_6d
     update_pair_mp2_macrotask(World& world, const CCPair& pair, const CCParameters& parameters,
-                                          const std::vector< madness::Vector<double,3> >& all_coords_vec,
-                                          const std::vector<real_function_3d>& mo_ket,
-                                          const std::vector<real_function_3d>& mo_bra,
-                                          const std::vector<real_function_3d>& U1,
-                                          const real_function_3d& U2, const real_function_6d& mp2_coupling);
+                              const std::vector<madness::Vector<double, 3>>& all_coords_vec,
+                              const std::vector<real_function_3d>& mo_ket,
+                              const std::vector<real_function_3d>& mo_bra,
+                              const std::vector<real_function_3d>& U1,
+                              const real_function_3d& U2, const real_function_6d& mp2_coupling);
+
+
+    /// iterate a pair for MP2, CC2, LRCC2 on constant singles
+    static CCPair iterate_pair_macrotask(World& world,
+                                         const CCPair& pair, const CC_vecfunction& gs_singles,
+                                         const CC_vecfunction& ex_singles,
+                                         const real_function_6d& coupling, const Info& info, const long maxiter);
 
 
     /// Function evaluates the consant part of the ground state for CC2
@@ -288,7 +362,7 @@ class CCPotentials {
     /// where t(1/2) = |i> + 1/2|tau_i> , t(1/2) = th
     real_function_6d
     make_constant_part_cc2_gs(const CCPair& u, const CC_vecfunction& tau,
-                              const real_convolution_6d *Gscreen = NULL) const;
+                              const real_convolution_6d* Gscreen = NULL) const;
 
     /// Function evaluates the consant part of the ground state for CC2 if the Qt Ansatz is used
     /// @param[out]The result is \f$ Q12(G(Qt12((Vreg+V_{coupling})|titj> + [F,Qt]f12|titj>))) \f$ with \f$ |t_k> = |tau_k> + |k>  and Qt = Q - \sum_k |tau_k><k| \f$
@@ -301,26 +375,27 @@ class CCPotentials {
     /// where t(1/2) = |i> + 1/2|tau_i> , t(1/2) = th
     real_function_6d
     make_constant_part_cc2_Qt_gs(const CCPair& u, const CC_vecfunction& tau,
-                                 const real_convolution_6d *Gscreen = NULL) const;
+                                 const real_convolution_6d* Gscreen = NULL) const;
 
     /// Function evaluates the consant part of the Excited state for CIS(D) if the Q Ansatz is used
     real_function_6d
-    make_constant_part_cispd(const CCPair& u, const CC_vecfunction& x, const real_convolution_6d *Gscreen = NULL) const;
+    make_constant_part_cispd(const CCPair& u, const CC_vecfunction& x,
+                             const real_convolution_6d* Gscreen = NULL) const;
 
     /// Function evaluates the consant part of the Excited state for CIS(D) if the Qt Ansatz is used
     real_function_6d
     make_constant_part_cispd_Qt(const CCPair& u, const CC_vecfunction& x,
-                                const real_convolution_6d *Gscreen = NULL) const;
+                                const real_convolution_6d* Gscreen = NULL) const;
 
     /// Function evaluates the consant part of the Excited state for CC2 if the Q Ansatz is used
     real_function_6d
     make_constant_part_cc2_ex(const CCPair& u, const CC_vecfunction& tau, const CC_vecfunction& x,
-                              const real_convolution_6d *Gscreen = NULL);
+                              const real_convolution_6d* Gscreen = NULL);
 
     /// Function evaluates the consant part of the Excited state for CC2 if the Qt Ansatz is used
     real_function_6d
     make_constant_part_cc2_Qt_ex(const CCPair& u, const CC_vecfunction& tau, const CC_vecfunction& x,
-                                 const real_convolution_6d *Gscreen = NULL);
+                                 const real_convolution_6d* Gscreen = NULL);
 
     /// Apply the Regularization potential
     /// \f$ V_{reg} = [ U_e - [K,f12] + f12(F12-eij) ]|titj> \f$
@@ -329,18 +404,30 @@ class CCPotentials {
     /// @param[in] pointer to bsh operator (in order to screen)
     /// @param[out] the regularization potential (unprojected), see equation above
     real_function_6d
-    apply_Vreg(const CCFunction<double,3>& ti, const CCFunction<double,3>& tj, const real_convolution_6d *Gscreen = NULL) const;
+    apply_Vreg(const CCFunction<double, 3>& ti, const CCFunction<double, 3>& tj,
+               const real_convolution_6d* Gscreen = NULL) const;
+
+    /// Apply the Regularization potential
+    /// \f$ V_{reg} = [ U_e - [K,f12] + f12(F12-eij) + [F,Qt] ]|titj> \f$
+    /// @param[in] ti first function in the ket, for MP2 it is the Orbital, for CC2 the relaxed Orbital t_i=\phi_i + \tau_i
+    /// @param[in] tj second function in the ket ...
+    /// @param[in] bsh_eps presumably the energy used for the BSH operator of this pair (this overload takes no Gscreen pointer) -- TODO confirm
+    /// @return the regularization potential (unprojected), see equation above
+    std::vector<CCPairFunction<double, 6>>
+    static apply_Vreg(World& world, const CCFunction<double, 3>& ti, const CCFunction<double, 3>& tj,
+                      const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles,
+                      const Info& info, const std::vector<std::string>& argument, const double bsh_eps);
 
     /// Static version of apply_Vreg to be used from a macrotask. Will eventually replace former.
     madness::real_function_6d
     static
     apply_Vreg_macrotask(World& world, const std::vector<real_function_3d>& mo_ket,
-                                       const std::vector<real_function_3d>& mo_bra,
-                                       const CCParameters& parameters, const real_function_3d& Rsquare,
-                                       const std::vector<real_function_3d>& U1, const size_t& i, const size_t& j,
-                                       const FuncType& x_type, const FuncType& y_type,
-                                       const std::vector<std::string> argument,
-                                       const real_convolution_6d *Gscreen = NULL);
+                         const std::vector<real_function_3d>& mo_bra,
+                         const CCParameters& parameters, const real_function_3d& Rsquare,
+                         const std::vector<real_function_3d>& U1, const size_t& i, const size_t& j,
+                         const FuncType& x_type, const FuncType& y_type,
+                         const std::vector<std::string> argument,
+                         const real_convolution_6d* Gscreen = NULL);
 
     /// evaluates: \f$ (F(1)-ei)|ti> (x) |tj> + |ti> (x) (F(2)-ej)|tj> \f$ with the help of the singles potential
     /// singles equation is: (F-ei)|ti> = - V(ti)
@@ -351,8 +438,20 @@ class CCPotentials {
     /// @param[in] tj, second function in the ket ...
     /// @param[in] pointer to bsh operator (in order to screen)
     real_function_6d
-    apply_reduced_F(const CCFunction<double,3>& ti, const CCFunction<double,3>& tj, const real_convolution_6d *Gscreen = NULL) const;
+    apply_reduced_F1(const CCFunction<double, 3>& ti, const CCFunction<double, 3>& tj,
+                     const real_convolution_6d* Gscreen = NULL) const;
 
+    /// evaluates: \f$ (F(1)-ei)|ti> (x) |tj> + |ti> (x) (F(2)-ej)|tj> \f$ with the help of the singles potential
+    /// singles equation is: (F-ei)|ti> = - V(ti)
+    /// response singles equation: (F-ei-omega)|xi> = - V(xi)
+    /// response:  \f$ (F12-ei-ej-omega)|xitj> = (F1 - ei - omega)|xi> (x) |tj> + |xi> (x) (F2-ej)|tj> \f$
+    /// so in both cases the result will be: |V(ti),tj> + |ti,V(tj)>
+    /// @param[in] ti, first function in the ket, for MP2 it is the Orbital, for CC2 the relaxed Orbital t_i=\phi_i + \tau_i
+    /// @param[in] tj, second function in the ket ...
+    /// @param[in] pointer to bsh operator (in order to screen)
+    real_function_6d
+    static apply_reduced_F(World& world, const CCFunction<double, 3>& ti, const CCFunction<double, 3>& tj,
+                           const Info& info, const real_convolution_6d* Gscreen = NULL);
 
     /// Apply Ue on a tensor product of two 3d functions: Ue(1,2) |x(1)y(2)> (will be either |ij> or |\tau_i\tau_j> or mixed forms)
     /// The Transformed electronic regularization potential (Kutzelnigg) is R_{12}^{-1} U_e R_{12} with R_{12} = R_1*R_2
@@ -365,7 +464,8 @@ class CCPotentials {
     /// @param[in] The BSH operator to screen: Has to be in NS form, Gscreen->modified == true
     /// @return  R^-1U_eR|x,y> the transformed electronic smoothing potential applied on |x,y> :
     real_function_6d
-    apply_transformed_Ue(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const real_convolution_6d *Gscreen = NULL) const;
+    apply_transformed_Ue(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y,
+                         const real_convolution_6d* Gscreen = NULL) const;
 
     /// Static version of apply_transformed_Ue for the use in a macrotask.
     /// Will eventually replace the former.
@@ -374,7 +474,25 @@ class CCPotentials {
                                           const CCParameters& parameters, const real_function_3d& Rsquare,
                                           const std::vector<real_function_3d>& U1, const size_t& i, const size_t& j,
                                           const FuncType& x_type, const FuncType& y_type,
-                                          const real_convolution_6d *Gscreen = NULL);
+                                          const real_convolution_6d* Gscreen = NULL);
+
+    real_function_6d
+    static apply_Ue(World& world, const CCFunction<double, 3>& phi_i, const CCFunction<double, 3>& phi_j,
+                    const Info& info, const real_convolution_6d* Gscreen);
+
+
+    static real_function_6d
+    apply_KffK(World& world, const CCFunction<double, 3>& phi_i, const CCFunction<double, 3>& phi_j,
+               const Info& info, const real_convolution_6d* Gscreen);
+    static CCPairFunction<double, 6>
+    apply_commutator_F_Qt_f12(World& world, const CCFunction<double, 3>& phi_i, const CCFunction<double, 3>& phi_j,
+                              const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles,
+                              const Info& info, const real_convolution_6d* Gscreen);
+
+    static CCPairFunction<double, 6>
+    apply_commutator_F_dQt_f12(World& world, const CCFunction<double, 3>& phi_i, const CCFunction<double, 3>& phi_j,
+                               const CC_vecfunction& gs_singles, const CC_vecfunction& ex_singles,
+                               const Info& info, const real_convolution_6d* Gscreen);
 
     /// Apply Ue on a tensor product of two 3d functions: Ue(1,2) |x(1)y(2)> (will be either |ij> or |\tau_i\tau_j> or mixed forms)
     /// The Transformed electronic regularization potential (Kutzelnigg) is R_{12}^{-1} U_e R_{12} with R_{12} = R_1*R_2
@@ -388,20 +506,21 @@ class CCPotentials {
     /// the f12K|xy> part will be screened with the BSH while the Kf12|xy> can not be screened with the BSH operator but maybe with the coulomb
     /// @return  R^-1U_eR|x,y> the transformed electronic smoothing potential applied on |x,y> :
     real_function_6d
-    apply_exchange_commutator(const CCFunction<double,3>& x, const CCFunction<double,3>& y,
-                              const real_convolution_6d *Gscreen = NULL) const;
+    apply_exchange_commutator(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y,
+                              const real_convolution_6d* Gscreen = NULL) const;
 
-   real_function_6d
-   static apply_exchange_commutator_macrotask(World& world, const std::vector<real_function_3d>& mo_ket,
-                                              const std::vector<real_function_3d>& mo_bra, const real_function_3d& Rsquare,
-                                              const size_t& i, const size_t& j, const CCParameters& parameters,
-                                              const FuncType& x_type, const FuncType& y_type,
-                                              const real_convolution_6d *Gscreen = NULL);
+    real_function_6d
+    static apply_exchange_commutator_macrotask(World& world, const std::vector<real_function_3d>& mo_ket,
+                                               const std::vector<real_function_3d>& mo_bra,
+                                               const real_function_3d& Rsquare,
+                                               const size_t& i, const size_t& j, const CCParameters& parameters,
+                                               const FuncType& x_type, const FuncType& y_type,
+                                               const real_convolution_6d* Gscreen = NULL);
 
     /// This applies the exchange commutator, see apply_exchange_commutator function for information
     real_function_6d
-    apply_exchange_commutator1(const CCFunction<double,3>& x, const CCFunction<double,3>& y,
-                               const real_convolution_6d *Gscreen = NULL) const;
+    apply_exchange_commutator1(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y,
+                               const real_convolution_6d* Gscreen = NULL) const;
 
     /// Helper Function which performs the operation \f$ <xy|g12f12|ab> \f$
     /// @param[in] function x, if nuclear correlation is used make sure this is the correct bra function
@@ -409,38 +528,43 @@ class CCPotentials {
     /// @param[in] function a,
     /// @param[in] function b,
     double
-    make_xy_gf_ab(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const CCFunction<double,3>& a, const CCFunction<double,3>& b) const;
+    make_xy_gf_ab(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y, const CCFunction<double, 3>& a,
+                  const CCFunction<double, 3>& b) const;
 
-    double make_xy_ff_ab(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const CCFunction<double,3>& a, const CCFunction<double,3>& b) const {
+    double make_xy_ff_ab(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y,
+                         const CCFunction<double, 3>& a, const CCFunction<double, 3>& b) const {
         error("xy_ff_ab not yet implemented");
         return 0.0;
     }
 
     /// apply the operator gf = 1/(2\gamma)*(Coulomb - 4\pi*BSH_\gamma)
     /// works only if f = (1-exp(-\gamma*r12))/(2\gamma)
-    real_function_3d
-    apply_gf(const real_function_3d& f) const;
+    static real_function_3d
+    apply_gf(World& world, const real_function_3d& f, const Info& info);
 
     /// returns <xy|op|u>
     /// loops over every entry in the vector and accumulates results
     /// helper function for CIS(D) energy
-    double
-    make_xy_op_u(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const CCConvolutionOperator<double,3>& op,
-                 const std::vector<CCPairFunction<double,6>>& u) const;
+    static double
+    make_xy_op_u(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y,
+                 const CCConvolutionOperator<double, 3>& op,
+                 const std::vector<CCPairFunction<double, 6>>& u);
 
     /// returns <xy|u> for a vector of CCPairFunction
     /// the result is accumulated for every vercotr
     /// helper functions for CIS(D) energy
-    double
-    make_xy_u(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const std::vector<CCPairFunction<double,6>>& u) const;
+    static double
+    make_xy_u(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y,
+              const std::vector<CCPairFunction<double, 6>>& u);
 
     /// Functions which operate with the CCPairFunction structure
     /// @param[in] function x, if nuclear correlation is used make sure this is the correct bra function
     /// @param[in] function y, if nuclear correlation is used make sure this is the correct bra function
     /// @param[in] CCPairFunction u,
-    double
-    make_xy_op_u(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const CCConvolutionOperator<double,3>& op,
-                 const CCPairFunction<double,6>& u) const;
+    static double
+    make_xy_op_u(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y,
+                 const CCConvolutionOperator<double, 3>& op,
+                 const CCPairFunction<double, 6>& u);
 
     /// Helper Function which returns
     /// @return <xy|op|ab>
@@ -449,19 +573,21 @@ class CCPotentials {
     /// @param[in] function a,
     /// @param[in] function b,
     double
-    make_xy_op_ab(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const CCConvolutionOperator<double,3>& op, const CCFunction<double,3>& a,
-                  const CCFunction<double,3>& b) const;
+    make_xy_op_ab(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y,
+                  const CCConvolutionOperator<double, 3>& op, const CCFunction<double, 3>& a,
+                  const CCFunction<double, 3>& b) const;
 
     /// get the correct pair function as vector of CCPairFunction functions
     /// @param[in] The pair functions
     /// @param[out] The demanded pair function as vector of CCPairFunction functions (includes regularization tails)
-    std::vector<CCPairFunction<double,6>>
-    get_pair_function(const Pairs<CCPair>& pairs, const size_t i, const size_t j) const;
+    static std::vector<CCPairFunction<double, 6>>
+    get_pair_function(const Pairs<CCPair>& pairs, const size_t i, const size_t j);
 
 
     /// returns <a|g12|u>_2
-    real_function_3d
-    apply_s2b_operation(const CCFunction<double,3>& bra, const CCPairFunction<double,6>& u, const size_t particle) const;
+    static real_function_3d
+    apply_s2b_operation(World& world, const CCFunction<double, 3>& bra, const CCPairFunction<double, 6>& u,
+                        const size_t particle, const Info& info);
 
     /// dummy to avoid confusion and for convenience
     real_function_6d swap_particles(const real_function_6d& f) const {
@@ -469,8 +595,8 @@ class CCPotentials {
     }
 
     /// swap the particles of the CCPairFunction and return a new vector of swapped functions
-    std::vector<CCPairFunction<double,6>> swap_particles(const std::vector<CCPairFunction<double,6>>& f) const {
-        std::vector<CCPairFunction<double,6>> swapped;
+    static std::vector<CCPairFunction<double, 6>> swap_particles(const std::vector<CCPairFunction<double, 6>>& f) {
+        std::vector<CCPairFunction<double, 6>> swapped;
         for (size_t i = 0; i < f.size(); i++) swapped.push_back(f[i].swap_particles());
         return swapped;
     }
@@ -479,8 +605,8 @@ class CCPotentials {
     /// @param[in] 6D function 1
     /// @param[in] 6D function 2
     double
-    overlap(const CCPairFunction<double,6>& f1, const CCPairFunction<double,6>& f2) const {
-        return inner(f1,f2,nemo_->ncf->square());
+    overlap(const CCPairFunction<double, 6>& f1, const CCPairFunction<double, 6>& f2) const {
+        return inner(f1, f2, nemo_->ncf->square());
     };
 
     /// Computes the squared norm of the pair function <x|x>
@@ -513,8 +639,9 @@ class CCPotentials {
 
     /// Apply the Qt projector on a CCPairFunction
     /// works in principle like apply_Ot
-    CCPairFunction<double,6>
-    apply_Qt(const CCPairFunction<double,6>& f, const CC_vecfunction& t, const size_t particle, const double c = 1.0) const;
+    CCPairFunction<double, 6>
+    apply_Qt(const CCPairFunction<double, 6>& f, const CC_vecfunction& t, const size_t particle,
+             const double c = 1.0) const;
 
     /// Apply Ot projector on decomposed or op_decomposed 6D function
     /// The function does not work with type==pure right now (not needed)
@@ -525,14 +652,14 @@ class CCPotentials {
     /// for CCPairFunction type == op_decomposd the function si f=op|xy> and we have for particle==1
     /// \f$ a_k = t_k \f$
     /// \f$ b_k = <mo_k|op|x>*y \f$
-    CCPairFunction<double,6>
-    apply_Ot(const CCPairFunction<double,6>& f, const CC_vecfunction& t, const size_t particle) const;
+    CCPairFunction<double, 6>
+    apply_Ot(const CCPairFunction<double, 6>& f, const CC_vecfunction& t, const size_t particle) const;
 
     /// Apply the Greens Operator to a CCPairFunction
     /// For CCPairFunction only type pure and type decomposed is supported
     /// for the op_decomposed type a pure function can be constructed (not needed therefore not implemented yet)
     real_function_6d
-    apply_G(const CCPairFunction<double,6>& u, const real_convolution_6d& G) const;
+    apply_G(const CCPairFunction<double, 6>& u, const real_convolution_6d& G) const;
 
     /// Apply BSH Operator and count time
     real_function_6d apply_G(const real_function_6d& f, const real_convolution_6d& G) const {
@@ -548,26 +675,28 @@ class CCPotentials {
 
     /// Calculates the CC2 singles potential for the ground state: result = Fock_residue + V
     /// the V part is stored in the intermediate_potentials structure
-    vector_real_function_3d
-    get_CC2_singles_potential_gs(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const;
+    static vector_real_function_3d
+    get_CC2_singles_potential_gs(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles,
+                                 Info& info);
 
     /// Calculates the CCS/CIS singles potential for the excited state: result = Fock_residue + V
     /// the V part is stored in the intermediate_potentials structure
     /// the expectation value is calculated and updated
-    vector_real_function_3d
-    get_CCS_potential_ex(CC_vecfunction& x, const bool print = false) const;
+    static vector_real_function_3d
+    get_CCS_potential_ex(World& world, const CC_vecfunction& x, const bool print, Info& info);
 
     /// Calculates the CC2 singles potential for the Excited state: result = Fock_residue + V
     /// the V part is stored in the intermediate_potentials structure
-    vector_real_function_3d
-    get_CC2_singles_potential_ex(const CC_vecfunction& gs_singles, const Pairs<CCPair>& gs_doubles,
-                                 CC_vecfunction& ex_singles, const Pairs<CCPair>& response_doubles) const;
+    static vector_real_function_3d
+    get_CC2_singles_potential_ex(World& world, const CC_vecfunction& gs_singles,
+                                 const Pairs<CCPair>& gs_doubles, const CC_vecfunction& ex_singles,
+                                 const Pairs<CCPair>& response_doubles, Info& info);
 
     /// Calculates the CC2 singles potential for the Excited state: result = Fock_residue + V
     /// the V part is stored in the intermediate_potentials structure
     vector_real_function_3d
-    get_ADC2_singles_potential(const Pairs<CCPair>& gs_doubles, CC_vecfunction& ex_singles,
-                               const Pairs<CCPair>& response_doubles) const;
+    get_ADC2_singles_potential(World& world, const Pairs<CCPair>& gs_doubles,
+                               CC_vecfunction& ex_singles, const Pairs<CCPair>& response_doubles, Info& info) const;
 
     /// The potential manager for the ground state potential
     /// CC2 singles potential parts of the ground state
@@ -577,9 +706,10 @@ class CCPotentials {
     /// @param[in] Doubles of the Ground State
     /// @param[in] Name of the potential
     /// @param[out] the potential (without Q application)
+    /// @param world the MPI world
     double
-    potential_energy_gs(const CC_vecfunction& bra, const CC_vecfunction& singles, const Pairs<CCPair>& doubles,
-                        const PotentialType& name) const;
+    potential_energy_gs(World& world, const CC_vecfunction& bra, const CC_vecfunction& singles,
+                        const Pairs<CCPair>& doubles, const PotentialType& name) const;
 
     /// The potential manager for the ground state potential
     /// CC2 singles potential parts of the ground state
@@ -588,8 +718,10 @@ class CCPotentials {
     /// @param[in] Doubles of the Ground State
     /// @param[in] Name of the potential
     /// @param[out] the potential (without Q application)
-    vector_real_function_3d
-    potential_singles_gs(const CC_vecfunction& singles, const Pairs<CCPair>& doubles, const PotentialType& name) const;
+    /// @param world the MPI world
+    static vector_real_function_3d
+    potential_singles_gs(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles,
+                         const PotentialType& name, Info& info);
 
     /// The integra manager for the excited state potential
     /// CC2 singles potential parts of the ground state
@@ -601,10 +733,11 @@ class CCPotentials {
     /// @param[in] Doubles of the Excited State
     /// @param[in] Name of the potential
     /// @param[out] the potential (without Q application)
+    /// @param world the MPI world
     double
-    potential_energy_ex(const CC_vecfunction& bra, const CC_vecfunction& singles_gs, const Pairs<CCPair>& doubles_gs,
-                        const CC_vecfunction& singles_ex, const Pairs<CCPair>& doubles_ex,
-                        const PotentialType& name) const;
+    potential_energy_ex(World& world, const CC_vecfunction& bra, const CC_vecfunction& singles_gs,
+                        const Pairs<CCPair>& doubles_gs, const CC_vecfunction& singles_ex,
+                        const Pairs<CCPair>& doubles_ex, const PotentialType& name) const;
 
     /// The potential manager for the excited state potential
     /// CC2 singles potential parts of the ground state
@@ -615,21 +748,22 @@ class CCPotentials {
     /// @param[in] Doubles of the Excited State
     /// @param[in] Name of the potential
     /// @param[out] the potential (without Q application)
-    vector_real_function_3d
-    potential_singles_ex(const CC_vecfunction& singles_gs, const Pairs<CCPair>& doubles_gs,
-                         const CC_vecfunction& singles_ex, const Pairs<CCPair>& doubles_ex,
-                         const PotentialType& name) const;
+    /// @param world the MPI world
+    static vector_real_function_3d
+    potential_singles_ex(World& world, const CC_vecfunction& singles_gs,
+                         const Pairs<CCPair>& doubles_gs, const CC_vecfunction& singles_ex,
+                         const Pairs<CCPair>& doubles_ex, const PotentialType& name, Info& info);
 
     /// The Fock operator is partitioned into F = T + Vn + R
     /// the fock residue R= 2J-K+Un for closed shell is computed here
     /// J_i = \sum_k <k|r12|k> |tau_i>
     /// K_i = \sum_k <k|r12|tau_i> |k>
-    vector_real_function_3d
-    fock_residue_closed_shell(const CC_vecfunction& singles) const;
+    static vector_real_function_3d
+    fock_residue_closed_shell(World& world, const CC_vecfunction& singles, const Info& info);
 
     /// the K operator runs over ALL orbitals (also the frozen ones)
-    real_function_3d
-    K(const CCFunction<double,3>& f) const;
+    static real_function_3d
+    K(World& world, const CCFunction<double, 3>& f, const Info& info);
 
     /// static version of k above for access from macrotask. will eventually replace former.
     real_function_3d
@@ -661,13 +795,13 @@ class CCPotentials {
     /// Static version of apply_K above for access from macrotask. Will eventually replace former.
     real_function_6d
     static apply_K_macrotask(World& world, const std::vector<real_function_3d>& mo_ket,
-                                    const std::vector<real_function_3d>& mo_bra,
-                                    const real_function_6d& u, const size_t& particle, const CCParameters& parameters);
+                             const std::vector<real_function_3d>& mo_bra,
+                             const real_function_6d& u, const size_t& particle, const CCParameters& parameters);
 
     /// Apply the Exchange operator on a tensor product multiplied with f12
     /// !!! Prefactor of (-1) is not inclued in K here !!!!
     real_function_6d
-    apply_Kf(const CCFunction<double,3>& x, const CCFunction<double,3>& y) const;
+    apply_Kf(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y) const;
 
     /// Apply fK on a tensor product of two 3D functions
     /// fK|xy> = fK_1|xy> + fK_2|xy>
@@ -675,28 +809,56 @@ class CCPotentials {
     /// @param[in] y, the second 3D function in |xy>  structure holds index i and type (HOLE, PARTICLE, MIXED, UNDEFINED)
     /// @param[in] BSH operator to screen, has to be in modified NS form, Gscreen->modified()==true;
     real_function_6d
-    apply_fK(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const real_convolution_6d *Gscreen = NULL) const;
+    apply_fK(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y,
+             const real_convolution_6d* Gscreen = NULL) const;
 
     /// Creates a 6D function with the correlation factor and two given CCFunctions
     real_function_6d
-    make_f_xy(const CCFunction<double,3>& x, const CCFunction<double,3>& y, const real_convolution_6d *Gscreen = NULL) const;
+    make_f_xy(const CCFunction<double, 3>& x, const CCFunction<double, 3>& y,
+              const real_convolution_6d* Gscreen = NULL) const;
+
+    /// Creates a 6D function with the correlation factor and two given CCFunctions
+    real_function_6d
+    static make_f_xy(World& world, const CCFunction<double, 3>& x, const CCFunction<double, 3>& y,
+                     const Info& info, const real_convolution_6d* Gscreen = NULL);
 
     real_function_6d
-    static make_f_xy_macrotask( World& world, const real_function_3d& x_ket, const real_function_3d& y_ket,
-                                const real_function_3d& x_bra, const real_function_3d& y_bra,
-                                const size_t& i, const size_t& j, const CCParameters& parameters,
-                                const FuncType& x_type, const FuncType& y_type,
-                                const real_convolution_6d *Gscreen = NULL);
+    static make_f_xy_macrotask(World& world, const real_function_3d& x_ket, const real_function_3d& y_ket,
+                               const real_function_3d& x_bra, const real_function_3d& y_bra,
+                               const size_t& i, const size_t& j, const CCParameters& parameters,
+                               const FuncType& x_type, const FuncType& y_type,
+                               const real_convolution_6d* Gscreen = NULL);
 
     /// unprojected ccs potential
     /// returns 2kgtk|ti> - kgti|tk>
     /// the ccs potential: ti = ti and tk = tauk
-    vector_real_function_3d
-    ccs_unprojected(const CC_vecfunction& ti, const CC_vecfunction& tk) const;
-
+    static vector_real_function_3d
+    ccs_unprojected(World& world, const CC_vecfunction& ti, const CC_vecfunction& tk, const Info& info);
+
+    /// compute the RMS and the maximum of the norms of a vector of residual functions
+    /// @return pair (rms norm, max norm); (0.0, 0.0) if the vector is empty
+    template <typename T, std::size_t NDIM>
+    static std::pair<double, double> residual_stats(const std::vector<Function<T, NDIM>>& residual) {
+        if (residual.empty()) return {0.0, 0.0};
+        World& world = residual.front().world();
+        const auto norms = norm2s(world, residual);
+        double sumsq = 0.0, largest = 0.0;
+        for (const double nrm : norms) {
+            if (largest < nrm) largest = nrm;
+            sumsq += nrm * nrm;
+        }
+        return {sqrt(sumsq / norms.size()), largest};
+    }
 
-    real_function_3d
-    make_density(const CC_vecfunction& x) const;
+    /// print a one-line convergence summary: name, iteration, wall time, rms/max residual and energy change
+    static void print_convergence(const std::string& name, const double rmsresidual, const double maxresidual,
+                                  const double energy_diff, const int iteration) {
+        char msg[255];      // snprintf truncates (and null-terminates) if the message exceeds the buffer
+        std::snprintf(msg, sizeof(msg),
+                      "convergence of %s in iteration %2d at time %8.1fs: rms/max residual, energy change %.1e %.1e %.1e",
+                      name.c_str(), iteration, wall_time(), rmsresidual, maxresidual, energy_diff);
+        print(msg);
+    }
 
     // integrals from singles potentials
 
@@ -722,7 +884,8 @@ class CCPotentials {
 
     /// -(2<lk|g|t3i,t1k> - <lk|g|t1k,t3i>)* <xi|t2l>
     double
-    x_s6(const CC_vecfunction& x, const CC_vecfunction& t1, const CC_vecfunction& t2, const CC_vecfunction& t3) const;
+    x_s6(const CC_vecfunction& x, const CC_vecfunction& t1, const CC_vecfunction& t2,
+         const CC_vecfunction& t3) const;
 
     /// 2.0 <xik|g|uik>- <kxi|g|uik>
     double
@@ -746,22 +909,26 @@ class CCPotentials {
 
     // result: \sum_k( 2<k|g|uik>_2 - <k|g|uik>_1 )
     // singles are not needed explicitly but to determine if it is response or ground state
+    ///@param world the MPI world
     ///@param[in] singles:CC_vecfunction fof type response or particle (depending on this the correct intermediates will be used) the functions themselves are not needed
     ///@param[in] doubles:Pairs of CC_Pairs (GS or Response)
+    ///@param info POD for basis and intermediates (non-const reference, may be updated)
     ///@param[out] \f$ \sum_k( 2<k|g|uik>_2 - <k|g|uik>_1 ) \f$
     /// Q-Projector is not applied, sign is correct
     /// if the s2b potential has already been calculated it will be loaded from the intermediate_potentials structure
-    vector_real_function_3d
-    s2b(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const;
+    static vector_real_function_3d
+    s2b(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles, Info& info);
 
     // result: -\sum_k( <l|kgi|ukl>_2 - <l|kgi|ukl>_1)
     // singles are not needed explicitly but to determine if it is response or ground state
+    ///@param world the MPI world
     ///@param[in] singles:CC_vecfunction fof type response or particle (depending on this the correct intermediates will be used) the functions themselves are not needed
     ///@param[in] doubles:Pairs of CC_Pairs (GS or Response)
+    ///@param info POD for basis and intermediates (non-const reference, may be updated)
     ///@param[out] \f$ -\sum_k( <l|kgi|ukl>_2 - <l|kgi|ukl>_1) \f$
     /// Q-Projector is not applied, sign is correct
-    vector_real_function_3d
-    s2c(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const;
+    static vector_real_function_3d
+    s2c(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles, Info& info);
 
     /// the S4a potential can be calcualted from the S2b potential
     /// result is \f$ s4a_i = - <l|s2b_i>*|tau_l> \f$
@@ -769,43 +936,47 @@ class CCPotentials {
     s4a_from_s2b(const vector_real_function_3d& s2b, const CC_vecfunction& singles) const;
 
     // result: -\sum_k( <l|kgtaui|ukl>_2 - <l|kgtaui|ukl>_1) | kgtaui = <k|g|taui>
+    ///@param world the MPI world
     ///@param[in] singles:CC_vecfunction fof type response or particle (depending on this the correct intermediates will be used) the functions themselves are not needed
     ///@param[in] doubles:Pairs of CC_Pairs (GS or Response)
+    ///@param[in] info POD for basis and intermediates
     ///@param[out] \f$ -( <l|kgtaui|ukl>_2 - <l|kgtaui|ukl>_1) | kgtaui = <k|g|taui> | taui=singles_i \f$
     /// Q-Projector is not applied, sign is correct
-    vector_real_function_3d
-    s4b(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const;
+    static vector_real_function_3d
+    s4b(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles, const Info& info);
 
 
+    ///@param world the MPI world
     ///@param[in] singles:CC_vecfunction fof type response or particle (depending on this the correct intermediates will be used) the functions themselves are not needed
     ///@param[in] doubles:Pairs of CC_Pairs (GS or Response)
+    ///@param[in] info POD for basis and intermediates
     ///@param[out] \f$ ( 4<l|kgtauk|uil>_2 - 2<l|kgtauk|uil>_1 - 2<k|lgtauk|uil>_2 + <k|lgtauk|uil>_1 ) \f$
     /// Q-Projector is not applied, sign is correct
-    vector_real_function_3d
-    s4c(const CC_vecfunction& singles, const Pairs<CCPair>& doubles) const;
+    static vector_real_function_3d
+    s4c(World& world, const CC_vecfunction& singles, const Pairs<CCPair>& doubles, const Info& info);
 
     // update the intermediates
     void update_intermediates(const CC_vecfunction& t) {
         g12->update_elements(mo_bra_, t);
-//        g12.sanity();
+        //        g12.sanity();
         f12->update_elements(mo_bra_, t);
-//        f12.sanity();
+        //        f12.sanity();
     }
 
     /// clear stored potentials
     /// if a response function is given only the response potentials are cleared (the GS potentials dont change anymore)
     void clear_potentials(const CC_vecfunction& t) const {
-
         if (t.type == RESPONSE) {
             output("Clearing Response Singles-Potentials");
             get_potentials.clear_response();
-        } else {
+        }
+        else {
             output("Clearing all stored Singles-Potentials");
             get_potentials.clear_all();
         }
     }
 
-protected:
+public:
     // member variables
     /// MPI World
     World& world;
@@ -821,19 +992,20 @@ class CCPotentials {
     std::vector<double> orbital_energies_;
     /// the coulomb operator with all intermediates
 public:
-    std::shared_ptr<CCConvolutionOperator<double,3>> g12;
+    std::shared_ptr<CCConvolutionOperator<double, 3>> g12;
     /// the f12 operator with all intermediates
-    std::shared_ptr<CCConvolutionOperator<double,3>> f12;
+    std::shared_ptr<CCConvolutionOperator<double, 3>> f12;
     /// the correlation factor, holds necessary regularized potentials
     CorrelationFactor corrfac;
     /// Manager for stored intermediate potentials which are s2c, s2b and the whole singles potentials without fock-residue for GS and EX state
     mutable CCIntermediatePotentials get_potentials;
+    /// POD for basis and intermediates
+    Info info;
+
 public:
     /// Messenger structure for formated output and to store warnings
     CCMessenger output;
-
 };
-
 } /* namespace madness */
 
 #endif /* SRC_APPS_CHEM_CCPOTENTIALS_H_ */
diff --git a/src/madness/chem/CCStructures.cc b/src/madness/chem/CCStructures.cc
index 218337c7f30..ffc38ebbd51 100644
--- a/src/madness/chem/CCStructures.cc
+++ b/src/madness/chem/CCStructures.cc
@@ -64,26 +64,6 @@ CCTimer::info(const bool debug, const double norm) {
 }
 
 
-madness::CC_vecfunction
-CC_vecfunction::copy() const {
-    std::vector<CCFunction<double,3>> vn;
-    for (auto x : functions) {
-        const CCFunction<double,3> fn(madness::copy(x.second.function), x.second.i, x.second.type);
-        vn.push_back(fn);
-    }
-    CC_vecfunction result(vn, type);
-    result.irrep = irrep;
-    return result;
-}
-
-std::string
-CC_vecfunction::name(const int ex) const {
-    if (type == PARTICLE) return "tau";
-    else if (type == HOLE) return "phi";
-    else if (type == MIXED) return "t";
-    else if (type == RESPONSE) return std::to_string(ex) + "_" + "x";
-    else return "UNKNOWN";
-}
 
 void
 CC_vecfunction::print_size(const std::string& msg) const {
@@ -117,21 +97,26 @@ madness::vector_real_function_3d
 CCIntermediatePotentials::operator()(const CC_vecfunction& f, const PotentialType& type) const {
     output("Getting " + assign_name(type) + " for " + f.name(0));
     vector_real_function_3d result;
-    if (type == POT_singles_ and (f.type == PARTICLE or f.type == MIXED)) return current_singles_potential_gs_;
-    else if (type == POT_singles_ and f.type == RESPONSE) return current_singles_potential_ex_;
-    else if (type == POT_s2b_ and f.type == PARTICLE) return current_s2b_potential_gs_;
-    else if (type == POT_s2b_ and f.type == RESPONSE) return current_s2b_potential_ex_;
-    else if (type == POT_s2c_ and f.type == PARTICLE) return current_s2c_potential_gs_;
-    else if (type == POT_s2c_ and f.type == RESPONSE) return current_s2c_potential_ex_;
+    if (type == POT_singles_ and (f.type == PARTICLE or f.type == MIXED)) result= current_singles_potential_gs_;
+    else if (type == POT_singles_ and f.type == RESPONSE) result= current_singles_potential_ex_;
+    else if (type == POT_s2b_ and f.type == PARTICLE) result= current_s2b_potential_gs_;
+    else if (type == POT_s2b_ and f.type == RESPONSE) result= current_s2b_potential_ex_;
+    else if (type == POT_s2c_ and f.type == PARTICLE) result= current_s2c_potential_gs_;
+    else if (type == POT_s2c_ and f.type == RESPONSE) result= current_s2c_potential_ex_;
     else if (f.type == HOLE) {
         output(assign_name(type) + " is zero for HOLE states");
-        result = zero_functions<double, 3>(world, f.size());
+        // result = zero_functions<double, 3>(f.size());
     } else {
         output("ERROR: Potential was not supposed to be stored");
         MADNESS_EXCEPTION("Potential was not supposed to be stored", 1);
     }
 
-    if (result.empty()) output("!!!WARNING: Potential is empty!!!");
+    if (result.empty()) {
+        output("!!!WARNING: Potential is empty!!!");
+    } else {
+        World& world=result.front().world();
+        if (parameters.debug()) print_size(world,result, "potential");
+    }
 
     return result;
 }
@@ -139,28 +124,30 @@ CCIntermediatePotentials::operator()(const CC_vecfunction& f, const PotentialTyp
 madness::real_function_3d
 CCIntermediatePotentials::operator()(const CCFunction<double,3>& f, const PotentialType& type) const {
     output("Getting " + assign_name(type) + " for " + f.name());
-    real_function_3d result = real_factory_3d(world);
-    if (type == POT_singles_ and (f.type == PARTICLE or f.type == MIXED))
-        return current_singles_potential_gs_[f.i - parameters.freeze()];
-    else if (type == POT_singles_ and f.type == RESPONSE) return current_singles_potential_ex_[f.i - parameters.freeze()];
-    else if (type == POT_s2b_ and f.type == PARTICLE) return current_s2b_potential_gs_[f.i - parameters.freeze()];
-    else if (type == POT_s2b_ and f.type == RESPONSE) return current_s2b_potential_ex_[f.i - parameters.freeze()];
-    else if (type == POT_s2c_ and f.type == PARTICLE) return current_s2c_potential_gs_[f.i - parameters.freeze()];
-    else if (type == POT_s2c_ and f.type == RESPONSE) return current_s2c_potential_ex_[f.i - parameters.freeze()];
+    std::vector<real_function_3d> result;
+    if (type == POT_singles_ and (f.type == PARTICLE or f.type == MIXED)) result= current_singles_potential_gs_;
+    else if (type == POT_singles_ and f.type == RESPONSE) result= current_singles_potential_ex_;
+    else if (type == POT_s2b_ and f.type == PARTICLE) result= current_s2b_potential_gs_;
+    else if (type == POT_s2b_ and f.type == RESPONSE) result= current_s2b_potential_ex_;
+    else if (type == POT_s2c_ and f.type == PARTICLE) result= current_s2c_potential_gs_;
+    else if (type == POT_s2c_ and f.type == RESPONSE) result= current_s2c_potential_ex_;
     else if (f.type == HOLE) output(assign_name(type) + " is zero for HOLE states");
-    else MADNESS_EXCEPTION("Potential was not supposed to be stored", 1)
+    else MADNESS_EXCEPTION("Potential was not supposed to be stored", 1);
 
-    ;
-    if (result.norm2() < FunctionDefaults<3>::get_thresh())
-        output("WARNING: Potential seems to be zero ||V||=" + std::to_string(double(result.norm2())));
-
-    return result;
+    std::string errmsg="CCIntermediatePotential was not computed/stored "+assign_name(type) + " " +assign_name(f.type);
+    errmsg+="\n --> you might need to iterate the corresponding singles";
+    MADNESS_CHECK_THROW(result.size()>(f.i-parameters.freeze()),errmsg.c_str());
+    return result[f.i-parameters.freeze()];
 }
 
 void
 CCIntermediatePotentials::insert(const vector_real_function_3d& potential, const CC_vecfunction& f,
                                  const PotentialType& type) {
     output("Storing potential: " + assign_name(type) + " for " + f.name(0));
+    if (parameters.debug() and !potential.empty()) {   // front() on an empty vector is undefined behavior
+        World& world=potential.front().world();
+        print_size(world, potential, "potential");
+    }
     MADNESS_ASSERT(!potential.empty());
     if (type == POT_singles_ && (f.type == PARTICLE || f.type == MIXED)) current_singles_potential_gs_ = potential;
     else if (type == POT_singles_ && f.type == RESPONSE) current_singles_potential_ex_ = potential;
@@ -182,19 +169,13 @@ void CCParameters::set_derived_values() {
     set_derived_value("tight_thresh_6d",thresh_6D()*0.1);
     set_derived_value("thresh_3d",thresh_6D()*0.01);
     set_derived_value("tight_thresh_3d",thresh_3D()*0.1);
-//    if (thresh_operators == uninitialized) thresh_operators = 1.e-6;
-//    if (thresh_operators_3D == uninitialized) thresh_operators_3D = thresh_operators;
-//    if (thresh_operators_6D == uninitialized) thresh_operators_6D = thresh_operators;
-//    if (thresh_bsh_3D == uninitialized) thresh_bsh_3D = thresh_operators_3D;
-//    if (thresh_bsh_6D == uninitialized) thresh_bsh_6D = thresh_operators_6D;
-//    if (thresh_poisson == uninitialized) thresh_poisson = thresh_operators_3D;
-//    if (thresh_f12 == uninitialized) thresh_f12 = thresh_operators_3D;
     set_derived_value("thresh_ue",tight_thresh_6D());
-    set_derived_value("dconv_6d",thresh_6D());
-    set_derived_value("dconv_3d",thresh_6D());
+    set_derived_value("dconv_6d",3.0*thresh_6D());
+    set_derived_value("dconv_3d",0.3*thresh_6D());
     set_derived_value("econv",0.1*dconv_6D());
     set_derived_value("econv_pairs",econv());
 
+
     set_derived_value("no_compute_gs",no_compute());
     set_derived_value("no_compute_mp2",no_compute() and no_compute_gs());
     set_derived_value("no_compute_cc2",no_compute() and no_compute_gs());
@@ -527,36 +508,79 @@ assign_name(const FuncType& inp) {
 
 
 std::vector<real_function_6d>
-MacroTaskMp2ConstantPart::operator() (const std::vector<CCPair>& pair, const std::vector<real_function_3d>& mo_ket,
-                                      const std::vector<real_function_3d>& mo_bra, const CCParameters& parameters,
-                                      const real_function_3d& Rsquare, const std::vector<real_function_3d>& U1,
+//MacroTaskMp2ConstantPart::operator() (const std::vector<CCPair>& pair, const std::vector<real_function_3d>& mo_ket,
+//                                      const std::vector<real_function_3d>& mo_bra, const CCParameters& parameters,
+//                                      const real_function_3d& Rsquare, const std::vector<real_function_3d>& U1,
+//                                      const std::vector<std::string>& argument) const {
+MacroTaskMp2ConstantPart::operator() (const std::vector<CCPair>& pair, const Info& info,
                                       const std::vector<std::string>& argument) const {
-    World& world = mo_ket[0].world();
+    World& world =info.mo_ket[0].world();
+    resultT result = zero_functions_compressed<double, 6>(world, pair.size());
+    for (size_t i = 0; i < pair.size(); i++) {
+        result[i] = CCPotentials::make_constant_part_mp2_macrotask(world, pair[i], info.mo_ket, info.mo_bra,
+                                    info.parameters, info.R_square, info.U1, argument);
+    }
+    return result;
+}
+
+std::vector<real_function_6d>
+MacroTaskConstantPart::operator() (const std::vector<CCPair>& pair,
+                                   const std::vector<Function<double,3>> & gs_singles,
+                                   const std::vector<Function<double,3>> & ex_singles,
+                                   const Info& info) const {
+
+    World& world =info.mo_ket[0].world();
+    CC_vecfunction singles(gs_singles, PARTICLE, info.parameters.freeze());
+    CC_vecfunction exsingles(ex_singles, RESPONSE, info.parameters.freeze());
+
+
     resultT result = zero_functions_compressed<double, 6>(world, pair.size());
     for (size_t i = 0; i < pair.size(); i++) {
-        result[i] = CCPotentials::make_constant_part_mp2_macrotask(world, pair[i], mo_ket, mo_bra, parameters,
-                                                                   Rsquare, U1, argument);
+        result[i] = CCPotentials::make_constant_part_macrotask(world, pair[i], singles, exsingles, info);
     }
     return result;
 }
 
+
 std::vector<real_function_6d>
+//MacroTaskMp2UpdatePair::operator() (const std::vector<CCPair> &pair,
+//                                    const std::vector<real_function_6d> &mp2_coupling,
+//                                    const CCParameters &parameters,
+//                                    const std::vector<madness::Vector<double, 3>> &all_coords_vec,
+//                                    const std::vector<real_function_3d> &mo_ket,
+//                                    const std::vector<real_function_3d> &mo_bra,
+//                                    const std::vector<real_function_3d> &U1, const real_function_3d &U2) const {
 MacroTaskMp2UpdatePair::operator() (const std::vector<CCPair> &pair,
                                     const std::vector<real_function_6d> &mp2_coupling,
-                                    const CCParameters &parameters,
                                     const std::vector<madness::Vector<double, 3>> &all_coords_vec,
-                                    const std::vector<real_function_3d> &mo_ket,
-                                    const std::vector<real_function_3d> &mo_bra,
-                                    const std::vector<real_function_3d> &U1, const real_function_3d &U2) const {
-    World& world = mo_ket[0].world();
+                                    const Info& info) const {
+    World& world = info.mo_ket[0].world();
     resultT result = zero_functions_compressed<double, 6>(world, pair.size());
 
     for (size_t i = 0; i < pair.size(); i++) {
         //(i, j) -> j*(j+1) + i
-        result[i] = CCPotentials::update_pair_mp2_macrotask(world, pair[i], parameters, all_coords_vec, mo_ket,
-                                                            mo_bra, U1, U2, mp2_coupling[i]);
+        result[i] = CCPotentials::update_pair_mp2_macrotask(world, pair[i], info.parameters, all_coords_vec, info.mo_ket,
+                                                            info.mo_bra, info.U1, info.U2, mp2_coupling[i]);
+    }
+    return result;
+}
+
+std::vector<real_function_6d>
+MacroTaskIteratePair::operator()(const std::vector<CCPair>& pair,
+        const std::vector<real_function_6d>& local_coupling,
+        const CC_vecfunction& gs_singles,
+        const CC_vecfunction& ex_singles,
+        const Info& info,
+        const std::size_t& maxiter) const {
+    World& world = info.mo_ket[0].world();
+    resultT result = zero_functions_compressed<double, 6>(world, pair.size());
+
+    for (size_t i = 0; i < pair.size(); i++) {
+        result[i]=  CCPotentials::iterate_pair_macrotask(world, pair[i], gs_singles, ex_singles,
+            local_coupling[i], info, maxiter).function();
     }
     return result;
+
 }
 
 template class CCConvolutionOperator<double,3>;
diff --git a/src/madness/chem/CCStructures.h b/src/madness/chem/CCStructures.h
index bdf9847f4e1..37ef03c7cc0 100644
--- a/src/madness/chem/CCStructures.h
+++ b/src/madness/chem/CCStructures.h
@@ -19,6 +19,8 @@
 #include <iostream>
 #include <madness/mra/macrotaskq.h>
 
+#include "lowrankfunction.h"
+
 namespace madness {
 
 /// Calculation Types used by CC2
@@ -228,11 +230,11 @@ struct CCParameters : public QCCalculationParametersBase {
         initialize < double > ("thresh_Ue", thresh_operators, "ue threshold");
         initialize < double > ("econv", thresh, "overal convergence threshold ");
         initialize < double > ("econv_pairs", 0.1*thresh, "convergence threshold for pairs");
-        initialize < double > ("dconv_3d", 0.01*thresh, "convergence for cc singles");
-        initialize < double > ("dconv_6d", thresh, "convergence for cc doubles");
+        initialize < double > ("dconv_3d", 0.3*thresh, "convergence for cc singles");
+        initialize < double > ("dconv_6d", 3.0*thresh, "convergence for cc doubles");
         initialize < std::size_t > ("iter_max", 10, "max iterations");
-        initialize < std::size_t > ("iter_max_3d", 10, "max iterations");
-        initialize < std::size_t > ("iter_max_6d", 10, "max iterations");
+        initialize < std::size_t > ("iter_max_3d", 10, "max iterations for singles");
+        initialize < std::size_t > ("iter_max_6d", 10, "max iterations for doubles");
         initialize < std::pair<int, int>> ("only_pair", {-1, -1}, "compute only a single pair");
         initialize < bool > ("restart", false, "restart");
         initialize < bool > ("no_compute", false, "no compute");
@@ -252,11 +254,11 @@ struct CCParameters : public QCCalculationParametersBase {
         initialize < long > ("freeze", -1, "number of frozen orbitals: -1: automatic");
         initialize < bool > ("test", false, "");
         // choose if Q for the constant part of MP2 and related calculations should be decomposed: GQV or GV - GO12V
-        initialize < bool > ("decompose_Q", true, "");
+        initialize < bool > ("decompose_Q", true, "always true",{true});
         // if true the ansatz for the CC2 ground state pairs is |tau_ij> = |u_ij> + Qtf12|titj>, with Qt = Q - |tau><phi|
         // if false the ansatz is the same with normal Q projector
         // the response ansatz is the corresponding response of the gs ansatz
-        initialize < bool > ("QtAnsatz", true, "");
+        initialize < bool > ("QtAnsatz", true, "always true",{true});
         // a vector containing the excitations which shall be optizmized later (with CIS(D) or CC2)
         initialize < std::vector<size_t>>
         ("excitations", {}, "vector containing the excitations");
@@ -379,6 +381,7 @@ struct CCParameters : public QCCalculationParametersBase {
 struct PairVectorMap {
 
     std::vector<std::pair<int, int>> map; ///< maps pair index (i,j) to vector index k
+    PairVectorMap() = default;
     PairVectorMap(const std::vector<std::pair<int, int>> map1) : map(map1) {}
 
     static PairVectorMap triangular_map(const int nfreeze, const int nocc) {
@@ -552,7 +555,7 @@ struct CC_vecfunction : public archive::ParallelSerializableObject {
               delta(other.delta), irrep(other.irrep) {
     }
 
-    /// assignment operator
+    /// assignment operator, shallow wrt the functions
 //    CC_vecfunction& operator=(const CC_vecfunction& other) = default;
     CC_vecfunction& operator=(const CC_vecfunction& other) {
         if (this == &other) return *this;
@@ -567,8 +570,29 @@ struct CC_vecfunction : public archive::ParallelSerializableObject {
 
 
     /// returns a deep copy (void shallow copy errors)
-    CC_vecfunction
-    copy() const;
+    friend CC_vecfunction
+    copy(const CC_vecfunction& other) {
+        CC_vecfunction tmp=other;       // takes over all members of other; the function map is rebuilt below
+        tmp.functions.clear();
+        for (const auto& x : other.functions) {
+            tmp.functions.insert(std::make_pair(x.first, copy(x.second)));    // same key, function copied via copy() -- presumably a deep copy, verify
+        }
+        return tmp;
+    }
+
+
+//madness::CC_vecfunction
+//CC_vecfunction::copy() const {
+//    std::vector<CCFunction<double,3>> vn;
+//    for (auto x : functions) {
+//        const CCFunction<double,3> fn(madness::copy(x.second.function), x.second.i, x.second.type);
+//        vn.push_back(fn);
+//    }
+//    CC_vecfunction result(vn, type);
+//    result.irrep = irrep;
+//    return result;
+//}
+//
 
     static CC_vecfunction load_restartdata(World& world, std::string filename) {
         archive::ParallelInputArchive<archive::BinaryFstreamInputArchive> ar(world, filename.c_str());
@@ -608,6 +632,13 @@ struct CC_vecfunction : public archive::ParallelSerializableObject {
         }
     }
 
+    hashT hash() const {        // hash built from the function type and the impl ids of all stored functions
+        hashT hashval = std::hash<FuncType>{}(type);
+        for (const auto& f : functions) hash_combine(hashval, hash_value(f.second.f().get_impl()->id()));
+
+        return hashval;
+    }
+
     typedef std::map<std::size_t, CCFunction<double,3>> CC_functionmap;
     CC_functionmap functions;
 
@@ -618,7 +649,9 @@ struct CC_vecfunction : public archive::ParallelSerializableObject {
     std::string irrep = "null";    /// excitation irrep (direct product of x function and corresponding orbital)
 
     std::string
-    name(const int ex) const;
+    name(const int ex) const {
+        return madness::name(type,ex);
+    };
 
     bool is_converged(const double econv, const double dconv) const {
         return (current_error<dconv) and (std::fabs(delta)<econv);
@@ -745,6 +778,11 @@ class CCConvolutionOperator {
 
     CCConvolutionOperator(const CCConvolutionOperator& other) = default;
 
+    static inline
+    std::shared_ptr<CCConvolutionOperator> CCConvolutionOperatorPtr(World& world, const OpType type, Parameters param) {
+        return std::shared_ptr<CCConvolutionOperator>(new CCConvolutionOperator(world, type, param));
+    }
+
 protected:
 
     friend CCConvolutionOperator combine(const CCConvolutionOperator& a, const CCConvolutionOperator& b) {
@@ -934,6 +972,40 @@ class CCPair : public archive::ParallelSerializableObject {
     size_t i;
     size_t j;
 
+    /// customized function to store this to the cloud
+
+    /// functions and constant_part can be very large and we want to split them and store them in different records
+    Recordlist<Cloud::keyT> cloud_store(World& world, Cloud& cloud) const {
+        // save bookkeeping stuff in a vector
+        std::vector<unsigned char> v;
+        archive::VectorOutputArchive arout(v);
+        bool function_is_assigned=(functions.size()>0 && functions[0].is_assigned());
+        arout & type & ctype & i & j & bsh_eps & function_is_assigned & constant_part.is_initialized();   // the two flags tell cloud_load which optional records follow
+
+        Recordlist<Cloud::keyT> records;
+        records+=cloud.store(world,v);
+        if (function_is_assigned) records+=cloud.store(world,functions[0]);   // NOTE(review): only functions[0] is stored, further entries are not -- confirm
+        if (constant_part.is_initialized()) records+=cloud.store(world,constant_part);
+        return records;
+   }
+
+    /// customized function to load this from the cloud
+
+    /// functions and constant_part can be very large and we want to split them and store them in different records
+    /// @param[inout] recordlist: containing the keys of the member variables -> will be reduced by the keys which are used
+    void cloud_load(World& world, const Cloud& cloud, Recordlist<Cloud::keyT>& recordlist) {
+        // load the bookkeeping data from a byte vector
+        std::vector<unsigned char> v=cloud.forward_load<std::vector<unsigned char>>(world,recordlist);
+        archive::VectorInputArchive arin(v);
+        bool function_is_assigned = false, constant_part_is_initialized=false;
+        arin & type & ctype & i & j & bsh_eps & function_is_assigned & constant_part_is_initialized;   // same order as written in cloud_store
+        functions.clear();      // discard the current content before loading
+        constant_part.clear();
+
+        if (function_is_assigned) functions.emplace_back(cloud.forward_load<CCPairFunction<double,6>>(world,recordlist));
+        if (constant_part_is_initialized) constant_part=cloud.forward_load<real_function_6d>(world,recordlist);
+   }
+
     /// gives back the pure 6D part of the pair function
     real_function_6d function() const {
         MADNESS_ASSERT(not functions.empty());
@@ -1025,7 +1097,11 @@ class CCPair : public archive::ParallelSerializableObject {
 
 /// little helper structure which manages the stored singles potentials
 struct CCIntermediatePotentials {
-    CCIntermediatePotentials(World& world, const CCParameters& p) : world(world), parameters(p) {};
+    CCIntermediatePotentials() = default;
+    CCIntermediatePotentials(const CCParameters& p) : parameters(p) {};
+
+    CCIntermediatePotentials(const CCIntermediatePotentials& other) = default;
+    CCIntermediatePotentials& operator=(const CCIntermediatePotentials& other) = default;
 
     /// fetches the correct stored potential or throws an exception
     vector_real_function_3d
@@ -1056,9 +1132,52 @@ struct CCIntermediatePotentials {
     void
     insert(const vector_real_function_3d& potential, const CC_vecfunction& f, const PotentialType& type);
 
+    Recordlist<Cloud::keyT> cloud_store(World& world, Cloud& cloud) const {     // store the parameters and all potentials to the cloud
+        Recordlist<Cloud::keyT> records;
+        records+=cloud.store(world,parameters);     // the order of the store calls must match the load order in cloud_load
+        records+=cloud.store(world,current_s2b_potential_ex_);
+        records+=cloud.store(world,current_s2b_potential_gs_);
+        records+=cloud.store(world,current_s2c_potential_ex_);
+        records+=cloud.store(world,current_s2c_potential_gs_);
+        records+=cloud.store(world,current_singles_potential_ex_);
+        records+=cloud.store(world,current_singles_potential_gs_);
+        records+=cloud.store(world,unprojected_cc2_projector_response_);
+        return records;
+    }
+
+    void cloud_load(World& world, const Cloud& cloud, Recordlist<Cloud::keyT>& recordlist) {    // load the parameters and all potentials from the cloud
+        parameters=cloud.forward_load<CCParameters>(world,recordlist);      // the members are read in the order used by cloud_store
+        current_s2b_potential_ex_=cloud.forward_load<vector_real_function_3d>(world,recordlist);
+        current_s2b_potential_gs_=cloud.forward_load<vector_real_function_3d>(world,recordlist);
+        current_s2c_potential_ex_=cloud.forward_load<vector_real_function_3d>(world,recordlist);
+        current_s2c_potential_gs_=cloud.forward_load<vector_real_function_3d>(world,recordlist);
+        current_singles_potential_ex_=cloud.forward_load<vector_real_function_3d>(world,recordlist);
+        current_singles_potential_gs_=cloud.forward_load<vector_real_function_3d>(world,recordlist);
+        unprojected_cc2_projector_response_=cloud.forward_load<vector_real_function_3d>(world,recordlist);
+    }
+
+    /// hash of the stored potentials, combined from the impl ids of all functions in the seven potential vectors;
+    /// the parameters do not enter the hash
+    friend hashT hash_value(const CCIntermediatePotentials& ip) {
+        auto hash_vector_of_functions =[](const vector_real_function_3d& v) {
+            hashT h=0;      // hash_combine reads the seed, so it must start from a defined value
+            for (const auto& f : v) hash_combine(h, hash_value(f.get_impl()->id()));
+            return h;
+        };
+        hashT h=0;          // defined seed, see above
+        hash_combine(h, hash_vector_of_functions(ip.current_s2b_potential_ex_));
+        hash_combine(h, hash_vector_of_functions(ip.current_s2b_potential_gs_));
+        hash_combine(h, hash_vector_of_functions(ip.current_s2c_potential_ex_));
+        hash_combine(h, hash_vector_of_functions(ip.current_s2c_potential_gs_));
+        hash_combine(h, hash_vector_of_functions(ip.current_singles_potential_ex_));
+        hash_combine(h, hash_vector_of_functions(ip.current_singles_potential_gs_));
+        hash_combine(h, hash_vector_of_functions(ip.unprojected_cc2_projector_response_));
+        return h;
+    }
+
+    CCParameters parameters;
 private:
-    World& world;
-    const CCParameters& parameters;
+    // World& world;
     /// whole ground state singles potential without fock-residue
     vector_real_function_3d current_singles_potential_gs_;
     /// whole excited state singles potential without fock-residue
@@ -1077,11 +1196,75 @@ struct CCIntermediatePotentials {
 
     /// structured output
     void output(const std::string& msg) const {
-        if (world.rank() == 0 and parameters.debug())
+        if (parameters.debug())
             std::cout << "Intermediate Potential Manager: " << msg << "\n";
     }
 };
 
+/// POD holding some basic functions and some intermediates for the CC2 calculation
+
+/// the class is cloud-serializable and can be used in MacroTasks
+struct Info {
+    std::vector<Function<double,3>> mo_ket;
+    std::vector<Function<double,3>> mo_bra;
+    std::vector<madness::Vector<double,3>> molecular_coordinates;
+    CCParameters parameters;
+    std::vector<double> orbital_energies;
+    Tensor<double> fock;
+    CCIntermediatePotentials intermediate_potentials;
+    Function<double,3> R_square, U2, R;;    // NOTE(review): R is neither stored in cloud_store nor loaded in cloud_load -- confirm this is intended
+    std::vector<Function<double,3>> U1;
+
+    vector_real_function_3d get_active_mo_ket() const {     // mo_ket without its first parameters.freeze() entries
+        vector_real_function_3d result;
+        for (size_t i = parameters.freeze(); i < mo_ket.size(); i++) result.push_back(mo_ket[i]);
+        return result;
+    }
+
+    vector_real_function_3d get_active_mo_bra() const {     // mo_bra without its first parameters.freeze() entries
+        vector_real_function_3d result;
+        for (size_t i = parameters.freeze(); i < mo_bra.size(); i++) result.push_back(mo_bra[i]);
+        return result;
+    }
+
+    /// customized function to store this to the cloud
+
+    /// each member (except R) is stored by a separate cloud.store call; cloud_load reads them back in the same order
+    Recordlist<Cloud::keyT> cloud_store(World& world, Cloud& cloud) const {
+        Recordlist<Cloud::keyT> records;
+        records+=cloud.store(world,mo_bra);
+        records+=cloud.store(world,mo_ket);
+        records+=cloud.store(world,parameters);
+        records+=cloud.store(world,orbital_energies);
+        records+=cloud.store(world,fock);
+        records+=cloud.store(world,intermediate_potentials);
+        records+=cloud.store(world,R_square);
+        records+=cloud.store(world,molecular_coordinates);
+        records+=cloud.store(world,U2);
+        records+=cloud.store(world,U1);
+        return records;
+    }
+
+    /// customized function to load this from the cloud
+
+    /// the members are read back one by one, in the order in which cloud_store has written them
+    /// @param[inout] recordlist: containing the keys of the member variables -> will be reduced by the keys which are used
+    void cloud_load(World& world, const Cloud& cloud, Recordlist<Cloud::keyT>& recordlist) {
+        // the sequence of loads must mirror the sequence of stores in cloud_store
+        mo_bra=cloud.forward_load<std::vector<Function<double,3>>>(world,recordlist);
+        mo_ket=cloud.forward_load<std::vector<Function<double,3>>>(world,recordlist);
+        parameters=cloud.forward_load<CCParameters>(world,recordlist);
+        orbital_energies=cloud.forward_load<std::vector<double>>(world,recordlist);
+        fock=cloud.forward_load<Tensor<double>>(world,recordlist);
+        intermediate_potentials=cloud.forward_load<CCIntermediatePotentials>(world,recordlist);
+        R_square=cloud.forward_load<Function<double,3>>(world,recordlist);
+        molecular_coordinates=cloud.forward_load<std::vector<madness::Vector<double,3>>>(world,recordlist);
+        U2=cloud.forward_load<Function<double,3>>(world,recordlist);
+        U1=cloud.forward_load<std::vector<Function<double,3>>>(world,recordlist);
+    }
+
+};
+
 class MacroTaskMp2ConstantPart : public MacroTaskOperationBase {
 
     class ConstantPartPartitioner : public MacroTaskPartitioner {
@@ -1102,9 +1285,10 @@ class MacroTaskMp2ConstantPart : public MacroTaskOperationBase {
 public:
     MacroTaskMp2ConstantPart(){partitioner.reset(new ConstantPartPartitioner());}
 
-    typedef std::tuple<const std::vector<CCPair>&, const std::vector<Function<double,3>>&,
-            const std::vector<Function<double,3>>&, const CCParameters&, const Function<double,3>&,
-            const std::vector<Function<double,3>>&, const std::vector<std::string>& > argtupleT;
+    // typedef std::tuple<const std::vector<CCPair>&, const std::vector<Function<double,3>>&,
+            // const std::vector<Function<double,3>>&, const CCParameters&, const Function<double,3>&,
+            // const std::vector<Function<double,3>>&, const std::vector<std::string>& > argtupleT;
+    typedef std::tuple<const std::vector<CCPair>&, const madness::Info&, const std::vector<std::string>& > argtupleT;
 
     using resultT = std::vector<real_function_6d>;
 
@@ -1114,10 +1298,60 @@ class MacroTaskMp2ConstantPart : public MacroTaskOperationBase {
         return result;
     }
 
-    resultT operator() (const std::vector<CCPair>& pair, const std::vector<Function<double,3>>& mo_ket,
-                        const std::vector<Function<double,3>>& mo_bra, const CCParameters& parameters,
-                        const Function<double,3>& Rsquare, const std::vector<Function<double,3>>& U1,
-                        const std::vector<std::string>& argument) const;
+//    resultT operator() (const std::vector<CCPair>& pair, const std::vector<Function<double,3>>& mo_ket,
+//                        const std::vector<Function<double,3>>& mo_bra, const CCParameters& parameters,
+//                        const Function<double,3>& Rsquare, const std::vector<Function<double,3>>& U1,
+//                        const std::vector<std::string>& argument) const;
+    resultT operator() (const std::vector<CCPair>& pair, const Info& info, const std::vector<std::string>& argument) const;
+};
+
+/// compute the "constant" part of MP2, CC2, or LR-CC2
+///
+/// the constant part is
+/// result = G [F,f] |ij>  for MP2
+/// result = G [F,f] |t_i t_j>  for CC2
+/// result = G [F,f] |t_i x_j> + |x_i t_j>  for LR-CC2
+class MacroTaskConstantPart : public MacroTaskOperationBase {
+
+    class ConstantPartPartitioner : public MacroTaskPartitioner {
+    public:
+        ConstantPartPartitioner() {};
+
+        partitionT do_partitioning(const std::size_t& vsize1, const std::size_t& vsize2,
+                                   const std::string policy) const override {
+            partitionT p;
+            for (size_t i = 0; i < vsize1; i++) {
+                Batch batch(Batch_1D(i,i+1), Batch_1D(i,i+1));
+                p.push_back(std::make_pair(batch,1.0));
+            }
+            return p;
+        }
+    };
+
+public:
+    MacroTaskConstantPart()  {
+        partitioner.reset(new ConstantPartPartitioner());
+        name="ConstantPart";
+    }
+
+    // typedef std::tuple<const std::vector<CCPair>&, const std::vector<Function<double,3>>&,
+    // const std::vector<Function<double,3>>&, const CCParameters&, const Function<double,3>&,
+    // const std::vector<Function<double,3>>&, const std::vector<std::string>& > argtupleT;
+    typedef std::tuple<const std::vector<CCPair>&,
+                       const std::vector<Function<double,3>>&, const std::vector<Function<double,3>>&,
+                       const madness::Info&> argtupleT;
+
+    using resultT = std::vector<real_function_6d>;
+
+    resultT allocator(World& world, const argtupleT& argtuple) const {
+        std::size_t n = std::get<0>(argtuple).size();
+        resultT result = zero_functions_compressed<double, 6>(world, n);
+        return result;
+    }
+    resultT operator() (const std::vector<CCPair>& pair,
+        const std::vector<Function<double,3>>& gs_singles,
+        const std::vector<Function<double,3>>& ex_singles,
+        const Info& info) const;
 };
 
 class MacroTaskMp2UpdatePair : public MacroTaskOperationBase {
@@ -1139,12 +1373,65 @@ class MacroTaskMp2UpdatePair : public MacroTaskOperationBase {
         }
     };
 public:
-    MacroTaskMp2UpdatePair() {partitioner.reset(new UpdatePairPartitioner());}
+    MacroTaskMp2UpdatePair() {
+        partitioner.reset(new UpdatePairPartitioner());
+        name="MP2UpdatePair";
+    }
 
-    typedef std::tuple<const std::vector<CCPair>&, const std::vector<real_function_6d>&, const CCParameters&,
-                        const std::vector< madness::Vector<double,3> >&,
-                       const std::vector<Function<double,3>>&, const std::vector<Function<double,3>>&,
-                       const std::vector<Function<double,3>>&, const Function<double,3>&> argtupleT;
+    // typedef std::tuple<const std::vector<CCPair>&, const std::vector<real_function_6d>&, const CCParameters&,
+                        // const std::vector< madness::Vector<double,3> >&,
+                       // const std::vector<Function<double,3>>&, const std::vector<Function<double,3>>&,
+                       // const std::vector<Function<double,3>>&, const Function<double,3>&> argtupleT;
+    typedef std::tuple<const std::vector<CCPair>&, const std::vector<real_function_6d>&,
+                    const std::vector<madness::Vector<double,3>>&, const Info& > argtupleT;
+
+    using resultT = std::vector<real_function_6d>;
+
+    resultT allocator(World& world, const argtupleT& argtuple) const {
+        std::size_t n = std::get<0>(argtuple).size();
+        resultT result = zero_functions_compressed<double, 6>(world, n);
+        return result;
+    }
+
+//    resultT operator() (const std::vector<CCPair>& pair, const std::vector<real_function_6d>& mp2_coupling, const CCParameters& parameters,
+//                        const std::vector< madness::Vector<double,3> >& all_coords_vec,
+//                        const std::vector<Function<double,3>>& mo_ket, const std::vector<Function<double,3>>& mo_bra,
+//                        const std::vector<Function<double,3>>& U1, const Function<double,3>& U2) const;
+    resultT operator() (const std::vector<CCPair>& pair, const std::vector<real_function_6d>& mp2_coupling,
+                        const std::vector< madness::Vector<double,3> >& all_coords_vec, const Info& info) const;
+};
+
+
+class MacroTaskIteratePair : public MacroTaskOperationBase {
+
+    class IteratePairPartitioner : public MacroTaskPartitioner {
+    public :
+        IteratePairPartitioner() = default;
+
+        partitionT do_partitioning(const std::size_t& vsize1, const std::size_t& vsize2,
+                                   const std::string policy) const override {
+            partitionT p;
+            for (size_t i = 0; i < vsize1; i++) {
+                Batch batch(Batch_1D(i, i+1), Batch_1D(i, i+1), Batch_1D(i,i+1));
+                p.push_back(std::make_pair(batch,1.0));
+            }
+            return p;
+        }
+    };
+public:
+    MacroTaskIteratePair() {
+        partitioner.reset(new IteratePairPartitioner());
+        name="IteratePair";
+    }
+
+    typedef std::tuple<
+        const std::vector<CCPair>&,      // pair
+        const std::vector<real_function_6d>&,   // local coupling
+        const CC_vecfunction&,          // gs singles
+        const CC_vecfunction&,          // ex singles
+        const Info&,
+        const std::size_t&
+        > argtupleT;
 
     using resultT = std::vector<real_function_6d>;
 
@@ -1154,10 +1441,23 @@ class MacroTaskMp2UpdatePair : public MacroTaskOperationBase {
         return result;
     }
 
-    resultT operator() (const std::vector<CCPair>& pair, const std::vector<real_function_6d>& mp2_coupling, const CCParameters& parameters,
-                        const std::vector< madness::Vector<double,3> >& all_coords_vec,
-                        const std::vector<Function<double,3>>& mo_ket, const std::vector<Function<double,3>>& mo_bra,
-                        const std::vector<Function<double,3>>& U1, const Function<double,3>& U2) const;
+    /// iterate a given pair of the MP2, CC2 or LRCC2 calculation
+
+    /// will *NOT* compute the local coupling, will apply the Fock operators (J-K+V)|pair>
+    /// and use the (excited) singles vectors to update the pair
+    /// @param[in] pair: the pair which will be updated
+    /// @param[in] local_coupling: the precomputed local coupling, one 6d function per pair
+    /// @param[in] gs_singles: the ground state singles, may be dummy for MP2
+    /// @param[in] ex_singles: the excited state singles, may be dummy for MP2, CC2
+    /// @param[in] info: the info structure
+    /// @param[in] maxiter: the maximal number of iterations
+    /// @return one 6d function per input pair
+    resultT operator() (const std::vector<CCPair>& pair,
+        const std::vector<real_function_6d>& local_coupling,
+        const CC_vecfunction& gs_singles,
+        const CC_vecfunction& ex_singles,
+        const Info& info,
+        const std::size_t& maxiter) const;
 };
 
 }//namespace madness
diff --git a/src/madness/chem/PNO.cpp b/src/madness/chem/PNO.cpp
index 9dd1b0f4111..8984a6e32b3 100644
--- a/src/madness/chem/PNO.cpp
+++ b/src/madness/chem/PNO.cpp
@@ -781,7 +781,7 @@ PNOPairs PNO::initialize_pairs(PNOPairs& pairs, const GuessType& inpgt) const {
 				vector_real_function_3d& pno = pno_ij[it.ij()];
 				if (not pno.empty()) {
 					msg << it.name() << ": pnos not empty ... project out and assemble\n";
-					QProjector<double, 3> Qpno(world, pno, pno);
+					QProjector<double, 3> Qpno( pno, pno);
 					pno = append(pno, Qpno(virtuals));
 				} else
 					pno = append(pno, virtuals);
@@ -814,7 +814,7 @@ PNOPairs PNO::initialize_pairs(PNOPairs& pairs, const GuessType& inpgt) const {
 			}
 			vector_real_function_3d virtij = guess_virtuals(pair_mo, guesstype);
 			if (not pno.empty()) {
-				QProjector<double, 3> Qpno(world, pno, pno);
+				QProjector<double, 3> Qpno( pno, pno);
 				virtij = Qpno(virtij);
 			}
 
@@ -1574,7 +1574,7 @@ PNOPairs PNO::grow_rank(PNOPairs& pairs, std::string exop)const{
 				vector_real_function_3d virtij = Q(basis.guess_with_exop(pair_mo, exop,param.exop_trigo()));// guess_virtuals(pair_mo, EXOP_TYPE);
 				// project out already existing pno pairs
 				if (not pairs.pno_ij[it.ij()].empty()) {
-					QProjector<double, 3> Qpno(world, pairs.pno_ij[it.ij()], pairs.pno_ij[it.ij()]);
+					QProjector<double, 3> Qpno(pairs.pno_ij[it.ij()], pairs.pno_ij[it.ij()]);
 					virtij = Qpno(virtij);
 
 				}
diff --git a/src/madness/chem/PNO.h b/src/madness/chem/PNO.h
index 0d0e77db417..cf0b24ffb91 100644
--- a/src/madness/chem/PNO.h
+++ b/src/madness/chem/PNO.h
@@ -36,7 +36,7 @@ class PNO : public QCPropertyInterface {
 	  T(world),
 	  V(world, nemo.ncf),
 	  F(world, &nemo),
-	  Q(world, nemo.get_calc()->amo),
+	  Q( nemo.get_calc()->amo),
 	  basis(world,nemo.get_calc()->molecule,8),
 	  f12(world,nemo,basis,paramf12),
 	  msg(world)
diff --git a/src/madness/chem/PNOF12Potentials.cpp b/src/madness/chem/PNOF12Potentials.cpp
index 99fc2bdcfc5..fa7804fd940 100644
--- a/src/madness/chem/PNOF12Potentials.cpp
+++ b/src/madness/chem/PNOF12Potentials.cpp
@@ -72,7 +72,7 @@ F12Potentials::F12Potentials(World& world,const Nemo& nemo, const BasisFunctions
 														mos(nemo.get_calc()->amo),
 														acmos(initialize_active_mos(nemo)),
 														K(ParametrizedExchange(world, nemo, pp.exchange())),
-														Q(world, nemo.get_calc()->amo) {
+														Q(nemo.get_calc()->amo) {
 	const double lo = 1.e-6;
 	const double eps = param.op_thresh();
 	coulombop = std::shared_ptr < real_convolution_3d > (CoulombOperatorPtr(world, lo, eps));
@@ -1377,7 +1377,7 @@ PairEnergies F12Potentials::compute_hylleraas_f12_energies(
 		for (ElectronPairIterator it = pit(); it; ++it) {
 			// right now this will make the same guess for all pairs
 			//const vector_real_function_3d tmp=guess_virtuals(param.abs);
-			QProjector<double, 3> Qpno(world, pnos[it.ij()]);
+			QProjector<double, 3> Qpno( pnos[it.ij()]);
 			const vector_real_function_3d tmp = Qpno(cabs);
 			abs_ij[it.ij()] = tmp;
 		}
diff --git a/src/madness/chem/TDHF.cc b/src/madness/chem/TDHF.cc
index 72b89e28f73..e9a876d1360 100644
--- a/src/madness/chem/TDHF.cc
+++ b/src/madness/chem/TDHF.cc
@@ -154,7 +154,7 @@ void TDHF::prepare_calculation() {
 
     mo_ket_ = make_mo_ket(get_calc()->amo);
     mo_bra_ = make_mo_bra(get_calc()->amo);
-    Q = QProjector(world, mo_bra_.get_vecfunction(), mo_ket_.get_vecfunction());
+    Q = QProjector( mo_bra_.get_vecfunction(), mo_ket_.get_vecfunction());
 
     if (not parameters.no_compute()) {
 
diff --git a/src/madness/chem/ccpairfunction.cc b/src/madness/chem/ccpairfunction.cc
index 8fbfe3010c6..f6cb1a0cfdf 100644
--- a/src/madness/chem/ccpairfunction.cc
+++ b/src/madness/chem/ccpairfunction.cc
@@ -38,11 +38,17 @@ void CCPairFunction<T,NDIM>::convert_to_pure_no_op_inplace() {
         result= CompositeFactory<T, NDIM, LDIM>(world())
                 .g12(get_operator().get_kernel())
                 .ket(get_function());
-    } else if (is_decomposed()) {
+    } else if (is_decomposed_no_op()) {
+        result= CompositeFactory<T, NDIM, LDIM>(world())
+                .particle1(get_a())
+                .particle2(get_b());
+    } else if (is_op_decomposed()) {
         result= CompositeFactory<T, NDIM, LDIM>(world())
                 .g12(get_operator().get_kernel())
                 .particle1(get_a())
                 .particle2(get_b());
+    } else {
+        MADNESS_EXCEPTION("error in convert_to_pure_no_op_inplace",1);
     }
     result.fill_tree();
     result.truncate(FunctionDefaults<NDIM>::get_thresh()*0.1);
@@ -99,10 +105,26 @@ std::vector<CCPairFunction<T,NDIM>> CCPairFunction<T,NDIM>::op_dec_to_dec(const
     return result;
 }
 
+/// turn decomposed functions without operator into pure functions, all other terms are passed through
+template<typename T, std::size_t NDIM>
+std::vector<CCPairFunction<T,NDIM>> CCPairFunction<T,NDIM>::dec_to_pure(const std::vector<CCPairFunction<T,NDIM>>& other) {
+    std::vector<CCPairFunction<T,NDIM>> result;
+    for (const auto& c : other) {
+        if (c.is_decomposed_no_op()) {
+            CCPairFunction<T,NDIM> tmp=copy(c);     // convert a copy, not the (const) input term
+            tmp.convert_to_pure_no_op_inplace();
+            result.push_back(tmp);
+        } else {
+            result.push_back(c);        // not a decomposed function without operator: keep as is
+        }
+    }
+    return result;
+}
+
+
 /// turn decomposed functions with operator into pure functions
 template<typename T, std::size_t NDIM>
 std::vector<CCPairFunction<T,NDIM>> CCPairFunction<T,NDIM>::op_dec_to_pure(const std::vector<CCPairFunction<T,NDIM>>& other) {
-    LowRankFunctionParameters lrparameters;
     std::vector<CCPairFunction<T,NDIM>> result;
     for (const auto& c : other) {
         if (c.is_op_decomposed()) {
@@ -220,6 +242,8 @@ std::vector<CCPairFunction<T,NDIM>> CCPairFunction<T,NDIM>::consolidate(const st
     bool op_dec_to_dec=find(options.begin(),options.end(),"op_dec_to_dec")!=options.end();
     // convert op_dec functions to pure (via fill_tree)
     bool op_dec_to_pure=find(options.begin(),options.end(),"op_dec_to_pure")!=options.end();
+    // convert dec functions to pure (via hartree product)
+    bool dec_to_pure=find(options.begin(),options.end(),"dec_to_pure")!=options.end();
     // reorthogonalize decomposed functions and op_decomposed functions
     bool lindep=find(options.begin(),options.end(),"remove_lindep")!=options.end();
 
@@ -229,6 +253,7 @@ std::vector<CCPairFunction<T,NDIM>> CCPairFunction<T,NDIM>::consolidate(const st
 
     if (op_dec_to_dec) result=CCPairFunction<T,NDIM>::op_dec_to_dec(result,centers);
     if (op_dec_to_pure) result=CCPairFunction<T,NDIM>::op_dec_to_pure(result);
+    if (dec_to_pure) result=CCPairFunction<T,NDIM>::dec_to_pure(result);
     if (op_pure_to_pure) result=CCPairFunction<T,NDIM>::op_pure_to_pure(result);
 
     if (not is_collected(result)) result=collect_same_types(result);
@@ -252,6 +277,8 @@ CCPairFunction<T,NDIM>& CCPairFunction<T,NDIM>::multiply_with_op_inplace(const s
 template<typename T, std::size_t NDIM>
 double
 CCPairFunction<T,NDIM>::make_xy_u(const CCFunction<T,LDIM>& xx, const CCFunction<T,LDIM>& yy) const {
+    CCPairFunction<T,NDIM> bra(xx.function,yy.function);
+    return inner(bra,*this);
     T result = 0.0;
     if (is_pure()) {
         World& world=xx.function.world();
@@ -602,11 +629,9 @@ std::vector<CCPairFunction<T,NDIM>> CCPairFunction<T,NDIM>::apply(const Projecto
     constexpr std::size_t LDIM=CCPairFunction<T,NDIM>::LDIM;
 //    print("apply projector on argument with terms",argument.size());
     if (auto P=dynamic_cast<const Projector<double,LDIM>*>(&projector)) {
-//        print("P->get_particle()",P->get_particle());
         MADNESS_CHECK_THROW(P->get_particle()==0 or P->get_particle()==1,"P Projector particle must be 0 or 1 in CCPairFunction<T,NDIM>");
     }
     if (auto Q=dynamic_cast<const QProjector<double,LDIM>*>(&projector)) {
-//        print("Q->get_particle()",Q->get_particle());
         MADNESS_CHECK_THROW(Q->get_particle()==0 or Q->get_particle()==1,"Q Projector particle must be 0 or 1 in CCPairFunction<T,NDIM>");
     }
     std::vector<CCPairFunction<T,NDIM>> result;
@@ -618,10 +643,15 @@ std::vector<CCPairFunction<T,NDIM>> CCPairFunction<T,NDIM>::apply(const Projecto
                 auto tmp2=CCPairFunction<T,NDIM>(tmp);
                 result.push_back(tmp2);
             } else if (auto P=dynamic_cast<const Projector<double,LDIM>*>(&projector)) {
-                result.push_back(CCPairFunction<T,NDIM>((*P)(pf.get_function(),P->get_particle()+1)));
+                // result.push_back(CCPairFunction<T,NDIM>((*P)(pf.get_function())));
+                auto [left,right]=P->get_vectors_for_outer_product(pf.get_function());
+                result.push_back(CCPairFunction<T,NDIM>(left,right));
+
 
             } else if (auto Q=dynamic_cast<const QProjector<double,LDIM>*>(&projector)) {
-                result.push_back(CCPairFunction<T,NDIM>((*Q)(pf.get_function(),Q->get_particle()+1)));
+                // result.push_back(CCPairFunction<T,NDIM>((*Q)(pf.get_function())));
+                result.push_back(pf);
+                result.push_back(-1.0*Q->get_P_projector()(pf));
 
             } else {
                 MADNESS_EXCEPTION("CCPairFunction<T,NDIM>: unknown projector type",1);
@@ -629,8 +659,8 @@ std::vector<CCPairFunction<T,NDIM>> CCPairFunction<T,NDIM>::apply(const Projecto
         } else if (pf.is_decomposed_no_op()) {  // pair function is sum_i | a_i b_i >
             if (auto SO=dynamic_cast<const StrongOrthogonalityProjector<double,LDIM>*>(&projector)) {
                 // Q12 | kl > = (1-O1)(1-O2) |kl> = |(1-O1)k (1-O2)l>
-                QProjector<double,LDIM> Q1(world,SO->bra1(),SO->ket1());
-                QProjector<double,LDIM> Q2(world,SO->bra2(),SO->ket2());
+                QProjector<double,LDIM> Q1(SO->bra1(),SO->ket1());
+                QProjector<double,LDIM> Q2(SO->bra2(),SO->ket2());
                 result.push_back(CCPairFunction<T,NDIM>(Q1(pf.get_a()),Q2(pf.get_b())));
 
             } else if (auto P=dynamic_cast<const Projector<double,LDIM>*>(&projector)) {
@@ -652,9 +682,9 @@ std::vector<CCPairFunction<T,NDIM>> CCPairFunction<T,NDIM>::apply(const Projecto
 //                CCTimer t(world,"SO block");
                 // Q12 = 1 - O1 (1 - 1/2 O2) - O2 (1 - 1/2 O1)
 //                print("entering SO block");
-                QProjector<double,LDIM> Q1(world,SO->bra1(),SO->ket1());
+                QProjector<double,LDIM> Q1(SO->bra1(),SO->ket1());
                 Q1.set_particle(0);
-                QProjector<double,LDIM> Q2(world,SO->bra2(),SO->ket2());
+                QProjector<double,LDIM> Q2(SO->bra2(),SO->ket2());
                 Q2.set_particle(1);
 
                 Projector<double,LDIM> O1(SO->bra1(),SO->ket1());
diff --git a/src/madness/chem/ccpairfunction.h b/src/madness/chem/ccpairfunction.h
index d805daf9f8b..81a3437d4bb 100644
--- a/src/madness/chem/ccpairfunction.h
+++ b/src/madness/chem/ccpairfunction.h
@@ -25,6 +25,20 @@ class ProjectorBase;
 /// Types of Functions used by CC_function class
 enum FuncType { UNDEFINED, HOLE, PARTICLE, MIXED, RESPONSE };
 
+/// short label for a function type: "tau" (PARTICLE), "phi" (HOLE), "t" (MIXED), "<ex>_x" (RESPONSE, requires ex>=0)
+inline std::string name(const FuncType& type, const int ex=-1) {
+    switch (type) {
+        case PARTICLE: return "tau";
+        case HOLE:     return "phi";
+        case MIXED:    return "t";
+        case RESPONSE:
+            MADNESS_CHECK_THROW(ex>=0,"ex must be >=0");
+            return std::to_string(ex) + "_" + "x";
+        default: MADNESS_EXCEPTION("unknown FuncType",1);   // UNDEFINED and anything else
+    }
+    return "undefined";
+}
+
 /// structure for a CC Function 3D which holds an index and a type
 // the type is defined by the enum FuncType (definition at the start of this file)
 template<typename T=double, std::size_t NDIM=3>
@@ -41,6 +55,17 @@ class CCFunction : public archive::ParallelSerializableObject {
 
     CCFunction(const CCFunction& other) : current_error(other.current_error), function(other.function), i(other.i),
                                           type(other.type) {};
+
+    /// deep copy: the function is duplicated via madness::copy; error, index and type are taken over as they are
+    friend CCFunction copy(const CCFunction& other) {
+        CCFunction duplicate;
+        duplicate.function=madness::copy(other.function);
+        duplicate.current_error=other.current_error;
+        duplicate.type=other.type;
+        duplicate.i=other.i;
+        return duplicate;
+    }
+
     double current_error;
     Function<T,NDIM> function;
 
@@ -116,7 +141,7 @@ class TwoBodyFunctionComponentBase {
     virtual bool has_operator() const = 0;
 //    virtual void set_operator(const std::shared_ptr<CCConvolutionOperator> op) = 0;
 //    virtual const std::shared_ptr<CCConvolutionOperator> get_operator_ptr() const = 0;
-    virtual void print_size() const = 0;
+    virtual void print_size(const std::string name="") const = 0;
     virtual std::string name(const bool transpose=false) const = 0;
     virtual World& world() const =0;
     virtual std::shared_ptr<TwoBodyFunctionComponentBase> clone() = 0;
@@ -155,8 +180,8 @@ class TwoBodyFunctionPureComponent : public TwoBodyFunctionComponentBase {
 
     World& world() const override {return u.world();};
 
-    void print_size() const override {
-        u.print_size(name(false));
+    void print_size(const std::string name1="") const override {
+        u.print_size(name1+name(false));
     }
 
     std::string name(const bool transpose) const override {
@@ -240,7 +265,7 @@ class TwoBodyFunctionSeparatedComponent : public TwoBodyFunctionComponentBase {
         return a.front().world();
     };
 
-    void print_size() const override {
+    void print_size(const std::string name1="") const override {
         if (a.size() > 0) {
             World& world = a.front().world();
             madness::print_size(world, a, "a from " + name(false));
@@ -441,6 +466,9 @@ using pureT=Function<T,NDIM>;
     /// turn decomposed functions with operator into pure functions without operators
     static std::vector<CCPairFunction> op_dec_to_pure(const std::vector<CCPairFunction>& other);
 
+    /// turn decomposed functions without operator into pure functions without operators
+    static std::vector<CCPairFunction> dec_to_pure(const std::vector<CCPairFunction>& other);
+
     /// remove linear dependent terms in the low-rank parts
     static std::vector<CCPairFunction> remove_linearly_dependent_terms(const std::vector<CCPairFunction>& other,
         double thresh=-1.0);
@@ -458,7 +486,7 @@ using pureT=Function<T,NDIM>;
     /// @param[in] centers: a vector of 3D-vectors which are the centers of the grid for low-rank functions
     /// TODO: implement a function for removing linearly dependent terms without orthonormalization
     friend std::vector<CCPairFunction> consolidate(const std::vector<CCPairFunction>& other,
-                                                   const std::vector<std::string> options,
+                                                   const std::vector<std::string> options=std::vector<std::string>(),
                                                    const std::vector<Vector<double,LDIM>> centers=std::vector<Vector<double,LDIM>>()) {
 
         if (other.size()>0) return other.front().consolidate(other,options,centers); // workaround
@@ -617,8 +645,27 @@ using pureT=Function<T,NDIM>;
     }
 
     /// print the size of the functions
-    void print_size() const {
-        if (component) component->print_size();
+    void print_size(const std::string name1="") const {
+        if (not component) {                        // nothing assigned: only report that
+            print("CCPairFunction "+name1+ " not assigned");
+        } else if (component->is_pure()) {          // pure component prints itself, prefixed with name1
+            component->print_size(name1);
+        } else {                                    // otherwise: component sizes, norms of a and b, summary line
+            print("printing",name1,name());
+            double wall=wall_time();
+            component->print_size();
+            double anorm=madness::norm2(world(),get_a());
+            double bnorm=madness::norm2(world(),get_b());
+            print("anorm, bnorm",anorm,bnorm);
+            double norm=this->norm2();
+            std::size_t fsize=get_a().size();
+            constexpr std::size_t bufsize=128;      // compile-time constant: a runtime-sized array (VLA) is not standard C++
+            char buf[bufsize];
+            snprintf(buf, bufsize, "%40s at time %.1fs: norm/  #functions: %7.5f %zu \n",
+                   ((name1+" "+name()).c_str()), wall, norm, fsize);
+            if (world().rank()==0) print(std::string(buf));     // summary line on rank 0 only
+        }
+
     };
 
     std::string name(const bool transpose=false) const {
@@ -627,8 +674,13 @@ using pureT=Function<T,NDIM>;
     }
 
     typename Tensor<T>::scalar_type norm2() const {
-        if (component->is_pure()) return pure().get_function().norm2();
-        if (component->is_decomposed()) {
+        if (is_pure_no_op()) {
+            return pure().get_function().norm2();
+        } else if (is_op_pure()) {
+            double n2=inner(*this,*this);
+            if (n2<0.0) print("norm of ",name()," is < 0.0");
+            return sqrt(std::max(0.0,n2));
+        } else if (component->is_decomposed()) {
             Function<T,LDIM> R2;
             auto tmp= inner_internal(*this,R2);
             typename Tensor<T>::scalar_type result=std::real(tmp);
@@ -760,33 +812,8 @@ using pureT=Function<T,NDIM>;
     const std::pair<std::vector<Function<T,LDIM>>, std::vector<Function<T,LDIM>>> assign_particles(const size_t particle) const;
 
     static std::vector<CCPairFunction<T,NDIM>> apply(const ProjectorBase& P, const std::vector<CCPairFunction<T,NDIM>>& argument);
-
-    /// apply the operator on a CCPairfunction, both with the same dimension
-
-    /// note there is another function, where the operator works only on some dimensions of the CCPairFunction!
-    /// @return result(x) = \int op(x,x') arg(x') dx': a CCPairfunction with the same dimension as the argument
-    friend CCPairFunction<T,NDIM> apply(const SeparatedConvolution<T,NDIM>& G, const CCPairFunction<T,NDIM>& argument) {
-        CCPairFunction result;
-        timer t1(argument.world());
-        if (argument.is_pure()) {
-            result=CCPairFunction(G(argument.get_function()));
-        } else if (argument.is_decomposed_no_op()) {
-            Function<T,NDIM> result1=real_factory_6d(argument.world()).compressed();
-
-            MADNESS_ASSERT(argument.get_a().size() == argument.get_b().size());
-            MADNESS_CHECK_THROW(G.particle()==-1,"G must be a two-particle operator in apply(CCPairFunction)");
-
-            for (size_t k = 0; k < argument.get_a().size(); k++) {
-                const Function<T,NDIM> tmp = G(argument.get_a()[k], argument.get_b()[k]);
-                result1 += tmp;
-            }
-            result=CCPairFunction(result1);
-        } else {
-            MADNESS_EXCEPTION("unknown type in CCPairFunction::apply",1);
-        }
-        t1.end("applying G to " + argument.name());
-        return result;
-    };
+    static std::vector<CCPairFunction<T,NDIM>> apply(const SeparatedConvolution<T,NDIM>& G, const CCPairFunction<T,NDIM>& argument);
+    static std::vector<CCPairFunction<T,NDIM>> apply(const SeparatedConvolution<T,NDIM>& G, const std::vector<CCPairFunction<T,NDIM>>& argument);
 
 
     Function<T,LDIM> partial_inner(const Function<T,LDIM>& f,
@@ -910,12 +937,34 @@ std::vector<CCPairFunction<T,NDIM>> apply(const SeparatedConvolution<T,NDIM/2>&
 }
 
 template<typename T, std::size_t NDIM>
-CCPairFunction<T,NDIM> apply(const ProjectorBase& projector, const CCPairFunction<T,NDIM>& argument) {
-    auto result=madness::apply(projector,std::vector<CCPairFunction<T,NDIM>> (1,argument));
-    MADNESS_CHECK(result.size()==1);
-    return result[0];
+CCPairFunction<T,NDIM> apply(const SeparatedConvolution<T,NDIM>& G, const std::vector<CCPairFunction<T,NDIM>>& argument) {
+    CCPairFunction<T,NDIM> result;  // spelled out: outside the class the bare template name is not tied to this T and NDIM
+    for (const auto& a : argument) result+=G(a);    // accumulate G applied to each term of the argument
+    return result;
 }
 
+/// apply the operator on a CCPairfunction, both with the same dimension
+
+/// note there is another function, where the operator works only on some dimensions of the CCPairFunction!
+/// @return result(x) = \int op(x,x') arg(x') dx': a CCPairfunction with the same dimension as the argument
+template<typename T, std::size_t NDIM>
+CCPairFunction<T,NDIM> apply(const SeparatedConvolution<T,NDIM>& G, const CCPairFunction<T,NDIM>& argument) {
+    CCPairFunction<T,NDIM> result;  // spelled out: outside the class the bare template name is not tied to this T and NDIM
+    timer t1(argument.world());
+    if (argument.is_pure()) {
+        result=CCPairFunction<T,NDIM>(G(argument.get_function()));
+    } else if (argument.is_decomposed_no_op()) {
+        MADNESS_ASSERT(argument.get_a().size() == argument.get_b().size());
+        Function<T,NDIM> result1=G(argument.get_a(), argument.get_b());     // G acts on the vectors a and b as a whole
+        result=CCPairFunction<T,NDIM>(result1);
+    } else {
+        MADNESS_EXCEPTION("unknown type in CCPairFunction::apply",1);
+    }
+    t1.end("applying G to " + argument.name());
+    return result;
+}
+
+
 /// apply the projector on the argument function, potentially yielding a vector of CCPairfunctions as result
 
 /// result can be
@@ -928,6 +977,13 @@ std::vector<CCPairFunction<T,NDIM>> apply(const ProjectorBase& projector, const
 }
 
 
+template<typename T, std::size_t NDIM>
+CCPairFunction<T,NDIM> apply(const ProjectorBase& projector, const CCPairFunction<T,NDIM>& argument) {
+    auto result=madness::apply(projector,std::vector<CCPairFunction<T,NDIM>> (1,argument));
+    MADNESS_CHECK(result.size()==1);
+    return result[0];
+}
+
 template<typename T, std::size_t NDIM>
 Function<T,CCPairFunction<T,NDIM>::LDIM>inner(const CCPairFunction<T,NDIM>& c, const Function<T,CCPairFunction<T,NDIM>::LDIM>& f,
                                               const std::tuple<int,int,int> v1, const std::tuple<int,int,int> v2) {
@@ -987,6 +1043,28 @@ std::vector<CCPairFunction<T,NDIM>> inner(const std::vector<CCPairFunction<T,NDI
     return result;
 }
 
+/// concatenate two lists of pair-function terms: the terms of c1 followed by the terms of c2
+template <typename T, std::size_t NDIM>
+std::vector<CCPairFunction<T,NDIM> > operator+(const std::vector<CCPairFunction<T,NDIM>>& c1, const std::vector<CCPairFunction<T,NDIM> >& c2) {
+    std::vector<CCPairFunction<T,NDIM>> result(c1);     // c1 is taken by reference, so the list is copied only once
+    result.insert(result.end(), c2.begin(), c2.end());
+    return result;
+}
+
+/// difference of two lists of pair-function terms: the terms of c1 followed by the terms of c2 scaled by -1.0
+template <typename T, std::size_t NDIM>
+std::vector<CCPairFunction<T,NDIM> > operator-(const std::vector<CCPairFunction<T,NDIM>>& c1, const std::vector<CCPairFunction<T,NDIM> >& c2) {
+    std::vector<CCPairFunction<T,NDIM>> result(c1);     // c1 is taken by reference, so the list is copied only once
+    for (const auto& l : c2) result.push_back(-1.0*l);
+    return result;
+}
+
+/// append a single pair-function term to a list of terms and return the list
+template <typename T, std::size_t NDIM>
+std::vector<CCPairFunction<T,NDIM> >& operator+=(std::vector<CCPairFunction<T,NDIM> >& lhs, const CCPairFunction<T,NDIM >& rhs) {
+    lhs.push_back(rhs);
+    return lhs;
+}
 
 template <typename T, std::size_t NDIM>
 std::vector<CCPairFunction<T,NDIM> >& operator+=(std::vector<CCPairFunction<T,NDIM> >& rhs,
diff --git a/src/madness/chem/correlationfactor.cc b/src/madness/chem/correlationfactor.cc
index def3c837957..9f34b498a1e 100644
--- a/src/madness/chem/correlationfactor.cc
+++ b/src/madness/chem/correlationfactor.cc
@@ -37,6 +37,7 @@ namespace madness{
 
 	/// create and return a new nuclear correlation factor
 
+	/// note there is also an Ad-hoc nuclear correlation factor, which can only be created directly
 	/// @param[in]	world	the world
 	/// @param[in]	calc	the calculation as read from the input file
 	/// @return 	a nuclear correlation factor
diff --git a/src/madness/chem/correlationfactor.h b/src/madness/chem/correlationfactor.h
index 65e75178d18..d80ff48586c 100644
--- a/src/madness/chem/correlationfactor.h
+++ b/src/madness/chem/correlationfactor.h
@@ -83,7 +83,7 @@ namespace madness {
 class NuclearCorrelationFactor {
 public:
 	enum corrfactype {None, GradientalGaussSlater, GaussSlater, LinearSlater,
-	    Polynomial, Slater, poly4erfc, Two};
+	    Polynomial, Slater, poly4erfc, Two, Adhoc};
 	typedef std::shared_ptr< FunctionFunctorInterface<double,3> > functorT;
 
 	/// ctor
@@ -213,12 +213,14 @@ class NuclearCorrelationFactor {
 	/// the molecule
 	const Molecule& molecule;
 
+protected:
 	/// the three components of the U1 potential
 	std::vector<real_function_3d> U1_function;
 
 	/// the purely local U2 potential, having absorbed the nuclear pot V_nuc
 	real_function_3d U2_function;
 
+private:
 	/// the correlation factor S wrt a given atom
 
 	/// @param[in]	r	the distance of the req'd coord to the nucleus
@@ -2032,6 +2034,65 @@ class PseudoNuclearCorrelationFactor : public NuclearCorrelationFactor {
 };
 
 
+/// this ncf has no information about itself, only U2 and U1 assigned
+class AdhocNuclearCorrelationFactor : public NuclearCorrelationFactor {
+	/// an empty molecule that outlives every instance: the base class stores a `const Molecule&`,
+	/// which must not be bound to a temporary (it would dangle once the ctor has finished)
+	static const Molecule& empty_molecule() {static const Molecule mol{}; return mol;}
+
+public:
+	/// ctor
+
+	/// @param[in]	world	the world
+	/// @param[in]	U2	the U2 potential, stored as is
+	/// @param[in]	U1	the components of the U1 potential, stored as is
+	AdhocNuclearCorrelationFactor(World& world, const real_function_3d U2,
+		const std::vector<real_function_3d>& U1)
+		: NuclearCorrelationFactor(world,empty_molecule()) {
+		U2_function=U2;
+		U1_function=U1;
+		if (world.rank()==0) print("constructed ad hoc nuclear correlation factor");
+	}
+
+	corrfactype type() const {return Adhoc;}
+
+private:
+
+    double Sr_div_S(const double& r, const double& Z) const {
+    	MADNESS_EXCEPTION("no Sr_div_S() in AdhocNuclearCorrelationFactor",0);
+	    return 0.0;
+    }
+
+    double Srr_div_S(const double& r, const double& Z) const {
+    	MADNESS_EXCEPTION("no Srr_div_S() in AdhocNuclearCorrelationFactor",0);
+	    return 0.0;
+    }
+
+    double Srrr_div_S(const double& r, const double& Z) const {
+    	MADNESS_EXCEPTION("no Srrr_div_S() in AdhocNuclearCorrelationFactor",0);
+	    return 0.0;
+    }
+
+    /// the nuclear correlation factor: not available here, always raises an exception
+    double S(const double& r, const double& Z) const {
+    	MADNESS_EXCEPTION("no S() in AdhocNuclearCorrelationFactor",0);
+    	return 0.0;
+    }
+
+    /// radial part first derivative of the nuclear correlation factor: not available here, always raises an exception
+    coord_3d Sp(const coord_3d& vr1A, const double& Z) const {
+    	MADNESS_EXCEPTION("no Sp() in AdhocNuclearCorrelationFactor",0);
+    	return coord_3d(0.0);
+    }
+
+    /// second derivative of the nuclear correlation factor: not available here, always raises an exception
+    double Spp_div_S(const double& r, const double& Z) const {
+    	MADNESS_EXCEPTION("no Spp_div_S() in AdhocNuclearCorrelationFactor",0);
+    	return 0.0;
+    }
+};
+
+
 std::shared_ptr<NuclearCorrelationFactor>
 create_nuclear_correlation_factor(World& world,
 		const Molecule& molecule,
diff --git a/src/madness/chem/electronic_correlation_factor.h b/src/madness/chem/electronic_correlation_factor.h
index a1fab4d12c1..c4b3168b513 100644
--- a/src/madness/chem/electronic_correlation_factor.h
+++ b/src/madness/chem/electronic_correlation_factor.h
@@ -35,20 +35,18 @@ class CorrelationFactor {
     CorrelationFactor(World& world, const double& gamma, const double dcut,
             const Molecule& molecule) : world(world), _gamma(gamma), dcut(dcut) {
         lo=1.e-6;//lo = molecule.smallest_length_scale();
-        if (world.rank()==0) {
-
-            if (gamma>0.0) print("constructed correlation factor with gamma=",gamma);
-            else if (gamma==0.0) print("constructed linear correlation factor");
-        }
+//        if (world.rank()==0) {
+//            if (gamma>0.0) print("constructed correlation factor with gamma=",gamma);
+//            else if (gamma==0.0) print("constructed linear correlation factor");
+//        }
     }
     /// ctor, use negative gamma for linear correlation factor r12
     CorrelationFactor(World& world, const double& gamma, const double dcut,
             const double lo) : world(world), _gamma(gamma), dcut(dcut), lo(lo) {
-        if (world.rank()==0) {
-
-            if (gamma>0.0) print("constructed correlation factor with gamma=",gamma);
-            else if (gamma==0.0) print("constructed linear correlation factor");
-        }
+//        if (world.rank()==0) {
+//            if (gamma>0.0) print("constructed correlation factor with gamma=",gamma);
+//            else if (gamma==0.0) print("constructed linear correlation factor");
+//        }
     }
 
     /// copy ctor
diff --git a/src/madness/chem/lowrankfunction.h b/src/madness/chem/lowrankfunction.h
index 3fbfffe2005..9b8845dda45 100644
--- a/src/madness/chem/lowrankfunction.h
+++ b/src/madness/chem/lowrankfunction.h
@@ -271,8 +271,9 @@ namespace madness {
     public:
         /// ctor takes centers of the grid and the grid parameters
         molecular_grid(const std::vector<Vector<double,NDIM>> origins, const LowRankFunctionParameters& params)
-            : centers(origins) {
-            if (centers.size()==0) centers.push_back({0,0,0});
+            : centers(origins)
+        {
+            if (centers.size()==0) centers.push_back(Vector<double,NDIM>(0) );
             if (params.gridtype()=="random") grid_builder=std::make_shared<randomgrid<NDIM>>(params.volume_element(),params.radius());
             // else if (params.gridtype()=="cartesian") grid_builder=std::make_shared<cartesian_grid<NDIM>>(params.volume_element(),params.radius());
             else if (params.gridtype()=="dftgrid") {
diff --git a/src/madness/chem/mp3.cc b/src/madness/chem/mp3.cc
index da6343dfef7..a720e34f3be 100644
--- a/src/madness/chem/mp3.cc
+++ b/src/madness/chem/mp3.cc
@@ -971,26 +971,26 @@ double MP3::mp3_energy_contribution_macrotask_driver(const Pairs<CCPair>& mp2pai
     MacroTaskMP3 task_square("square");
     MacroTask macrotask_triangular(world,task_triangular,taskq);
     MacroTask macrotask_square(world,task_square,taskq);
-    // auto ghij_future=macrotask_triangular(std::string("ghij"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector<std::string>());
+    auto ghij_future=macrotask_triangular(std::string("ghij"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector<std::string>());
     auto klmn_future=macrotask_square(std::string("klmn"), nact, nact, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector<std::string>());
-    // auto cd_future=macrotask_triangular(std::string("cd"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector<std::string>());
-    // auto ef_future=macrotask_triangular(std::string("ef"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector<std::string>());
-    // taskq->print_taskq();
+    auto cd_future=macrotask_triangular(std::string("cd"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector<std::string>());
+    auto ef_future=macrotask_triangular(std::string("ef"), ij_triangular, dummy, clusterfunc_vec, ket, bra, parameters, nemo_->molecule(), nemo_->R_square, std::vector<std::string>());
+    taskq->print_taskq();
     taskq->run_all();
 
-    // double term_CD=cd_future->get();
-    // double term_EF=ef_future->get();
-    // double term_GHIJ=ghij_future->get();
-    // double term_KLMN=klmn_future->get();
-    // double mp3_energy=term_CD+term_GHIJ+term_KLMN+term_EF;
-    // if (world.rank()==0) {
-        // printf("term_CD    %12.8f\n",term_CD);
-        // printf("term_GHIJ  %12.8f\n",term_GHIJ);
-        // printf("term_KLMN  %12.8f\n",term_KLMN);
-        // printf("term_EF    %12.8f\n",term_EF);
-        // printf("MP3 energy contribution  %12.8f\n",mp3_energy);
-    // }
-    // return mp3_energy;
+    double term_CD=cd_future->get();
+    double term_EF=ef_future->get();
+    double term_GHIJ=ghij_future->get();
+    double term_KLMN=klmn_future->get();
+    double mp3_energy=term_CD+term_GHIJ+term_KLMN+term_EF;
+    if (world.rank()==0) {
+        printf("term_CD    %12.8f\n",term_CD);
+        printf("term_GHIJ  %12.8f\n",term_GHIJ);
+        printf("term_KLMN  %12.8f\n",term_KLMN);
+        printf("term_EF    %12.8f\n",term_EF);
+        printf("MP3 energy contribution  %12.8f\n",mp3_energy);
+    }
+    return mp3_energy;
     return 0.0;
 }
 }
diff --git a/src/madness/chem/nemo.cc b/src/madness/chem/nemo.cc
index 773345b8b3d..2730c2d99e0 100644
--- a/src/madness/chem/nemo.cc
+++ b/src/madness/chem/nemo.cc
@@ -1094,7 +1094,7 @@ vecfuncT Nemo::make_cphf_constant_term(const size_t iatom, const int iaxis,
     const int nmo=nemo.size();
 
     const Tensor<double> occ=get_calc()->get_aocc();
-    QProjector<double,3> Q(world,R2nemo,nemo);
+    QProjector<double,3> Q(R2nemo,nemo);
 
     DNuclear<double,3> Dunuc(world,this,iatom,iaxis);
     vecfuncT Vpsi2b=Dunuc(nemo);
@@ -1162,7 +1162,7 @@ vecfuncT Nemo::solve_cphf(const size_t iatom, const int iaxis, const Tensor<doub
 
     vecfuncT R2nemo=mul(world,R_square,nemo);
     truncate(world,R2nemo);
-    QProjector<double,3> Q(world,R2nemo,nemo);
+    QProjector<double,3> Q(R2nemo,nemo);
 
     // construct quantities that are independent of xi
 
diff --git a/src/madness/chem/projector.h b/src/madness/chem/projector.h
index f40341a5d50..ce348ef35a2 100644
--- a/src/madness/chem/projector.h
+++ b/src/madness/chem/projector.h
@@ -17,11 +17,15 @@ namespace madness {
     class ProjectorBase {
     protected:
         /// a projector might work only on a subset of dimensions, e.g. P(1) | \psi(1,2) >
-        int particle=-1;
+        int particle=-1;        // -1 until set_particle() is called, which accepts only 0 or 1
     public:
         virtual ~ProjectorBase() {}
-        virtual void set_particle(const int p) {particle=p;}
-        int get_particle() const {return particle;}
+        virtual void set_particle(const int p)
+        {
+            MADNESS_CHECK_THROW(p==0 or p==1, "particle must be 0 or 1");
+            particle=p;
+        }
+        virtual int get_particle() const {return particle;}
         virtual std::string type() const = 0;
     };
 
@@ -62,18 +66,35 @@ namespace madness {
 
         /// bra and ket spaces are not symmetric (e.g. |ket>^+ = <bra|R2 )
         Projector(const funcT& bra, const funcT& ket) : mo_ket_(vecfuncT(1,ket))
-                , mo_bra_(vecfuncT(1,bra)) {}
+                , mo_bra_(vecfuncT(1,bra)) {
+            MADNESS_CHECK_THROW(mo_bra_.size()==mo_ket_.size(), "bra and ket spaces must have the same size in projector");
+        }
 
         /// constructor with a set of orbitals to project out
 
         /// bra and ket spaces are symmetric
-        Projector(const vecfuncT& p) : mo_ket_(p), mo_bra_(p) {}
+        Projector(const vecfuncT& p) : mo_ket_(p), mo_bra_(p) {
+            MADNESS_CHECK_THROW(mo_bra_.size()==mo_ket_.size(), "bra and ket spaces must have the same size in projector");
+        }
 
         /// constructor with a set of orbitals to project out
 
         /// bra and ket spaces are not symmetric (e.g. |ket>^+ = <bra|R2 )
         Projector(const vecfuncT& bra, const vecfuncT& ket) : mo_ket_(ket),
-                mo_bra_(bra) {}
+                mo_bra_(bra) {
+            MADNESS_CHECK_THROW(mo_bra_.size()==mo_ket_.size(), "bra and ket spaces must have the same size in projector");
+        }
+
+    	void set_spaces(const vecfuncT& p) {
+            mo_bra_=p;
+            mo_ket_=p;
+        }
+
+        void set_spaces(const vecfuncT& bra, const vecfuncT& ket) {
+            mo_bra_=bra;
+            mo_ket_=ket;
+            MADNESS_CHECK_THROW(mo_bra_.size()==mo_ket_.size(), "bra and ket spaces must have the same size in projector");
+        }
 
         virtual std::string type() const override {return "PProjector";}
 
@@ -109,28 +130,37 @@ namespace madness {
         /// |result> = \sum_p |p(particle)> <p(particle)|f(1,2)>_{particle}
         /// \f]
         /// @param[in] f the 6D function to be projected
-        /// @param[in] the particle that is projected (1 or 2)
+        /// @param[in] the particle that is projected (0 or 1)
         /// @return the projected function
         template<std::size_t KDIM>
         typename std::enable_if<KDIM==2*NDIM, Function<T,KDIM> >::type
-        operator()(const Function<T,KDIM>& f, size_t particle1=size_t(-1)) const {
-            Function<T,KDIM> result = FunctionFactory<T,KDIM>(f.world());
-            if (particle1==size_t(-1)) particle1=particle;
-            MADNESS_CHECK_THROW(particle1 == 1 or particle1 == 2, "particle must be 1 or 2");
-            for (size_t i = 0; i < mo_ket_.size(); i++) {
-                Function<T,NDIM> tmp1 = mo_ket_[i];
-                Function<T,NDIM> tmp2 = f.project_out(mo_bra_[i], particle1 - 1);
-                Function<T,KDIM> tmp12;
-                if (particle1 == 1) {
-                    tmp12 = CompositeFactory<T, KDIM, NDIM>(f.world()).particle1(copy(tmp1)).particle2(copy(tmp2));
-                    tmp12.fill_tree();
-                } else {
-                    tmp12 = CompositeFactory<T, KDIM, NDIM>(f.world()).particle1(copy(tmp2)).particle2(copy(tmp1));
-                    tmp12.fill_tree();
-                }
-                result += tmp12;
+        operator()(const Function<T,KDIM>& f, int particle1=-1) const {
+            Projector P(*this);     // work on a copy: get_vectors_for_outer_product() reads the member, not particle1
+            P.set_particle((particle1==-1) ? get_particle() : particle1);   // -1: use own particle; must end up 0 or 1
+            auto [left,right]=P.get_vectors_for_outer_product(f);
+            return hartree_product(left,right);
+        }
+
+        /// apply the projection parts of the operator on a function f
+
+        /// The operator applied on f(1,2) is
+        ///  O(1)f(1,2) = \sum_i |i(1) > <i(1) | f(1,2)>_1 = \sum_i |i(1) f_i(2)>
+        /// return the lo-dim vectors i and f_i only, perform no outer product
+        std::pair<std::vector<Function<T,NDIM>>,std::vector<Function<T,NDIM>>>
+        get_vectors_for_outer_product(const Function<T,2*NDIM>& f) const {
+            World& world=f.world();
+            reconstruct(world, mo_bra_, false);
+            f.reconstruct(false);
+            reconstruct(world, mo_ket_, true);
+            std::vector<Function<T,NDIM>> projected;
+            for (const auto& i : mo_bra_) {
+                projected.push_back(f.project_out(i,particle));
+            }
+            if (particle==0) return std::make_pair(mo_ket_,projected);
+            else if (particle==1) return std::make_pair(projected,mo_ket_);
+            else {
+                MADNESS_EXCEPTION("confused particles in Projector::get_vector_for_outer_products",1);
             }
-            return result;
         }
 
         template<typename argT>
@@ -162,10 +192,17 @@ namespace madness {
         QProjector() = default;
 
         /// constructor with symmetric bra and ket spaces
-        QProjector(World& world, const vecfuncT& amo) : O(amo) {};
+        [[deprecated]] QProjector(World& world, const vecfuncT& amo) : O(amo) {};
+
+        /// constructor with asymmetric bra and ket spaces
+        [[deprecated]] QProjector(World& world, const vecfuncT& bra, const vecfuncT& ket)
+            : O(bra,ket) {};
+
+        /// constructor with symmetric bra and ket spaces
+        QProjector(const vecfuncT& amo) : O(amo) {};
 
         /// constructor with asymmetric bra and ket spaces
-        QProjector(World& world, const vecfuncT& bra, const vecfuncT& ket)
+        QProjector(const vecfuncT& bra, const vecfuncT& ket)
             : O(bra,ket) {};
 
         /// copy ctor
@@ -173,6 +210,14 @@ namespace madness {
 
         std::string type() const override {return "QProjector";}
 
+        void set_spaces(const vecfuncT& p) {
+            O.set_spaces(p);
+        }
+
+        void set_spaces(const vecfuncT& bra, const vecfuncT& ket) {
+            O.set_spaces(bra,ket);
+        }
+
         Function<T,NDIM> operator()(const Function<T,NDIM>& rhs) const {
             return (rhs-O(rhs)).truncate();
         }
@@ -184,7 +229,7 @@ namespace madness {
             return result;
         }
 
-        Function<T,2*NDIM> operator()(const Function<T,2*NDIM>& f, const size_t particle) const {
+        Function<T,2*NDIM> operator()(const Function<T,2*NDIM>& f, const size_t particle=-1) const {
             return f-O(f,particle);
         }
 
@@ -200,8 +245,12 @@ namespace madness {
         Projector<T,NDIM> get_P_projector() const {return O;}
 
         void set_particle(const int p) override {
-            particle=p;
             O.set_particle(p);
+            particle=p;
+        }
+
+        int get_particle() const override {
+            return O.get_particle();
         }
 
     private:
@@ -249,6 +298,8 @@ namespace madness {
     		bra1_=bra1;
     		ket2_=ket2;
     		bra2_=bra2;
+            MADNESS_CHECK_THROW(ket1.size()==bra1.size(), "bra1 and ket1 spaces must have the same size in SOprojector");
+            MADNESS_CHECK_THROW(ket2.size()==bra2.size(), "bra2 and ket2 spaces must have the same size in SOprojector");
     	}
 
     	/// return the orbital space for the ket of particle 1
@@ -367,6 +418,45 @@ namespace madness {
         std::vector<Function<T,NDIM> > ket1_, bra1_, ket2_, bra2_;
 
     };
+
+
+    /// an outer product of two projectors
+    template<typename projT, typename projQ>
+    class OuterProjector : public ProjectorBase {
+        projT projector0;
+        projQ projector1;
+    public:
+
+        OuterProjector() = default;
+        OuterProjector(const projT& p0, const projQ& p1) : projector0(p0), projector1(p1) {
+            static_assert(std::is_base_of<ProjectorBase,projT>::value, "projT must be a ProjectorBase");
+            static_assert(std::is_base_of<ProjectorBase,projQ>::value, "projQ must be a ProjectorBase");
+            projector0.set_particle(0);
+            projector1.set_particle(1);
+        }
+
+        std::string type() const override {
+            return "OuterProjector";
+        }
+
+        template<typename resultT>
+        resultT operator()(const resultT& argument) const {
+
+            if (projector0.type()=="PProjector") return projector1(projector0(argument));
+            return projector0(projector1(argument));
+        }
+    };
+
+//    template<typename projT, typename projQ>
+//    OuterProjector<projT, projQ> outer(const projT& p0 , const projQ& p1) {
+//        return OuterProjector<projT, projQ>(p0, p1);
+//    }
+
+    template<typename projT, typename projQ>
+    typename std::enable_if<std::is_base_of<ProjectorBase,projT>::value, OuterProjector<projT,projQ>>::type
+    outer(const projT& p0 , const projQ& p1) {
+        return OuterProjector<projT, projQ>(p0, p1);
+    }
 }
 
 #endif /* PROJECTOR_H_ */
diff --git a/src/madness/chem/test_ccpairfunction.cc b/src/madness/chem/test_ccpairfunction.cc
index 862c8613e4a..5e9682d86b0 100644
--- a/src/madness/chem/test_ccpairfunction.cc
+++ b/src/madness/chem/test_ccpairfunction.cc
@@ -195,6 +195,31 @@ int test_constructor(World& world, std::shared_ptr<NuclearCorrelationFactor> ncf
     return t1.end();
 }
 
+template<typename T, std::size_t NDIM>
+int test_norm(World& world, std::shared_ptr<NuclearCorrelationFactor> ncf, data<T,NDIM>& data,
+                     const CCParameters& parameter) {
+    test_output t1("norm of <T,"+std::to_string(NDIM)+">");
+
+    auto [p1,p2,p3,p4,p5]=data.get_ccpairfunctions();  // p2-p5 correspond to f230
+    for (const CCPairFunction<T,NDIM>& p : {p2,p3,p4,p5}) {
+        double n=p.norm2();
+        print("norm of ",p.name(),n);
+        double n1=sqrt(inner(p,p));
+        print("inner",n1);
+        t1.checkpoint(n,n1,FunctionDefaults<NDIM>::get_thresh(),"norm of p");
+    }
+
+    double n2=p2.norm2();
+    double n3=p3.norm2();
+    double n4=p4.norm2();
+    double n5=p5.norm2();
+    t1.checkpoint(n2,n4,FunctionDefaults<NDIM>::get_thresh(),"norm of p2/4");
+    t1.checkpoint(n3,n5,FunctionDefaults<NDIM>::get_thresh(),"norm of p3/5");
+
+    return t1.end();
+
+}
+
 template<typename T, std::size_t NDIM>
 int test_load_store(World& world, std::shared_ptr<NuclearCorrelationFactor> ncf, data<T,NDIM>& data,
                      const CCParameters& parameter) {
@@ -1133,7 +1158,7 @@ int test_projector(World& world, std::shared_ptr<NuclearCorrelationFactor> ncf,
     std::vector<CCPairFunction<T,NDIM>> vp3({p3});
 
     Projector<T,LDIM> O(o,o);
-    QProjector<T,LDIM> Q(world,o,o);
+    QProjector<T,LDIM> Q(o,o);
     StrongOrthogonalityProjector<T,LDIM> Q12(world);
     Q12.set_spaces(o);
 
@@ -1151,7 +1176,10 @@ int test_projector(World& world, std::shared_ptr<NuclearCorrelationFactor> ncf,
         O.set_particle(0);
         {
             double ref=inner({CCPairFunction<T,NDIM>({of1},{f2})},vp[i]);
-            double result=inner({CCPairFunction<T,NDIM>({f1},{f2})},O(vp[i]));
+            auto tmp=O(vp[i]);
+            t1.checkpoint(tmp.size()==1,"vector size correct");
+            t1.checkpoint(tmp[0].is_decomposed(),"O(argument) is decomposed");
+            double result=inner({CCPairFunction<T,NDIM>({f1},{f2})},tmp);
             t1.checkpoint(result,ref,thresh,"O1 p"+std::to_string(i));
         }
 
@@ -1159,7 +1187,10 @@ int test_projector(World& world, std::shared_ptr<NuclearCorrelationFactor> ncf,
         O.set_particle(1);
         {
             double ref=inner({CCPairFunction<T,NDIM>({f1},{of2})},vp[i]);
-            double result=inner({CCPairFunction<T,NDIM>({f1},{f2})},O(vp[i]));
+            auto tmp=O(vp[i]);
+            t1.checkpoint(tmp.size()==1,"vector size correct");
+            t1.checkpoint(tmp[0].is_decomposed(),"O(argument) is decomposed");
+            double result=inner({CCPairFunction<T,NDIM>({f1},{f2})},tmp);
             t1.checkpoint(result,ref,thresh,"O2 p"+std::to_string(i));
         }
         // Q1
@@ -1315,20 +1346,21 @@ int main(int argc, char **argv) {
         auto data4=data<double,4>(world,ccparam);
         auto data6=data<double,6>(world,ccparam);
 
-//        isuccess+=test_constructor<double,2>(world, ncf, data2, ccparam);
-//        isuccess+=test_load_store<double,2>(world,ncf,data2,ccparam);
-//        isuccess+=test_operator_apply<double,2>(world, ncf, data2, ccparam);
-//        isuccess+=test_transformations<double,2>(world, ncf, data2, ccparam);
-//        isuccess+=test_multiply_with_f12<double,2>(world, ncf, data2, ccparam);
-//        isuccess+=test_inner<double,2>(world, ncf, data2, ccparam);
-//        isuccess+=test_multiply<double,2>(world, ncf, data2, ccparam);
-//        isuccess+=test_swap_particles<double,2>(world, ncf, data2, ccparam);
-//        isuccess+=test_scalar_multiplication<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_constructor<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_load_store<double,2>(world,ncf,data2,ccparam);
+        isuccess+=test_operator_apply<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_norm<double,2>(world,ncf,data2,ccparam);
+        isuccess+=test_transformations<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_multiply_with_f12<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_inner<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_multiply<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_swap_particles<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_scalar_multiplication<double,2>(world, ncf, data2, ccparam);
         isuccess+=test_projector<double,2>(world, ncf, data2, ccparam);
-        // isuccess+=test_partial_inner_3d<double,2>(world, ncf, data2, ccparam);
-        // isuccess+=test_partial_inner_6d<double,2>(world, ncf, data2, ccparam);
-        // isuccess+=test_apply<double,2>(world, ncf, data2, ccparam);
-        // isuccess+=test_consolidate<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_partial_inner_3d<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_partial_inner_6d<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_apply<double,2>(world, ncf, data2, ccparam);
+        isuccess+=test_consolidate<double,2>(world, ncf, data2, ccparam);
 
 
 //        isuccess+=test_constructor<double,4>(world, ncf, data4, ccparam);
diff --git a/src/madness/chem/test_projector.cc b/src/madness/chem/test_projector.cc
index 1cf44fd8147..5aff5b528a4 100644
--- a/src/madness/chem/test_projector.cc
+++ b/src/madness/chem/test_projector.cc
@@ -70,6 +70,44 @@ int test_projector(World& world) {
 
 
 
+    return t1.end();
+}
+
+template<typename T, std::size_t NDIM>
+int test_projector_outer(World& world) {
+    test_output t1("testing projector_outer for dimension " + std::to_string(NDIM));
+    constexpr std::size_t LDIM=NDIM/2;
+    static_assert(2*LDIM==NDIM);
+
+    auto g1=[](const Vector<double,LDIM>& r){return exp(-inner(r,r));};
+    auto g_hidim=[](const Vector<double,NDIM>& r){return 2.0*exp(-3.0*inner(r,r));};
+    Function<double,LDIM> f1=FunctionFactory<double,LDIM>(world).f(g1);
+    Function<double,NDIM> f_hidim=FunctionFactory<double,NDIM>(world).f(g_hidim);
+
+
+    // compare explicit SO projector Q12 and outer product projector Q1Q2
+    StrongOrthogonalityProjector<double,LDIM> Q1(world);
+    Q1.set_spaces({f1});
+
+    QProjector<double,LDIM> q(world,{f1});
+    auto Q2=outer(q,q);
+
+    auto Q1f=Q1(f_hidim);
+    auto Q2f=Q2(f_hidim);
+    double err=(Q1f-Q2f).norm2();
+    print("error",err);
+    double norm1=Q1f.norm2();
+    double norm2=Q2f.norm2();
+    print("norm1/2",norm1,norm2);
+    double trace1=Q1f.trace();
+    double trace2=Q2f.trace();
+    print("trace1/2",trace1,trace2);
+
+    t1.checkpoint(norm1-norm2,FunctionDefaults<NDIM>::get_thresh(),"Q1 direct and Q2 outer are the same");
+    t1.checkpoint(trace1-trace2,FunctionDefaults<NDIM>::get_thresh(),"Q1 direct and Q2 outer are the same");
+    // loosen threshold due to outer product
+    t1.checkpoint(err,FunctionDefaults<NDIM>::get_thresh()*3.0,"Q1 direct and Q2 outer are the same");
+
     return t1.end();
 }
 
@@ -128,8 +166,8 @@ int test_Q12_projector(World& world) {
     // SO(f) = f - O1(f) - O2(f) + O1O2(f)
     Projector<T,LDIM> O1(vphi);
     Projector<T,LDIM> O2(vphi);
-    O1.set_particle(1);
-    O2.set_particle(2);
+    O1.set_particle(0);
+    O2.set_particle(1);
     Function<T,NDIM> f3=f-O1(f)-O2(f)+O1(O2(f));
     double err1=(f1-f3).norm2()/f.norm2();
     print("err1",err1);
@@ -191,6 +229,8 @@ int main(int argc, char**argv) {
         error+=test_projector<double,3>(world);
         error+=test_projector<double,4>(world);
 
+        error+=test_projector_outer<double,2>(world);
+
         if (HAVE_GENTENSOR) {
             error+=test_Q12_projector<double,2>(world);
             error+=test_Q12_projector<double,4>(world);
diff --git a/src/madness/chem/zcis.h b/src/madness/chem/zcis.h
index aee71bfe1ae..6899e937cb2 100644
--- a/src/madness/chem/zcis.h
+++ b/src/madness/chem/zcis.h
@@ -126,7 +126,7 @@ class Zcis : public QCPropertyInterface {
 
 
 	Zcis(World& w, const commandlineparser& parser, std::shared_ptr<Znemo> n) : world(w), cis_param(world, parser), nemo(n),
-		Qa(world,nemo->amo,nemo->amo), Qb(world,nemo->bmo,nemo->bmo) {
+		Qa(nemo->amo,nemo->amo), Qb(nemo->bmo,nemo->bmo) {
 		cis_param.print("response","end");
 		print("Qa projector",Qa.get_ket_vector().size());
 		print("Qb projector",Qb.get_ket_vector().size());
diff --git a/src/madness/misc/CMakeLists.txt b/src/madness/misc/CMakeLists.txt
index f825dcfa6a3..ff5f0a7219b 100644
--- a/src/madness/misc/CMakeLists.txt
+++ b/src/madness/misc/CMakeLists.txt
@@ -2,7 +2,7 @@
 
 set(MADMISC_HEADERS misc.h ran.h phandler.h interpolation_1d.h cfft.h info.h gnuplot.h)
 set(MADMISC_SOURCES
-    checksum_file.cc position_stream.cc gprofexit.cc ran.cc cfft.cc info.cc)
+    checksum_file.cc position_stream.cc gprofexit.cc ran.cc cfft.cc info.cc unique_filename.cc)
 # retrieve git metadata
 include(GetGitMetadata)
 vgkit_cmake_git_metadata()
diff --git a/src/madness/mra/QCCalculationParametersBase.h b/src/madness/mra/QCCalculationParametersBase.h
index 9d6d7fbb951..b563911bd89 100644
--- a/src/madness/mra/QCCalculationParametersBase.h
+++ b/src/madness/mra/QCCalculationParametersBase.h
@@ -388,7 +388,7 @@ class QCCalculationParametersBase {
 		std::transform(key_lower.begin(), key_lower.end(), key_lower.begin(), ::tolower);
 		std::transform(svalue.begin(), svalue.end(), svalue.begin(), ::tolower);
 		std::vector<std::string> av_lower_vec;
-		for (auto av : allowed_values) {
+		for (const T& av : allowed_values) {
 			std::string av_lower=tostring(av);
 			std::transform(av_lower.begin(), av_lower.end(), av_lower.begin(), ::tolower);
 			av_lower_vec.push_back(av_lower);
@@ -598,6 +598,7 @@ class QCCalculationParametersBase {
 	static std::string tostring(const T& arg) {
                 using madness::operators::operator<<;
 		std::ostringstream ss;
+		static_assert(not std::is_same<T,bool>::value, "you need to specialize tostring for this type");
 
 		ss<<std::scientific  << std::setprecision(4) << arg;
 		std::string str=ss.str();
diff --git a/src/madness/mra/macrotaskq.h b/src/madness/mra/macrotaskq.h
index 611f2f5e6ff..d4210e75ef2 100644
--- a/src/madness/mra/macrotaskq.h
+++ b/src/madness/mra/macrotaskq.h
@@ -303,9 +303,11 @@ class MacroTaskQ : public WorldObject< MacroTaskQ> {
 			print("redirecting output to files task.#####");
 		}
 
-
+		double cpu0=cpu_time();
 		cloud.replicate();
         universe.gop.fence();
+		double cpu1=cpu_time();
+		if (printtimings()) print("cloud replication wall time",cpu1-cpu0);
         if (printdebug()) cloud.print_size(universe);
         universe.gop.set_forbid_fence(true); // make sure there are no hidden universe fences
         pmap1=FunctionDefaults<1>::get_pmap();
@@ -363,6 +365,7 @@ class MacroTaskQ : public WorldObject< MacroTaskQ> {
 		// cleanup task-persistent input data
 		for (auto& task : taskq) task->cleanup();
 		cloud.clear_cache(subworld);
+		cloud.clear();
 		subworld.gop.fence();
         subworld.gop.fence();
         universe.gop.fence();
@@ -503,7 +506,7 @@ class MacroTask {
 
     /// constructor takes the actual task
     MacroTask(World &world, taskT &task, std::shared_ptr<MacroTaskQ> taskq_ptr = 0)
-            : task(task), world(world), taskq_ptr(taskq_ptr) {
+            : task(task), name(task.name), world(world), taskq_ptr(taskq_ptr) {
         if (taskq_ptr) {
             // for the time being this condition must hold because tasks are
             // constructed as replicated objects and are not broadcast to other processes
@@ -638,7 +641,14 @@ class MacroTask {
             const argtupleT argtuple = cloud.load<argtupleT>(subworld, inputrecords);
             const argtupleT batched_argtuple = task.batch.template copy_input_batch(argtuple);
         	try {
+			    print("starting task no",element, "in subworld",subworld.id(),"at time",wall_time());
+        	    double cpu0=cpu_time();
         		resultT result_tmp = std::apply(task, batched_argtuple);
+        	    double cpu1=cpu_time();
+			    std::size_t bufsize=256;
+			    char buffer[bufsize];
+		    	std::snprintf(buffer,bufsize,"completed task %3ld after %6.1fs at time %6.1fs\n",element,cpu1-cpu0,wall_time());
+        		print(std::string(buffer));
 
         		resultT result = get_output(subworld, cloud, argtuple);       // lives in the universe
         		if constexpr (is_madness_function<resultT>::value) {
@@ -706,6 +716,7 @@ class MacroTask {
 class MacroTaskOperationBase {
 public:
     Batch batch;
+	std::string name="unknown_task";
     std::shared_ptr<MacroTaskPartitioner> partitioner=0;
     MacroTaskOperationBase() : batch(Batch(_, _, _)), partitioner(new MacroTaskPartitioner) {}
 };
diff --git a/src/madness/mra/mra.h b/src/madness/mra/mra.h
index a34d2edd3a5..e14be5fb59d 100644
--- a/src/madness/mra/mra.h
+++ b/src/madness/mra/mra.h
@@ -496,8 +496,11 @@ namespace madness {
 
         /// print some info about this
         void print_size(const std::string name) const {
-            if (!impl) print("function",name,"not assigned yet");
-            impl->print_size(name);
+            if (!impl) {
+                print("function",name,"not assigned yet");
+            } else {
+                impl->print_size(name);
+            }
         }
 
         /// Returns the maximum depth of the function tree ... collective global sum
@@ -2101,8 +2104,10 @@ namespace madness {
             result.get_impl()->recursive_apply(op, f1[i].get_impl().get(),f2[i].get_impl().get(),false);
         world.gop.fence();
 
-        result.get_impl()->print_timer();
-        op.print_timer();
+        if (op.print_timings) {
+            result.get_impl()->print_timer();
+            op.print_timer();
+        }
 
 		result.get_impl()->finalize_apply();	// need fence before reconstruct
 
diff --git a/src/madness/mra/test_cloud.cc b/src/madness/mra/test_cloud.cc
index b421e8b1efa..3b2027559ec 100644
--- a/src/madness/mra/test_cloud.cc
+++ b/src/madness/mra/test_cloud.cc
@@ -31,6 +31,43 @@ struct gaussian {
     }
 };
 
+
+/// this class stores different member variables in different records of the cloud
+class custom_serialize_tester {
+public:
+    int i;
+    double d;
+
+    custom_serialize_tester() : i(0), d(0.0) {}
+    bool operator==(const custom_serialize_tester& other) const {
+        return i == other.i && d == other.d;
+    }
+
+    /// customized function to store this to the cloud
+
+    /// functions and constant_part can be very large and we want to split them and store them in different records
+    Recordlist<Cloud::keyT> cloud_store(World& world, Cloud& cloud) const {
+        // save bookkeeping stuff in a vector
+        std::vector<unsigned char> v;
+        archive::VectorOutputArchive arout(v);
+        arout &  i;
+
+        Recordlist<Cloud::keyT> records;
+        records+=cloud.store(world,v);
+        records+=cloud.store(world,d);
+        return records;
+    }
+
+    void cloud_load(World& world, const Cloud& cloud, Recordlist<Cloud::keyT>& recordlist) {
+        std::vector<unsigned char> v=cloud.forward_load<std::vector<unsigned char>>(world,recordlist);
+        archive::VectorInputArchive arin(v);
+        arin & i;
+        d=cloud.forward_load<double>(world,recordlist);
+    }
+
+
+};
+
 template<typename T>
 double norm(const T i1) { return fabs(i1); }
 
@@ -157,6 +194,35 @@ int test_custom_worldobject(World& universe, World& subworld, Cloud& cloud) {
     double error=d1-d2;
     cloud.set_force_load_from_cache(false);
     return t1.end(error < 1.e-10 );
+}
+
+int test_custom_serialization(World& universe, Cloud& cloud) {
+    test_output t1("testing custom serialization");
+    t1.set_cout_to_terminal();
+    cloud.set_debug(true);
+    custom_serialize_tester cst;
+    cst.i=1;
+    cst.d=2.0;
+    static_assert(Cloud::has_cloud_serialize<custom_serialize_tester>::value,"custom_serialize_tester must have a cloud_serialize method");
+    {
+        auto records = cloud.store(universe, cst);
+        auto cst2=cloud.load<custom_serialize_tester>(universe, records);
+        t1.checkpoint(cst==cst2,"custom serialization");
+    }
+
+    // test being part of a tuple
+    typedef std::tuple<int,double,custom_serialize_tester> tupleT;
+    tupleT tuple1=std::make_tuple(1,2.0,cst);
+    cloud.clear();
+    {
+        auto records = cloud.store(universe, tuple1);
+        auto tuple2=cloud.load<tupleT>(universe, records);
+
+        t1.checkpoint(tuple1==tuple2,"custom serialization with tuple");
+    }
+
+    return t1.end();
+
 
 }
 
@@ -177,6 +243,7 @@ int main(int argc, char **argv) {
 
         // test storing custom WorldObject
         success += test_custom_worldobject(universe, subworld, cloud);
+        success += test_custom_serialization(universe, cloud);
 
         if (universe.rank() == 0) print("entering test_cloud");
         print("my world: universe_rank, subworld_id", universe.rank(), subworld.id());
diff --git a/src/madness/tensor/srconf.h b/src/madness/tensor/srconf.h
index 9ecb4858b48..6e5797c60d8 100644
--- a/src/madness/tensor/srconf.h
+++ b/src/madness/tensor/srconf.h
@@ -663,7 +663,7 @@ namespace madness {
 	public:
 		/// return the number of physical dimensions
 		int dim_per_vector(int idim) const {
-			MADNESS_ASSERT(vector_.size()>idim);
+                    MADNESS_ASSERT(vector_.size()>size_t(idim));
 			return vector_[idim].ndim()-1;		// remove dimension for the rank
 		}
 
diff --git a/src/madness/world/cloud.h b/src/madness/world/cloud.h
index ae05897c9f8..f5638351863 100644
--- a/src/madness/world/cloud.h
+++ b/src/madness/world/cloud.h
@@ -68,6 +68,13 @@ struct Recordlist {
     template <typename T>
     using has_member_id = madness::meta::is_detected<member_id_t, T>;
 
+    // if type provides a hashing function use that, intrusive hashing, see worldhash.h
+    template <typename T>
+    using member_hash_t = decltype(std::declval<T>().hash());
+
+    template <typename T>
+    using has_member_hash = madness::meta::is_detected<member_hash_t, T>;
+
     template<typename T, std::size_t NDIM>
     static keyT compute_record(const Function<T,NDIM>& arg) {return hash_value(arg.get_impl()->id());}
 
@@ -99,7 +106,10 @@ struct Recordlist {
         } else if constexpr (std::is_pointer_v<T> && has_member_id<std::remove_pointer_t<T>>::value) {
             return hash_value(arg->id());
         } else {
-            return hash_value(arg);
+            // compute hash_code for fundamental types
+            std::size_t hashtype = typeid(T).hash_code();
+            hash_combine(hashtype,hash_value(arg));
+            return hashtype;
         }
     }
 
@@ -154,6 +164,13 @@ class Cloud {
     cacheT cached_objects;
     recordlistT local_list_of_container_keys;   // a world-local list of keys occupied in container
 
+public:
+    template <typename T>
+    using member_cloud_serialize_t = decltype(std::declval<T>().cloud_store(std::declval<World&>(), std::declval<Cloud&>()));
+
+    template <typename T>
+    using has_cloud_serialize = madness::meta::is_detected<member_cloud_serialize_t, T>;
+
 public:
 
     /// @param[in]	universe	the universe world
@@ -176,12 +193,17 @@ class Cloud {
     void print_size(World& universe) {
 
         std::size_t memsize=0;
-        for (auto& item : container) memsize+=item.second.size();
+        std::size_t max_record_size=0;
+        for (auto& item : container) {
+            memsize+=item.second.size();
+            max_record_size=std::max(max_record_size,item.second.size());
+        }
         std::size_t global_memsize=memsize;
         std::size_t max_memsize=memsize;
         std::size_t min_memsize=memsize;
         universe.gop.sum(global_memsize);
         universe.gop.max(max_memsize);
+        universe.gop.max(max_record_size);
         universe.gop.min(min_memsize);
 
         auto local_size=container.size();
@@ -193,25 +215,25 @@ class Cloud {
             print("Cloud memory:");
             print("  replicated:",is_replicated);
             print("size of cloud (total)");
-            print("  number of records:",global_size);
-            print("  memory in GBytes: ",global_memsize*byte2gbyte);
+            print("  number of records:        ",global_size);
+            print("  memory in GBytes:         ",global_memsize*byte2gbyte);
             print("size of cloud (average per node)");
-            print("  number of records:",double(global_size)/universe.size());
-            print("  memory in GBytes: ",global_memsize*byte2gbyte/universe.size());
+            print("  number of records:        ",double(global_size)/universe.size());
+            print("  memory in GBytes:         ",global_memsize*byte2gbyte/universe.size());
             print("min/max of node");
-            print("  memory in GBytes: ",min_memsize*byte2gbyte,max_memsize*byte2gbyte);
+            print("  memory in GBytes:         ",min_memsize*byte2gbyte,max_memsize*byte2gbyte);
+            print("  max record size in GBytes:",max_record_size*byte2gbyte);
+
         }
     }
 
     void print_timings(World &universe) const {
         double rtime = double(reading_time);
         double wtime = double(writing_time);
-        double wtime1 = double(writing_time1);
         double ptime = double(replication_time);
-        universe.gop.max(rtime);
-        universe.gop.max(wtime);
-        universe.gop.max(wtime1);
-        universe.gop.max(ptime);
+        universe.gop.sum(rtime);
+        universe.gop.sum(wtime);
+        universe.gop.sum(ptime);
         long creads = long(cache_reads);
         long cstores = long(cache_stores);
         universe.gop.sum(creads);
@@ -219,10 +241,9 @@ class Cloud {
         if (universe.rank() == 0) {
             auto precision = std::cout.precision();
             std::cout << std::fixed << std::setprecision(1);
-            print("cloud storing wall time", wtime * 0.001);
-            print("cloud storing wall time inner loop", wtime1 * 0.001);
-            print("cloud replication wall time", ptime * 0.001);
-            print("cloud reading wall time", rtime * 0.001, std::defaultfloat);
+            print("cloud storing cpu time", wtime * 0.001);
+            print("cloud replication cpu time", ptime * 0.001);
+            print("cloud reading cpu time", rtime * 0.001, std::defaultfloat);
             std::cout << std::setprecision(precision) << std::scientific;
             print("cloud cache stores    ", long(cstores));
             print("cloud cache loads     ", long(creads));
@@ -234,6 +255,10 @@ class Cloud {
         subworld.gop.fence();
     }
 
+    void clear() {
+        container.clear();
+    }
+
     void clear_timings() {
         reading_time=0l;
         writing_time=0l;
@@ -249,10 +274,26 @@ class Cloud {
     T load(madness::World &world, const recordlistT recordlist) const {
         recordlistT rlist = recordlist;
         cloudtimer t(world, reading_time);
+
+        // forward_load will consume the recordlist while loading elements
+        return forward_load<T>(world, rlist);
+    }
+
+    /// load a single object from the cloud, recordlist is consumed while loading elements
+    template<typename T>
+    T forward_load(madness::World &world, recordlistT& recordlist) const {
+        // different objects are stored in different ways
+        // - tuples are split up into their components
+        // - classes with their own cloud serialization are stored using that
+        // - everything else is stored using their usual serialization
         if constexpr (is_tuple<T>::value) {
-            return load_tuple<T>(world, rlist);
+            return load_tuple<T>(world, recordlist);
+        } else if constexpr (has_cloud_serialize<T>::value) {
+            T target = allocator<T>(world);
+            target.cloud_load(world, *this, recordlist);
+            return target;
         } else {
-            return load_other<T>(world, rlist);
+            return do_load<T>(world, recordlist);
         }
     }
 
@@ -264,9 +305,16 @@ class Cloud {
             MADNESS_EXCEPTION("cloud error",1);
         }
         cloudtimer t(world,writing_time);
+
+        // different objects are stored in different ways
+        // - tuples are split up into their components
+        // - classes with their own cloud serialization are stored using that
+        // - everything else is stored using their usual serialization
         recordlistT recordlist;
         if constexpr (is_tuple<T>::value) {
             recordlist+=store_tuple(world,source);
+        } else if constexpr (has_cloud_serialize<T>::value) {
+            recordlist+=source.cloud_store(world,*this);
         } else {
             recordlist+=store_other(world,source);
         }
@@ -371,17 +419,16 @@ class Cloud {
         }
     };
 
-
     template<typename T>
     void cache(madness::World &world, const T &obj, const keyT &record) const {
         const_cast<cacheT &>(cached_objects).insert({record,std::make_any<T>(obj)});
     }
 
+    /// load an object from the cache, record is unchanged
     template<typename T>
     T load_from_cache(madness::World &world, const keyT &record) const {
         if (world.rank()==0) cache_reads++;
         if (debug) print("loading", typeid(T).name(), "from cache record", record, "to world", world.id());
-//        if (auto obj = std::get_if<T>(&cached_objects.find(record)->second)) return *obj;
         if (auto obj = std::any_cast<T>(&cached_objects.find(record)->second)) return *obj;
         MADNESS_EXCEPTION("failed to load from cloud-cache", 1);
         T target = allocator<T>(world);
@@ -417,7 +464,6 @@ class Cloud {
         bool is_already_present= is_in_container(record);
         if (debug) {
             if (is_already_present) std::cout << "skipping ";
-            std::string msg;
             if constexpr (Recordlist<keyT>::has_member_id<T>::value) {
                 std::cout << "storing world object of " << typeid(T).name() << "id " << source.id() << " to record " << record << std::endl;
             }
@@ -438,20 +484,29 @@ class Cloud {
         return recordlistT{record};
     }
 
+public:
+    /// load a vector from the cloud, pop records from recordlist
+    ///
+    /// @param[inout]    world	destination world
+    /// @param[inout]    recordlist	list of records to load from (reduced by the first few elements)
     template<typename T>
     typename std::enable_if<is_vector<T>::value, T>::type
-    load_other(World &world, recordlistT &recordlist) const {
-        std::size_t sz = load_other<std::size_t>(world, recordlist);
+    do_load(World &world, recordlistT &recordlist) const {
+        std::size_t sz = do_load<std::size_t>(world, recordlist);
         T target(sz);
         for (std::size_t i = 0; i < sz; ++i) {
-            target[i] = load_other<typename T::value_type>(world, recordlist);
+            target[i] = do_load<typename T::value_type>(world, recordlist);
         }
         return target;
     }
 
+    /// load a single object from the cloud, pop record from recordlist
+    ///
+    /// @param[inout]    world	destination world
+    /// @param[inout]    recordlist	list of records to load from (reduced by the first element)
     template<typename T>
     typename std::enable_if<!is_vector<T>::value, T>::type
-    load_other(World &world, recordlistT &recordlist) const {
+    do_load(World &world, recordlistT &recordlist) const {
         keyT record = recordlist.pop_front_and_return();
         if (force_load_from_cache) MADNESS_CHECK(is_cached(record));
 
@@ -465,6 +520,8 @@ class Cloud {
         return target;
     }
 
+public:
+
     // overloaded
     template<typename T>
     recordlistT store_other(madness::World& world, const std::vector<T>& source) {
@@ -491,12 +548,16 @@ class Cloud {
         return v;
     }
 
+    /// load a tuple from the cloud, pop records from recordlist
+    ///
+    /// @param[inout]    world	destination world
+    /// @param[inout]    recordlist	list of records to load from (reduced by the first few elements)
     template<typename T>
     T load_tuple(madness::World &world, recordlistT &recordlist) const {
         if (debug) std::cout << "loading tuple of type " << typeid(T).name() << " to world " << world.id() << std::endl;
         T target;
         std::apply([&](auto &&... args) {
-            ((args = load_other<typename std::remove_reference<decltype(args)>::type>(world, recordlist)), ...);
+            ((args = forward_load<typename std::remove_reference<decltype(args)>::type>(world, recordlist)), ...);
         }, target);
         return target;
     }
diff --git a/src/madness/world/parallel_archive.h b/src/madness/world/parallel_archive.h
index bcfedf9e2af..45cb9c8a61c 100644
--- a/src/madness/world/parallel_archive.h
+++ b/src/madness/world/parallel_archive.h
@@ -104,7 +104,7 @@ namespace madness {
 
             /// \return The process doing I/O for this node.
             ProcessID my_io_node() const {
-                MADNESS_ASSERT(world);
+                MADNESS_CHECK(world);
                 return io_node(world->rank());
             }
 
@@ -112,7 +112,7 @@ namespace madness {
 
             /// \return The number of I/O clients for this node, including self (zero if not an I/O node).
             int num_io_clients() const {
-                MADNESS_ASSERT(world);
+                MADNESS_CHECK(world);
                 return nclient;
             }
 
@@ -120,7 +120,7 @@ namespace madness {
 
             /// \return True if this node is doing physical I/O.
             bool is_io_node() const {
-                MADNESS_ASSERT(world);
+                MADNESS_CHECK(world);
                 return world->rank() == my_io_node();
             }
 
@@ -128,7 +128,7 @@ namespace madness {
 
             /// \return A pointer to the world.
             World* get_world() const {
-                MADNESS_ASSERT(world);
+                MADNESS_CHECK(world);
                 return world;
             }
 
@@ -166,12 +166,12 @@ namespace madness {
                 if (nio > maxio) nio = maxio; // Sanity?
                 if (nio > world.size()) nio = world.size();
 
-                MADNESS_ASSERT(filename);
-                MADNESS_ASSERT(strlen(filename)-1<sizeof(fname));
+                MADNESS_CHECK(filename);
+                MADNESS_CHECK(filename[0] && strlen(filename)<sizeof(fname)); // strcpy below writes strlen+1 bytes (incl. NUL); empty names remain rejected
                 strcpy(fname,filename); // Save the filename for later
-                constexpr std::size_t bufsize=256;
+                constexpr std::size_t bufsize=512;
                 char buf[bufsize];
-                MADNESS_ASSERT(strlen(filename)+7 <= sizeof(buf));
+                MADNESS_CHECK(strlen(filename)+7 <= sizeof(buf));
                 snprintf(buf, bufsize, "%s.%5.5d", filename, world.rank());
 
                 // if file doesn't exist we have a race condition if this code is handled by a try/catch block
@@ -183,7 +183,7 @@ namespace madness {
                 if (world.rank() == 0) {
                     ar.open(buf);
                     ar & nio; // read/write nio from/to the archive
-                    MADNESS_ASSERT(nio <= world.size());
+                    MADNESS_CHECK(nio <= world.size());
                 }
 
                 // Ensure all agree on value of nio that may also have changed if reading
@@ -222,9 +222,9 @@ namespace madness {
             typename std::enable_if_t<std::is_same<X,BinaryFstreamInputArchive>::value || std::is_same<X,BinaryFstreamOutputArchive>::value,
                                       bool>
             exists(World& world, const char* filename) {
-                constexpr std::size_t bufsize=256;
+                constexpr std::size_t bufsize=512;
                 char buf[bufsize];
-                MADNESS_ASSERT(strlen(filename)+7 <= sizeof(buf));
+                MADNESS_CHECK(strlen(filename)+7 <= sizeof(buf));
                 snprintf(buf,bufsize, "%s.%5.5d", filename, world.rank());
                 bool status;
                 if (world.rank() == 0)
@@ -237,7 +237,7 @@ namespace madness {
 
             /// Closes the parallel archive.
             void close() {
-                MADNESS_ASSERT(world);
+                MADNESS_CHECK(world);
                 if (is_io_node()) ar.close();
             }
 
@@ -246,8 +246,8 @@ namespace madness {
             /// \throw MadnessException If not an I/O node.
             /// \return A reference to the local archive.
             Archive& local_archive() const {
-                MADNESS_ASSERT(world);
-                MADNESS_ASSERT(is_io_node());
+                MADNESS_CHECK(world);
+                MADNESS_CHECK(is_io_node());
                 return ar;
             }
 
@@ -273,9 +273,9 @@ namespace madness {
                                       void>
             remove(World& world, const char* filename) {
                 if (world.rank() == 0) {
-                    constexpr std::size_t bufsize=268;
+                    constexpr std::size_t bufsize=512;
                     char buf[bufsize];
-                    MADNESS_ASSERT(strlen(filename)+7 <= sizeof(buf));
+                    MADNESS_CHECK(strlen(filename)+7 <= sizeof(buf));
                     for (ProcessID p=0; p<world.size(); ++p) {
                         snprintf(buf,bufsize, "%s.%5.5d", filename, p);
                         if (::remove(buf)) break;
@@ -285,7 +285,7 @@ namespace madness {
 
             /// Removes the files associated with the current archive.
             void remove() {
-                MADNESS_ASSERT(world);
+                MADNESS_CHECK(world);
                 remove(*world, fname);
             }
 
diff --git a/src/madness/world/parallel_dc_archive.h b/src/madness/world/parallel_dc_archive.h
index d178d26cba3..4b4c42593f8 100644
--- a/src/madness/world/parallel_dc_archive.h
+++ b/src/madness/world/parallel_dc_archive.h
@@ -102,15 +102,57 @@ namespace madness {
         };
 
 
+        /// Implementation of functions for storing the pre/postamble in ContainerRecord archives.
+
+        /// \attention No type checking over ContainerRecord buffers, for efficiency.
+        /// \tparam T The data type.
+        template <class T>
+        struct ArchivePrePostImpl<ContainerRecordOutputArchive,T> {
+            /// Store the preamble.
+
+            /// \param[in] ar The archive.
+            static void preamble_store(const ContainerRecordOutputArchive& ar) {};
+
+            /// Store the postamble.
+
+            /// \param[in] ar The archive.
+            static inline void postamble_store(const ContainerRecordOutputArchive& ar) {};
+        };
+
+        /// Implementation of functions for loading the pre/postamble in ContainerRecord archives.
+
+        /// \attention No type checking over ContainerRecord buffers, for efficiency.
+        /// \tparam T The data type.
+        template <class T>
+        struct ArchivePrePostImpl<ContainerRecordInputArchive,T> {
+            /// Load the preamble.
+
+            /// \param[in] ar The archive.
+            static inline void preamble_load(const ContainerRecordInputArchive& ar) {};
+
+            /// Load the postamble.
+
+            /// \param[in] ar The archive.
+            static inline void postamble_load(const ContainerRecordInputArchive& ar) {};
+        };
+
+        // Forward storing to VectorOutputArchive
         template <class keyT, class valueT>
         struct ArchiveStoreImpl< ParallelOutputArchive<ContainerRecordOutputArchive>, WorldContainer<keyT,valueT> > {
             static void store(const ParallelOutputArchive<ContainerRecordOutputArchive>& ar, const WorldContainer<keyT,valueT>& t) {
-                ParallelOutputArchive<VectorOutputArchive> par(*(ar.get_world()), ar.local_archive().get_archive());
+                std::vector<unsigned char> v;
+                VectorOutputArchive dummyar(v,0);
+                const int me = ar.get_world()->rank();
+
+                // Need to pass local archive by reference
+                ParallelOutputArchive<VectorOutputArchive> par(*(ar.get_world()), (me==0) ? ar.local_archive().get_archive() : dummyar);
                 par & t;
 
             }
         };
 
+        
+
     }
 
 
diff --git a/src/madness/world/test_dc.cc b/src/madness/world/test_dc.cc
index bf03c72ee6f..f908df1dc64 100644
--- a/src/madness/world/test_dc.cc
+++ b/src/madness/world/test_dc.cc
@@ -270,7 +270,16 @@ void test_local(World& world) {
 
 void test_florian(World& world) {
     WorldContainer<Key,LargeNode> c(world);
-    long nlarge=200000;
+
+    long nlarge=20000;
+    // get nlarge variable from the environment and convert it into long
+    char* nlarge_env = getenv("NLARGE");
+    if (nlarge_env) {
+        nlarge = atol(nlarge_env);
+    }
+    if (world.rank()==0) print("size of the container",nlarge);
+
+
 
     if (world.rank() == 0) {
         for (int i=0; i<nlarge; ++i) {
@@ -279,7 +288,7 @@ void test_florian(World& world) {
     }
     world.gop.fence();
     double wall0=wall_time();
-    printf("starting at time %8.4f with %ld items\n",wall0,nlarge);
+    if (world.rank() == 0) printf("starting at time %8.4f with %ld items\n",wall0,nlarge);
     std::vector<unsigned char> v;
     {
         archive::VectorOutputArchive var(v);
@@ -287,7 +296,7 @@ void test_florian(World& world) {
         ar & c;
     }
     double wall1=wall_time();
-    printf("ending at time %8.4f after %8.4fs\n",wall1,wall1-wall0);
+    if (world.rank() == 0) printf("ending at time %8.4f after %8.4fs\n",wall1,wall1-wall0);
 
     WorldContainer<Key,LargeNode> c2(world);
     {
@@ -303,7 +312,7 @@ void test_florian(World& world) {
     }
 
     world.gop.fence();
-    print("test_florian passed");
+    if (world.rank() == 0) print("test_florian passed");
 }
 
 int main(int argc, char** argv) {
diff --git a/src/madness/world/vector_archive.h b/src/madness/world/vector_archive.h
index 129c35e44e9..3530a5a258d 100644
--- a/src/madness/world/vector_archive.h
+++ b/src/madness/world/vector_archive.h
@@ -144,6 +144,40 @@ namespace madness {
             void close() {}
         };
 
+        /// Implementation of functions for storing the pre/postamble in Vector archives.
+
+        /// \attention No type checking over Vector buffers, for efficiency.
+        /// \tparam T The data type.
+        template <class T>
+        struct ArchivePrePostImpl<VectorOutputArchive,T> {
+            /// Store the preamble.
+
+            /// \param[in] ar The archive.
+            static void preamble_store(const VectorOutputArchive& ar) {};
+
+            /// Store the postamble.
+
+            /// \param[in] ar The archive.
+            static inline void postamble_store(const VectorOutputArchive& ar) {};
+        };
+
+        /// Implementation of functions for loading the pre/postamble in Vector archives.
+
+        /// \attention No type checking over Vector buffers, for efficiency.
+        /// \tparam T The data type.
+        template <class T>
+        struct ArchivePrePostImpl<VectorInputArchive,T> {
+            /// Load the preamble.
+
+            /// \param[in] ar The archive.
+            static inline void preamble_load(const VectorInputArchive& ar) {};
+
+            /// Load the postamble.
+
+            /// \param[in] ar The archive.
+            static inline void postamble_load(const VectorInputArchive& ar) {};
+        };
+
         /// @}
     }
 }
diff --git a/src/madness/world/worlddc.h b/src/madness/world/worlddc.h
index 671e2964071..4cf178cafda 100644
--- a/src/madness/world/worlddc.h
+++ b/src/madness/world/worlddc.h
@@ -48,7 +48,8 @@
 #include <madness/world/mpi_archive.h>
 #include <madness/world/world_object.h>
 
-namespace madness {
+namespace madness
+{
 
     template <typename keyT, typename valueT, typename hashfunT>
     class WorldContainer;
@@ -57,37 +58,40 @@ namespace madness {
     class WorldContainerImpl;
 
     template <typename keyT, typename valueT, typename hashfunT>
-    void swap(WorldContainer<keyT, valueT, hashfunT>&, WorldContainer<keyT, valueT, hashfunT>&);
+    void swap(WorldContainer<keyT, valueT, hashfunT> &, WorldContainer<keyT, valueT, hashfunT> &);
 
     template <typename keyT>
     class WorldDCPmapInterface;
 
     template <typename keyT>
-    class WorldDCRedistributeInterface {
+    class WorldDCRedistributeInterface
+    {
     public:
         virtual std::size_t size() const = 0;
-        virtual void redistribute_phase1(const std::shared_ptr< WorldDCPmapInterface<keyT> >& newmap) = 0;
+        virtual void redistribute_phase1(const std::shared_ptr<WorldDCPmapInterface<keyT>> &newmap) = 0;
         virtual void redistribute_phase2() = 0;
         virtual void redistribute_phase3() = 0;
-	virtual ~WorldDCRedistributeInterface() {};
+        virtual ~WorldDCRedistributeInterface() {};
     };
 
-
     /// Interface to be provided by any process map
 
     /// \ingroup worlddc
     template <typename keyT>
-    class WorldDCPmapInterface {
+    class WorldDCPmapInterface
+    {
     public:
-        typedef WorldDCRedistributeInterface<keyT>* ptrT;
+        typedef WorldDCRedistributeInterface<keyT> *ptrT;
+
     private:
         std::set<ptrT> ptrs;
+
     public:
         /// Maps key to processor
 
         /// @param[in] key Key for container
         /// @return Processor that logically owns the key
-        virtual ProcessID owner(const keyT& key) const = 0;
+        virtual ProcessID owner(const keyT &key) const = 0;
 
         virtual ~WorldDCPmapInterface() {}
 
@@ -96,14 +100,16 @@ namespace madness {
         /// Registers object for receipt of redistribute callbacks
 
         /// @param[in] ptr Pointer to class derived from WorldDCRedistributedInterface
-        void register_callback(ptrT ptr) {
+        void register_callback(ptrT ptr)
+        {
             ptrs.insert(ptr);
         }
 
         /// Deregisters object for receipt of redistribute callbacks
 
         /// @param[in] ptr Pointer to class derived from WorldDCRedistributedInterface
-        void deregister_callback(ptrT ptr) {
+        void deregister_callback(ptrT ptr)
+        {
             ptrs.erase(ptr);
         }
 
@@ -113,26 +119,30 @@ namespace madness {
         /// new map and no objects will be registered in the current map.
         /// @param[in] world The associated world
         /// @param[in] newpmap The new process map
-        void redistribute(World& world, const std::shared_ptr< WorldDCPmapInterface<keyT> >& newpmap) {
+        void redistribute(World &world, const std::shared_ptr<WorldDCPmapInterface<keyT>> &newpmap)
+        {
             print_data_sizes(world, "before redistributing");
             world.gop.fence();
             for (typename std::set<ptrT>::iterator iter = ptrs.begin();
                  iter != ptrs.end();
-                 ++iter) {
+                 ++iter)
+            {
                 (*iter)->redistribute_phase1(newpmap);
             }
             world.gop.fence();
             for (typename std::set<ptrT>::iterator iter = ptrs.begin();
                  iter != ptrs.end();
-                 ++iter) {
+                 ++iter)
+            {
                 (*iter)->redistribute_phase2();
                 newpmap->register_callback(*iter);
             }
             world.gop.fence();
             for (typename std::set<ptrT>::iterator iter = ptrs.begin();
                  iter != ptrs.end();
-                 ++iter) {
-	         (*iter)->redistribute_phase3();
+                 ++iter)
+            {
+                (*iter)->redistribute_phase3();
             }
             world.gop.fence();
             ptrs.clear();
@@ -142,7 +152,8 @@ namespace madness {
         /// Counts global number of entries in all containers associated with this process map
 
         /// Collective operation with global fence
-        std::size_t global_size(World& world) const {
+        std::size_t global_size(World &world) const
+        {
             world.gop.fence();
             std::size_t sum = local_size();
             world.gop.sum(sum);
@@ -151,9 +162,11 @@ namespace madness {
         }
 
         /// Counts local number of entries in all containers associated with this process map
-        std::size_t local_size() const {
+        std::size_t local_size() const
+        {
             std::size_t sum = 0;
-            for (typename std::set<ptrT>::iterator iter = ptrs.begin(); iter != ptrs.end(); ++iter) {
+            for (typename std::set<ptrT>::iterator iter = ptrs.begin(); iter != ptrs.end(); ++iter)
+            {
                 sum += (*iter)->size();
             }
             return sum;
@@ -162,17 +175,20 @@ namespace madness {
         /// Prints size info to std::cout
 
         /// Collective operation with global fence
-        void print_data_sizes(World& world, const std::string msg="") const {
+        void print_data_sizes(World &world, const std::string msg = "") const
+        {
             world.gop.fence();
             std::size_t total = global_size(world);
             std::vector<std::size_t> sizes(world.size());
             sizes[world.rank()] = local_size();
-            world.gop.sum(&sizes[0],world.size());
-            if (world.rank() == 0) {
+            world.gop.sum(&sizes[0], world.size());
+            if (world.rank() == 0)
+            {
                 madness::print("data distribution info", msg);
                 madness::print("   total: ", total);
                 std::cout << "   procs: ";
-                for (int i=0; i<world.size(); i++) std::cout << sizes[i] << " ";
+                for (int i = 0; i < world.size(); i++)
+                    std::cout << sizes[i] << " ";
                 std::cout << std::endl;
             }
             world.gop.fence();
@@ -182,144 +198,161 @@ namespace madness {
     /// Default process map is "random" using madness::hash(key)
 
     /// \ingroup worlddc
-    template <typename keyT, typename hashfunT = Hash<keyT> >
-    class WorldDCDefaultPmap : public WorldDCPmapInterface<keyT> {
+    template <typename keyT, typename hashfunT = Hash<keyT>>
+    class WorldDCDefaultPmap : public WorldDCPmapInterface<keyT>
+    {
     private:
         const int nproc;
         hashfunT hashfun;
+
     public:
-        WorldDCDefaultPmap(World& world, const hashfunT& hf = hashfunT()) :
-            nproc(world.mpi.nproc()),
-            hashfun(hf)
-        { }
+        WorldDCDefaultPmap(World &world, const hashfunT &hf = hashfunT()) : nproc(world.mpi.nproc()),
+                                                                            hashfun(hf)
+        {
+        }
 
-        ProcessID owner(const keyT& key) const {
-            if (nproc == 1) return 0;
-            return hashfun(key)%nproc;
+        ProcessID owner(const keyT &key) const
+        {
+            if (nproc == 1)
+                return 0;
+            return hashfun(key) % nproc;
         }
     };
 
     /// Local process map will always return the current process as owner
 
     /// \ingroup worlddc
-    template <typename keyT, typename hashfunT = Hash<keyT> >
-    class WorldDCLocalPmap : public WorldDCPmapInterface<keyT> {
+    template <typename keyT, typename hashfunT = Hash<keyT>>
+    class WorldDCLocalPmap : public WorldDCPmapInterface<keyT>
+    {
     private:
-    	ProcessID me;
+        ProcessID me;
+
     public:
-    	WorldDCLocalPmap(World& world) : me(world.rank())  { }
-    	ProcessID owner(const keyT& key) const {
-    		return me;
-    	}
+        WorldDCLocalPmap(World &world) : me(world.rank()) {}
+        ProcessID owner(const keyT &key) const
+        {
+            return me;
+        }
     };
 
     /// Iterator for distributed container wraps the local iterator
 
     /// \ingroup worlddc
     template <class internal_iteratorT>
-    class WorldContainerIterator {
+    class WorldContainerIterator
+    {
     public:
-      typedef typename std::iterator_traits<internal_iteratorT>::iterator_category iterator_category;
-      typedef typename std::iterator_traits<internal_iteratorT>::value_type value_type;
-      typedef typename std::iterator_traits<internal_iteratorT>::difference_type difference_type;
-      typedef typename std::iterator_traits<internal_iteratorT>::pointer pointer;
-      typedef typename std::iterator_traits<internal_iteratorT>::reference reference;
+        typedef typename std::iterator_traits<internal_iteratorT>::iterator_category iterator_category;
+        typedef typename std::iterator_traits<internal_iteratorT>::value_type value_type;
+        typedef typename std::iterator_traits<internal_iteratorT>::difference_type difference_type;
+        typedef typename std::iterator_traits<internal_iteratorT>::pointer pointer;
+        typedef typename std::iterator_traits<internal_iteratorT>::reference reference;
 
     private:
-        internal_iteratorT  it;       ///< Iterator from local container
+        internal_iteratorT it; ///< Iterator from local container
         // TODO: Convert this to a scoped pointer.
-        mutable value_type* value;    ///< holds the remote values
+        mutable value_type *value; ///< holds the remote values
 
     public:
         /// Default constructor makes a local uninitialized value
         explicit WorldContainerIterator()
-                : it(), value(nullptr) {}
+            : it(), value(nullptr) {}
 
         /// Initializes from a local iterator
-        explicit WorldContainerIterator(const internal_iteratorT& it)
-                : it(it), value(nullptr) {}
+        explicit WorldContainerIterator(const internal_iteratorT &it)
+            : it(it), value(nullptr) {}
 
         /// Initializes to cache a remote value
-        explicit WorldContainerIterator(const value_type& v)
-                : it(), value(nullptr)
+        explicit WorldContainerIterator(const value_type &v)
+            : it(), value(nullptr)
         {
             value = new value_type(v);
         }
 
-        WorldContainerIterator(const WorldContainerIterator& other)
-                : it(), value(nullptr)
+        WorldContainerIterator(const WorldContainerIterator &other)
+            : it(), value(nullptr)
         {
             copy(other);
         }
 
         template <class iteratorT>
-        WorldContainerIterator(const WorldContainerIterator<iteratorT>& other)
-                : it(), value(nullptr)
+        WorldContainerIterator(const WorldContainerIterator<iteratorT> &other)
+            : it(), value(nullptr)
         {
             copy(other);
         }
 
-        ~WorldContainerIterator() {
+        ~WorldContainerIterator()
+        {
             delete value;
         }
 
         /// Assignment
-        WorldContainerIterator& operator=(const WorldContainerIterator& other) {
+        WorldContainerIterator &operator=(const WorldContainerIterator &other)
+        {
             copy(other);
             return *this;
         }
 
         /// Determines if two iterators are identical
-        bool operator==(const WorldContainerIterator& other) const {
+        bool operator==(const WorldContainerIterator &other) const
+        {
             return (((!is_cached()) && (!other.is_cached())) && it == other.it) ||
-                ((is_cached() && other.is_cached()) && value->first == other.value->first);
+                   ((is_cached() && other.is_cached()) && value->first == other.value->first);
         }
 
-
         /// Determines if two iterators are different
-        bool operator!=(const WorldContainerIterator& other) const {
+        bool operator!=(const WorldContainerIterator &other) const
+        {
             return !(*this == other);
         }
 
-
         /// Pre-increment of an iterator (i.e., ++it) --- \em local iterators only
 
         /// Trying to increment a remote iterator will throw
-        WorldContainerIterator& operator++() {
-            MADNESS_ASSERT( !is_cached() );
+        WorldContainerIterator &operator++()
+        {
+            MADNESS_ASSERT(!is_cached());
             ++it;
             return *this;
         }
 
-        WorldContainerIterator operator++(int) {
-            MADNESS_ASSERT( !is_cached() );
+        WorldContainerIterator operator++(int)
+        {
+            MADNESS_ASSERT(!is_cached());
             WorldContainerIterator<internal_iteratorT> result(*this);
             ++it;
             return result;
         }
 
         /// Iterators dereference to std::pair<const keyT,valueT>
-        pointer operator->() const {
-            return (is_cached() ? value : it.operator->() );
+        pointer operator->() const
+        {
+            return (is_cached() ? value : it.operator->());
         }
 
         /// Iterators dereference to std::pair<const keyT,valueT>
-        reference operator*() const {
-            return (is_cached() ? *value : *it );
+        reference operator*() const
+        {
+            return (is_cached() ? *value : *it);
         }
 
         /// Private: (or should be) Returns iterator of internal container
-        const internal_iteratorT& get_internal_iterator() const {
+        const internal_iteratorT &get_internal_iterator() const
+        {
             return it;
         }
 
         /// Returns true if this is non-local or cached value
-        bool is_cached() const {
+        bool is_cached() const
+        {
             return value != nullptr;
         }
 
         template <typename Archive>
-        void serialize(const Archive&) {
+        void serialize(const Archive &)
+        {
             MADNESS_EXCEPTION("Serializing DC iterator ... why?", false);
         }
 
@@ -328,13 +361,18 @@ namespace madness {
         friend class WorldContainerIterator;
 
         template <class iteratorT>
-        void copy(const WorldContainerIterator<iteratorT>& other) {
-            if (static_cast<const void*>(this) != static_cast<const void*>(&other)) {
+        void copy(const WorldContainerIterator<iteratorT> &other)
+        {
+            if (static_cast<const void *>(this) != static_cast<const void *>(&other))
+            {
                 delete value;
-                if(other.is_cached()) {
-                    value = new value_type(* other.value);
+                if (other.is_cached())
+                {
+                    value = new value_type(*other.value);
                     it = internal_iteratorT();
-                } else {
+                }
+                else
+                {
                     it = other.it;
                     value = nullptr;
                 }
@@ -345,22 +383,23 @@ namespace madness {
     /// Internal implementation of distributed container to facilitate shallow copy
 
     /// \ingroup worlddc
-    template <typename keyT, typename valueT, typename hashfunT >
+    template <typename keyT, typename valueT, typename hashfunT>
     class WorldContainerImpl
-        : public WorldObject< WorldContainerImpl<keyT, valueT, hashfunT> >
-        , public WorldDCRedistributeInterface<keyT>
+        : public WorldObject<WorldContainerImpl<keyT, valueT, hashfunT>>,
+          public WorldDCRedistributeInterface<keyT>
 #ifndef MADNESS_DISABLE_SHARED_FROM_THIS
-        , public std::enable_shared_from_this<WorldContainerImpl<keyT, valueT, hashfunT> >
+        ,
+          public std::enable_shared_from_this<WorldContainerImpl<keyT, valueT, hashfunT>>
 #endif // MADNESS_DISABLE_SHARED_FROM_THIS
     {
     public:
-        typedef typename std::pair<const keyT,valueT> pairT;
+        typedef typename std::pair<const keyT, valueT> pairT;
         typedef const pairT const_pairT;
-        typedef WorldContainerImpl<keyT,valueT,hashfunT> implT;
+        typedef WorldContainerImpl<keyT, valueT, hashfunT> implT;
 
-        typedef ConcurrentHashMap< keyT,valueT,hashfunT > internal_containerT;
+        typedef ConcurrentHashMap<keyT, valueT, hashfunT> internal_containerT;
 
-	//typedef WorldObject< WorldContainerImpl<keyT, valueT, hashfunT> > worldobjT;
+        // typedef WorldObject< WorldContainerImpl<keyT, valueT, hashfunT> > worldobjT;
 
         typedef typename internal_containerT::iterator internal_iteratorT;
         typedef typename internal_containerT::const_iterator internal_const_iteratorT;
@@ -371,82 +410,87 @@ namespace madness {
         typedef WorldContainerIterator<internal_const_iteratorT> const_iteratorT;
         typedef WorldContainerIterator<internal_const_iteratorT> const_iterator;
 
-        friend class WorldContainer<keyT,valueT,hashfunT>;
+        friend class WorldContainer<keyT, valueT, hashfunT>;
 
-//         template <typename containerT, typename datumT>
-//         inline
-//         static
-//         typename containerT::iterator replace(containerT& c, const datumT& d) {
-//             std::pair<typename containerT::iterator,bool> p = c.insert(d);
-//             if (!p.second) p.first->second = d.second;   // Who's on first?
-//             return p.first;
-//         }
+        //         template <typename containerT, typename datumT>
+        //         inline
+        //         static
+        //         typename containerT::iterator replace(containerT& c, const datumT& d) {
+        //             std::pair<typename containerT::iterator,bool> p = c.insert(d);
+        //             if (!p.second) p.first->second = d.second;   // Who's on first?
+        //             return p.first;
+        //         }
 
     private:
+        WorldContainerImpl(); // Inhibit default constructor
 
-        WorldContainerImpl();   // Inhibit default constructor
-
-        std::shared_ptr< WorldDCPmapInterface<keyT> > pmap;///< Function/class to map from keys to owning process
-        const ProcessID me;                      ///< My MPI rank
-        internal_containerT local;               ///< Locally owned data
-        std::vector<keyT>* move_list;            ///< Tempoary used to record data that needs redistributing
+        std::shared_ptr<WorldDCPmapInterface<keyT>> pmap; ///< Function/class to map from keys to owning process
+        const ProcessID me;                               ///< My MPI rank
+        internal_containerT local;                        ///< Locally owned data
+        std::vector<keyT> *move_list;                     ///< Temporary used to record data that needs redistributing
 
         /// Handles find request
-        void find_handler(ProcessID requestor, const keyT& key, const RemoteReference< FutureImpl<iterator> >& ref) {
+        void find_handler(ProcessID requestor, const keyT &key, const RemoteReference<FutureImpl<iterator>> &ref)
+        {
             internal_iteratorT r = local.find(key);
-            if (r == local.end()) {
-                //print("find_handler: failure:", key);
+            if (r == local.end())
+            {
+                // print("find_handler: failure:", key);
                 this->send(requestor, &implT::find_failure_handler, ref);
             }
-            else {
-                //print("find_handler: success:", key, r->first, r->second);
+            else
+            {
+                // print("find_handler: success:", key, r->first, r->second);
                 this->send(requestor, &implT::find_success_handler, ref, *r);
             }
         }
 
         /// Handles successful find response
-        void find_success_handler(const RemoteReference< FutureImpl<iterator> >& ref, const pairT& datum) {
-            FutureImpl<iterator>* f = ref.get();
+        void find_success_handler(const RemoteReference<FutureImpl<iterator>> &ref, const pairT &datum)
+        {
+            FutureImpl<iterator> *f = ref.get();
             f->set(iterator(datum));
-            //print("find_success_handler: success:", datum.first, datum.second, f->get()->first, f->get()->second);
-            // Todo: Look at this again.
-//            ref.reset(); // Matching inc() in find() where ref was made
+            // print("find_success_handler: success:", datum.first, datum.second, f->get()->first, f->get()->second);
+            //  Todo: Look at this again.
+            //            ref.reset(); // Matching inc() in find() where ref was made
         }
 
         /// Handles unsuccessful find response
-        void find_failure_handler(const RemoteReference< FutureImpl<iterator> >& ref) {
-            FutureImpl<iterator>* f = ref.get();
+        void find_failure_handler(const RemoteReference<FutureImpl<iterator>> &ref)
+        {
+            FutureImpl<iterator> *f = ref.get();
             f->set(end());
-            //print("find_failure_handler");
-            // Todo: Look at this again.
-//            ref.reset(); // Matching inc() in find() where ref was made
+            // print("find_failure_handler");
+            //  Todo: Look at this again.
+            //            ref.reset(); // Matching inc() in find() where ref was made
         }
 
     public:
-
-        WorldContainerImpl(World& world,
-                           const std::shared_ptr< WorldDCPmapInterface<keyT> >& pm,
-                           const hashfunT& hf)
-                : WorldObject< WorldContainerImpl<keyT, valueT, hashfunT> >(world)
-                , pmap(pm)
-                , me(world.mpi.rank())
-                , local(5011, hf) {
+        WorldContainerImpl(World &world,
+                           const std::shared_ptr<WorldDCPmapInterface<keyT>> &pm,
+                           const hashfunT &hf)
+            : WorldObject<WorldContainerImpl<keyT, valueT, hashfunT>>(world), pmap(pm), me(world.mpi.rank()), local(5011, hf)
+        {
             pmap->register_callback(this);
         }
 
-        virtual ~WorldContainerImpl() {
+        virtual ~WorldContainerImpl()
+        {
             pmap->deregister_callback(this);
         }
 
-        const std::shared_ptr< WorldDCPmapInterface<keyT> >& get_pmap() const {
+        const std::shared_ptr<WorldDCPmapInterface<keyT>> &get_pmap() const
+        {
             return pmap;
         }
 
-        std::shared_ptr< WorldDCPmapInterface<keyT> >& get_pmap() {
+        std::shared_ptr<WorldDCPmapInterface<keyT>> &get_pmap()
+        {
             return pmap;
         }
 
-        void reset_pmap_to_local() {
+        void reset_pmap_to_local()
+        {
             pmap->deregister_callback(this);
             pmap.reset(new WorldDCLocalPmap<keyT>(this->get_world()));
             pmap->register_callback(this);
@@ -454,51 +498,61 @@ namespace madness {
 
         /// replicates this WorldContainer on all ProcessIDs and generates a
         /// ProcessMap where all nodes are local
-        void replicate(bool fence) {
-
-        	World& world=this->get_world();
-        	pmap->deregister_callback(this);
-        	pmap.reset(new WorldDCLocalPmap<keyT>(world));
-        	pmap->register_callback(this);
-
-        	for (ProcessID rank=0; rank<world.size(); rank++) {
-        		if (rank == world.rank()) {
-        			std::size_t sz = size();
-        			world.gop.broadcast_serializable(sz, rank);
-
-        			for (auto it=begin(); it!=end(); ++it) {
-        				keyT key = it->first;
-        				valueT value = it->second;
-        				world.gop.broadcast_serializable(key, rank);
-        				world.gop.broadcast_serializable(value, rank);
-        			}
-        		}
-        		else {
-        			size_t sz;
-        			world.gop.broadcast_serializable(sz, rank);
-        			for (size_t i=0; i<sz; i++) {
-        				keyT key;
-        				valueT value;
-        				world.gop.broadcast_serializable(key, rank);
-        				world.gop.broadcast_serializable(value, rank);
-        				insert(pairT(key,value));
-        			}
-        		}
-        	}
-        	if (fence) world.gop.fence();
-        }
-
-        hashfunT& get_hash() const { return local.get_hash(); }
-
-        bool is_local(const keyT& key) const {
+        void replicate(bool fence)
+        {
+
+            World &world = this->get_world();
+            pmap->deregister_callback(this);
+            pmap.reset(new WorldDCLocalPmap<keyT>(world));
+            pmap->register_callback(this);
+
+            for (ProcessID rank = 0; rank < world.size(); rank++)
+            {
+                if (rank == world.rank())
+                {
+                    std::size_t sz = size();
+                    world.gop.broadcast_serializable(sz, rank);
+
+                    for (auto it = begin(); it != end(); ++it)
+                    {
+                        keyT key = it->first;
+                        valueT value = it->second;
+                        world.gop.broadcast_serializable(key, rank);
+                        world.gop.broadcast_serializable(value, rank);
+                    }
+                }
+                else
+                {
+                    size_t sz;
+                    world.gop.broadcast_serializable(sz, rank);
+                    for (size_t i = 0; i < sz; i++)
+                    {
+                        keyT key;
+                        valueT value;
+                        world.gop.broadcast_serializable(key, rank);
+                        world.gop.broadcast_serializable(value, rank);
+                        insert(pairT(key, value));
+                    }
+                }
+            }
+            if (fence)
+                world.gop.fence();
+        }
+
+        hashfunT &get_hash() const { return local.get_hash(); }
+
+        bool is_local(const keyT &key) const
+        {
             return owner(key) == me;
         }
 
-        ProcessID owner(const keyT& key) const {
+        ProcessID owner(const keyT &key) const
+        {
             return pmap->owner(key);
         }
 
-        bool probe(const keyT& key) const {
+        bool probe(const keyT &key) const
+        {
             ProcessID dest = owner(key);
             if (dest == me)
                 return local.find(key) != local.end();
@@ -506,123 +560,146 @@ namespace madness {
                 return false;
         }
 
-        std::size_t size() const {
+        std::size_t size() const
+        {
             return local.size();
         }
 
-        void insert(const pairT& datum) {
+        void insert(const pairT &datum)
+        {
             ProcessID dest = owner(datum.first);
-            if (dest == me) {
+            if (dest == me)
+            {
                 // Was using iterator ... try accessor ?????
                 accessor acc;
                 // N.B. key might already exist if want to simply replace
-                [[maybe_unused]] auto inserted = local.insert(acc,datum.first);
+                [[maybe_unused]] auto inserted = local.insert(acc, datum.first);
                 acc->second = datum.second;
             }
-            else {
-  	        // Must be send (not task) for sequential consistency (and relies on single-threaded remote server)
+            else
+            {
+                // Must be send (not task) for sequential consistency (and relies on single-threaded remote server)
                 this->send(dest, &implT::insert, datum);
             }
         }
 
-        bool insert_acc(accessor& acc, const keyT& key) {
+        bool insert_acc(accessor &acc, const keyT &key)
+        {
             MADNESS_ASSERT(owner(key) == me);
-            return local.insert(acc,key);
+            return local.insert(acc, key);
         }
 
-        bool insert_const_acc(const_accessor& acc, const keyT& key) {
+        bool insert_const_acc(const_accessor &acc, const keyT &key)
+        {
             MADNESS_ASSERT(owner(key) == me);
-            return local.insert(acc,key);
+            return local.insert(acc, key);
         }
 
-        void clear() {
+        void clear()
+        {
             local.clear();
         }
 
-
-        void erase(const keyT& key) {
+        void erase(const keyT &key)
+        {
             ProcessID dest = owner(key);
-            if (dest == me) {
+            if (dest == me)
+            {
                 [[maybe_unused]] auto erased = local.try_erase(key);
                 MADNESS_ASSERT(erased);
             }
-            else {
-                void(implT::*eraser)(const keyT&) = &implT::erase;
+            else
+            {
+                void (implT::*eraser)(const keyT &) = &implT::erase;
                 this->send(dest, eraser, key);
             }
         }
 
         template <typename InIter>
-        void erase(InIter it) {
+        void erase(InIter it)
+        {
             MADNESS_ASSERT(!it.is_cached());
             MADNESS_ASSERT(it != end());
             erase(it->first);
         }
 
         template <typename InIter>
-        void erase(InIter first, InIter last) {
+        void erase(InIter first, InIter last)
+        {
             InIter it = first;
-            do {
+            do
+            {
                 first++;
                 erase(it->first);
                 it = first;
-            } while(first != last);
+            } while (first != last);
         }
 
-        iterator begin() {
+        iterator begin()
+        {
             return iterator(local.begin());
         }
 
-        const_iterator begin() const {
+        const_iterator begin() const
+        {
             return const_iterator(local.begin());
         }
 
-        iterator end() {
+        iterator end()
+        {
             return iterator(local.end());
         }
 
-        const_iterator end() const {
+        const_iterator end() const
+        {
             return const_iterator(local.end());
         }
 
-        Future<const_iterator> find(const keyT& key) const {
+        Future<const_iterator> find(const keyT &key) const
+        {
             // Ugliness here to avoid replicating find() and
             // associated handlers for const.  Assumption is that
             // const and non-const iterators are identical except for
             // const attribute ... at some point probably need to do
             // the right thing.
-            Future<iterator> r = const_cast<implT*>(this)->find(key);
-            return *(Future<const_iterator>*)(&r);
+            Future<iterator> r = const_cast<implT *>(this)->find(key);
+            return *(Future<const_iterator> *)(&r);
         }
 
-
-        Future<iterator> find(const keyT& key) {
+        Future<iterator> find(const keyT &key)
+        {
             ProcessID dest = owner(key);
-            if (dest == me) {
+            if (dest == me)
+            {
                 return Future<iterator>(iterator(local.find(key)));
-            } else {
+            }
+            else
+            {
                 Future<iterator> result;
                 this->send(dest, &implT::find_handler, me, key, result.remote_ref(this->get_world()));
                 return result;
             }
         }
 
-        bool find(accessor& acc, const keyT& key) {
-            if (owner(key) != me) return false;
-            return local.find(acc,key);
+        bool find(accessor &acc, const keyT &key)
+        {
+            if (owner(key) != me)
+                return false;
+            return local.find(acc, key);
         }
 
-
-        bool find(const_accessor& acc, const keyT& key) const {
-            if (owner(key) != me) return false;
-            return local.find(acc,key);
+        bool find(const_accessor &acc, const keyT &key) const
+        {
+            if (owner(key) != me)
+                return false;
+            return local.find(acc, key);
         }
 
-
         // Used to forward call to item member function
         template <typename memfunT>
         MEMFUN_RETURNT(memfunT)
-        itemfun(const keyT& key, memfunT memfun) {
+        itemfun(const keyT &key, memfunT memfun)
+        {
             accessor acc;
             // N.B. key may already exist, this is just to ensure lock is held by acc
             [[maybe_unused]] auto inserted = local.insert(acc, key);
@@ -632,7 +709,8 @@ namespace madness {
         // Used to forward call to item member function
         template <typename memfunT, typename arg1T>
         MEMFUN_RETURNT(memfunT)
-        itemfun(const keyT& key, memfunT memfun, const arg1T& arg1) {
+        itemfun(const keyT &key, memfunT memfun, const arg1T &arg1)
+        {
             accessor acc;
             // N.B. key may already exist, this is just to ensure lock is held by acc
             [[maybe_unused]] auto inserted = local.insert(acc, key);
@@ -642,110 +720,122 @@ namespace madness {
         // Used to forward call to item member function
         template <typename memfunT, typename arg1T, typename arg2T>
         MEMFUN_RETURNT(memfunT)
-        itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2) {
+        itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2)
+        {
             accessor acc;
             // N.B. key may already exist, this is just to ensure lock is held by acc
             [[maybe_unused]] auto inserted = local.insert(acc, key);
-            return (acc->second.*memfun)(arg1,arg2);
+            return (acc->second.*memfun)(arg1, arg2);
         }
 
         // Used to forward call to item member function
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T>
         MEMFUN_RETURNT(memfunT)
-        itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3) {
+        itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3)
+        {
             accessor acc;
             // N.B. key may already exist, this is just to ensure lock is held by acc
             [[maybe_unused]] auto inserted = local.insert(acc, key);
-            return (acc->second.*memfun)(arg1,arg2,arg3);
+            return (acc->second.*memfun)(arg1, arg2, arg3);
         }
 
         // Used to forward call to item member function
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T>
         MEMFUN_RETURNT(memfunT)
-        itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4) {
+        itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4)
+        {
             accessor acc;
             // N.B. key may already exist, this is just to ensure lock is held by acc
             [[maybe_unused]] auto inserted = local.insert(acc, key);
-            return (acc->second.*memfun)(arg1,arg2,arg3,arg4);
+            return (acc->second.*memfun)(arg1, arg2, arg3, arg4);
         }
 
         // Used to forward call to item member function
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T>
         MEMFUN_RETURNT(memfunT)
-        itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5) {
+        itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5)
+        {
             accessor acc;
             // N.B. key may already exist, this is just to ensure lock is held by acc
             [[maybe_unused]] auto inserted = local.insert(acc, key);
-            return (acc->second.*memfun)(arg1,arg2,arg3,arg4,arg5);
+            return (acc->second.*memfun)(arg1, arg2, arg3, arg4, arg5);
         }
 
         // Used to forward call to item member function
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T, typename arg6T>
         MEMFUN_RETURNT(memfunT)
-        itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6) {
+        itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6)
+        {
             accessor acc;
             // N.B. key may already exist, this is just to ensure lock is held by acc
             [[maybe_unused]] auto inserted = local.insert(acc, key);
-            return (acc->second.*memfun)(arg1,arg2,arg3,arg4,arg5,arg6);
+            return (acc->second.*memfun)(arg1, arg2, arg3, arg4, arg5, arg6);
         }
 
         // Used to forward call to item member function
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T, typename arg6T, typename arg7T>
         MEMFUN_RETURNT(memfunT)
-        itemfun(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3,
-				const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const arg7T& arg7) {
+        itemfun(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3,
+                const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const arg7T &arg7)
+        {
             accessor acc;
             // N.B. key may already exist, this is just to ensure lock is held by acc
             [[maybe_unused]] auto inserted = local.insert(acc, key);
-            return (acc->second.*memfun)(arg1,arg2,arg3,arg4,arg5,arg6,arg7);
+            return (acc->second.*memfun)(arg1, arg2, arg3, arg4, arg5, arg6, arg7);
         }
 
         // First phase of redistributions changes pmap and makes list of stuff to move
-        void redistribute_phase1(const std::shared_ptr< WorldDCPmapInterface<keyT> >& newpmap) {
+        void redistribute_phase1(const std::shared_ptr<WorldDCPmapInterface<keyT>> &newpmap)
+        {
             pmap = newpmap;
             move_list = new std::vector<keyT>();
-            for (typename internal_containerT::iterator iter=local.begin(); iter!=local.end(); ++iter) {
-                if (owner(iter->first) != me) move_list->push_back(iter->first);
+            for (typename internal_containerT::iterator iter = local.begin(); iter != local.end(); ++iter)
+            {
+                if (owner(iter->first) != me)
+                    move_list->push_back(iter->first);
             }
         }
 
-	struct P2Op {
-	  implT * impl;
-	  typedef Range<typename std::vector<keyT>::const_iterator> rangeT;
-	  P2Op(implT* impl) : impl(impl) {}
-    	  P2Op(const P2Op& p) : impl(p.impl) {}
-	  bool operator()(typename rangeT::iterator& iterator) const {
-	    typename internal_containerT::iterator iter = impl->local.find(*iterator);
-	    MADNESS_ASSERT(iter != impl->local.end());
-
-	    //impl->insert(*iter);
-	    impl->task(impl->owner(*iterator), &implT::insert, *iter);
-
-	    impl->local.erase(iter); // delete local copy of the data
-	    return true;
-	  }
-	};
+        struct P2Op
+        {
+            implT *impl;
+            typedef Range<typename std::vector<keyT>::const_iterator> rangeT;
+            P2Op(implT *impl) : impl(impl) {}
+            P2Op(const P2Op &p) : impl(p.impl) {}
+            bool operator()(typename rangeT::iterator &iterator) const
+            {
+                typename internal_containerT::iterator iter = impl->local.find(*iterator);
+                MADNESS_ASSERT(iter != impl->local.end());
+
+                // impl->insert(*iter);
+                impl->task(impl->owner(*iterator), &implT::insert, *iter);
+
+                impl->local.erase(iter); // delete local copy of the data
+                return true;
+            }
+        };
 
         // Second phase moves data
-        void redistribute_phase2() {
-	  this->get_world().taskq.for_each(typename P2Op::rangeT(move_list->begin(), move_list->end()), P2Op(this));
-	    //std::vector<keyT>& mvlist = *move_list;
-            //for (unsigned int i=0; i<move_list->size(); ++i) {
-            //    typename internal_containerT::iterator iter = local.find(mvlist[i]);
-            //    MADNESS_ASSERT(iter != local.end());
-            //    insert(*iter);
-            //    local.erase(iter);
-            //}
-            //delete move_list;
+        void redistribute_phase2()
+        {
+            this->get_world().taskq.for_each(typename P2Op::rangeT(move_list->begin(), move_list->end()), P2Op(this));
+            // std::vector<keyT>& mvlist = *move_list;
+            // for (unsigned int i=0; i<move_list->size(); ++i) {
+            //     typename internal_containerT::iterator iter = local.find(mvlist[i]);
+            //     MADNESS_ASSERT(iter != local.end());
+            //     insert(*iter);
+            //     local.erase(iter);
+            // }
+            // delete move_list;
         }
 
         // Third phase cleans up
-        void redistribute_phase3() {
-	   delete move_list;
+        void redistribute_phase3()
+        {
+            delete move_list;
         }
     };
 
-
     /// Makes a distributed container with specified attributes
 
     /// \ingroup worlddc
@@ -771,11 +861,12 @@ namespace madness {
     /// All operations, including constructors and destructors, are
     /// non-blocking and return immediately.  If communication occurs
     /// it is asynchronous, otherwise operations are local.
-    template <typename keyT, typename valueT, typename hashfunT = Hash<keyT> >
-    class WorldContainer : public archive::ParallelSerializableObject {
+    template <typename keyT, typename valueT, typename hashfunT = Hash<keyT>>
+    class WorldContainer : public archive::ParallelSerializableObject
+    {
     public:
-        typedef WorldContainer<keyT,valueT,hashfunT> containerT;
-        typedef WorldContainerImpl<keyT,valueT,hashfunT> implT;
+        typedef WorldContainer<keyT, valueT, hashfunT> containerT;
+        typedef WorldContainerImpl<keyT, valueT, hashfunT> implT;
         typedef typename implT::pairT pairT;
         typedef typename implT::iterator iterator;
         typedef typename implT::const_iterator const_iterator;
@@ -787,20 +878,21 @@ namespace madness {
     private:
         std::shared_ptr<implT> p;
 
-        inline void check_initialized() const {
+        inline void check_initialized() const
+        {
             MADNESS_ASSERT(p);
         }
-    public:
 
+    public:
         /// Makes an uninitialized container (no communication)
 
         /// The container is useless until assigned to from a fully
         /// constructed container.  There is no need to worry about
         /// default constructors being executed in order.
         WorldContainer()
-                : p()
-        {}
-
+            : p()
+        {
+        }
 
         /// Makes an initialized, empty container with default data distribution (no communication)
 
@@ -809,12 +901,12 @@ namespace madness {
         /// making a container, we have to assume that all processes
         /// execute this constructor in the same order (does not apply
         /// to the non-initializing, default constructor).
-        WorldContainer(World& world, bool do_pending=true, const hashfunT& hf = hashfunT())
+        WorldContainer(World &world, bool do_pending = true, const hashfunT &hf = hashfunT())
             : p(new implT(world,
-                          std::shared_ptr< WorldDCPmapInterface<keyT> >(new WorldDCDefaultPmap<keyT, hashfunT>(world, hf)),
+                          std::shared_ptr<WorldDCPmapInterface<keyT>>(new WorldDCDefaultPmap<keyT, hashfunT>(world, hf)),
                           hf))
         {
-            if(do_pending)
+            if (do_pending)
                 p->process_pending();
         }
 
@@ -825,22 +917,21 @@ namespace madness {
         /// making a container, we have to assume that all processes
         /// execute this constructor in the same order (does not apply
         /// to the non-initializing, default constructor).
-        WorldContainer(World& world,
-                       const std::shared_ptr< WorldDCPmapInterface<keyT> >& pmap,
-                       bool do_pending=true,
-                       const hashfunT& hf = hashfunT())
+        WorldContainer(World &world,
+                       const std::shared_ptr<WorldDCPmapInterface<keyT>> &pmap,
+                       bool do_pending = true,
+                       const hashfunT &hf = hashfunT())
             : p(new implT(world, pmap, hf))
         {
-            if(do_pending)
+            if (do_pending)
                 p->process_pending();
         }
 
-
         /// Copy constructor is shallow (no communication)
 
         /// The copy refers to exactly the same container as other
         /// which must be initialized.
-        WorldContainer(const WorldContainer& other)
+        WorldContainer(const WorldContainer &other)
             : p(other.p)
         {
             check_initialized();
@@ -850,8 +941,10 @@ namespace madness {
 
         /// The copy refers to exactly the same container as other
         /// which must be initialized.
-        containerT& operator=(const containerT& other) {
-            if (this != &other) {
+        containerT &operator=(const containerT &other)
+        {
+            if (this != &other)
+            {
                 other.check_initialized();
                 p = other.p;
             }
@@ -859,140 +952,146 @@ namespace madness {
         }
 
         /// Returns the world associated with this container
-        World& get_world() const {
+        World &get_world() const
+        {
             check_initialized();
             return p->get_world();
         }
 
-        std::shared_ptr< WorldDCPmapInterface<keyT> >& get_impl() {
+        std::shared_ptr<WorldDCPmapInterface<keyT>> &get_impl()
+        {
             check_initialized();
             return p;
         }
 
         /// replicates this WorldContainer on all ProcessIDs
-        void replicate(bool fence=true) {
-        	p->replicate(fence);
+        void replicate(bool fence = true)
+        {
+            p->replicate(fence);
         }
 
         /// Inserts/replaces key+value pair (non-blocking communication if key not local)
-        void replace(const pairT& datum) {
+        void replace(const pairT &datum)
+        {
             check_initialized();
             p->insert(datum);
         }
 
-
         /// Inserts/replaces key+value pair (non-blocking communication if key not local)
-        void replace(const keyT& key, const valueT& value) {
-            replace(pairT(key,value));
+        void replace(const keyT &key, const valueT &value)
+        {
+            replace(pairT(key, value));
         }
 
-
         /// Write access to LOCAL value by key. Returns true if found, false otherwise (always false for remote).
-        bool find(accessor& acc, const keyT& key) {
+        bool find(accessor &acc, const keyT &key)
+        {
             check_initialized();
-            return p->find(acc,key);
+            return p->find(acc, key);
         }
 
-
         /// Read access to LOCAL value by key. Returns true if found, false otherwise (always false for remote).
-        bool find(const_accessor& acc, const keyT& key) const {
+        bool find(const_accessor &acc, const keyT &key) const
+        {
             check_initialized();
-            return p->find(acc,key);
+            return p->find(acc, key);
         }
 
-
         /// Write access to LOCAL value by key. Returns true if inserted, false if already exists (throws if remote)
-        bool insert(accessor& acc, const keyT& key) {
+        bool insert(accessor &acc, const keyT &key)
+        {
             check_initialized();
-            return p->insert_acc(acc,key);
+            return p->insert_acc(acc, key);
         }
 
-
         /// Read access to LOCAL value by key. Returns true if inserted, false if already exists (throws if remote)
-        bool insert(const_accessor& acc, const keyT& key) {
+        bool insert(const_accessor &acc, const keyT &key)
+        {
             check_initialized();
-            return p->insert_acc(acc,key);
+            return p->insert_acc(acc, key);
         }
 
-
         /// Inserts pairs (non-blocking communication if key(s) not local)
         template <typename input_iterator>
-        void replace(input_iterator& start, input_iterator& end) {
+        void replace(input_iterator &start, input_iterator &end)
+        {
             check_initialized();
             using std::placeholders::_1;
-            std::for_each(start,end,std::bind(this,std::mem_fn(&containerT::insert),_1));
+            std::for_each(start, end, std::bind(this, std::mem_fn(&containerT::insert), _1));
         }
 
-
         /// Returns true if local data is immediately available (no communication)
-        bool probe(const keyT& key) const {
+        bool probe(const keyT &key) const
+        {
             check_initialized();
             return p->probe(key);
         }
 
-
         /// Returns processor that logically owns key (no communication)
 
         /// Local remapping may have changed its physical location, but all
         /// operations should forward correctly.
-        inline ProcessID owner(const keyT& key) const {
+        inline ProcessID owner(const keyT &key) const
+        {
             check_initialized();
             return p->owner(key);
         }
 
-
         /// Returns true if the key maps to the local processor (no communication)
-        bool is_local(const keyT& key) const {
+        bool is_local(const keyT &key) const
+        {
             check_initialized();
             return p->is_local(key);
         }
 
-
         /// Returns a future iterator (non-blocking communication if key not local)
 
         /// Like an std::map an iterator "points" to an std::pair<const keyT,valueT>.
         ///
         /// Refer to Future for info on how to avoid blocking.
-        Future<iterator> find(const keyT& key) {          //
+        Future<iterator> find(const keyT &key)
+        { //
             check_initialized();
             return p->find(key);
         }
 
-
         /// Returns a future iterator (non-blocking communication if key not local)
 
         /// Like an std::map an iterator "points" to an std::pair<const keyT,valueT>.
         ///
         /// Refer to Future for info on how to avoid blocking.
-        Future<const_iterator> find(const keyT& key) const {
+        Future<const_iterator> find(const keyT &key) const
+        {
             check_initialized();
-            return const_cast<const implT*>(p.get())->find(key);
+            return const_cast<const implT *>(p.get())->find(key);
         }
 
-
         /// Returns an iterator to the beginning of the \em local data (no communication)
-        iterator begin() {
+        iterator begin()
+        {
             check_initialized();
             return p->begin();
         }
 
-
         /// Returns an iterator to the beginning of the \em local data (no communication)
-        const_iterator begin() const {
+        const_iterator begin() const
+        {
             check_initialized();
-            return const_cast<const implT*>(p.get())->begin();
+            return const_cast<const implT *>(p.get())->begin();
         }
 
         /// Returns an iterator past the end of the \em local data (no communication)
-        iterator end() {
+        iterator end()
+        {
             check_initialized();
             return p->end();
         }
 
         /// Returns an iterator past the end of the \em local data (no communication)
-        const_iterator end() const {
+        const_iterator end() const
+        {
             check_initialized();
-            return const_cast<const implT*>(p.get())->end();
+            return const_cast<const implT *>(p.get())->end();
         }
 
         /// Erases entry from container (non-blocking comm if remote)
@@ -1003,51 +1102,58 @@ namespace madness {
         /// remote end.  This is just the same as what happens when
         /// using STL iterators on an STL container in a sequential
         /// algorithm.
-        void erase(const keyT& key) {
+        void erase(const keyT &key)
+        {
             check_initialized();
             p->erase(key);
         }
 
         /// Erases entry corresponding to \em local iterator (no communication)
-        void erase(const iterator& it) {
+        void erase(const iterator &it)
+        {
             check_initialized();
             p->erase(it);
         }
 
         /// Erases range defined by \em local iterators (no communication)
-        void erase(const iterator& start, const iterator& finish) {
+        void erase(const iterator &start, const iterator &finish)
+        {
             check_initialized();
-            p->erase(start,finish);
+            p->erase(start, finish);
         }
 
-
         /// Clears all \em local data (no communication)
 
         /// Invalidates all iterators
-        void clear() {
+        void clear()
+        {
             check_initialized();
             p->clear();
         }
 
         /// Returns the number of \em local entries (no communication)
-        std::size_t size() const {
+        std::size_t size() const
+        {
             check_initialized();
             return p->size();
         }
 
         /// Returns shared pointer to the process mapping
-        inline const std::shared_ptr< WorldDCPmapInterface<keyT> >& get_pmap() const {
+        inline const std::shared_ptr<WorldDCPmapInterface<keyT>> &get_pmap() const
+        {
             check_initialized();
             return p->get_pmap();
         }
 
         /// Returns shared pointer to the process mapping
-        inline void reset_pmap_to_local() {
+        inline void reset_pmap_to_local()
+        {
             p->reset_pmap_to_local();
         }
 
         /// Returns a reference to the hashing functor
-        hashfunT& get_hash() const {
+        hashfunT &get_hash() const
+        {
             check_initialized();
             return p->get_hash();
         }
@@ -1057,7 +1163,8 @@ namespace madness {
         /// If the constructor was given \c do_pending=false then you
         /// \em must invoke this routine in order to process both
         /// prior and future messages.
-        inline void process_pending() {
+        inline void process_pending()
+        {
             check_initialized();
             p->process_pending();
         }
@@ -1072,14 +1179,15 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT>
-        Future< MEMFUN_RETURNT(memfunT) >
-        send(const keyT& key, memfunT memfun) {
+        Future<MEMFUN_RETURNT(memfunT)>
+        send(const keyT &key, memfunT memfun)
+        {
             check_initialized();
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT) = &implT:: template itemfun<memfunT>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT) = &implT::template itemfun<memfunT>;
             return p->send(owner(key), itemfun, key, memfun);
         }
 
-
         /// Sends message "resultT memfun(arg1T)" to item (non-blocking comm if remote)
 
         /// If item does not exist it is made with the default constructor.
@@ -1090,18 +1198,19 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, const memfunT& memfun, const arg1T& arg1) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, const memfunT &memfun, const arg1T &arg1)
+        {
             check_initialized();
             // To work around bug in g++ 4.3.* use static cast as alternative mechanism to force type deduction
-            MEMFUN_RETURNT(memfunT) (implT::*itemfun)(const keyT&, memfunT, const arg1T&) = &implT:: template itemfun<memfunT,arg1T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const arg1T &) = &implT::template itemfun<memfunT, arg1T>;
             return p->send(owner(key), itemfun, key, memfun, arg1);
             /*return p->send(owner(key),
                            static_cast<MEMFUN_RETURNT(memfunT)(implT::*)(const keyT&, memfunT, const arg1T&)>(&implT:: template itemfun<memfunT,arg1T>),
                            key, memfun, arg1);*/
         }
 
-
         /// Sends message "resultT memfun(arg1T,arg2T)" to item (non-blocking comm if remote)
 
         /// If item does not exist it is made with the default constructor.
@@ -1112,17 +1221,18 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2)
+        {
             check_initialized();
             // To work around bug in g++ 4.3.* use static cast as alternative mechanism to force type deduction
-            MEMFUN_RETURNT(memfunT) (implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&) = &implT:: template itemfun<memfunT,arg1T,arg2T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &) = &implT::template itemfun<memfunT, arg1T, arg2T>;
             return p->send(owner(key), itemfun, key, memfun, arg1, arg2);
             /*return p->send(owner(key),
                            static_cast<MEMFUN_RETURNT(memfunT)(implT::*)(const keyT&, memfunT, const arg1T&, const arg2T&)>(&implT:: template itemfun<memfunT,arg1T,arg2T>), key, memfun, arg1, arg2);*/
         }
 
-
         /// Sends message "resultT memfun(arg1T,arg2T,arg3T)" to item (non-blocking comm if remote)
 
         /// If item does not exist it is made with the default constructor.
@@ -1133,14 +1243,15 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3)
+        {
             check_initialized();
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&, const arg3T&) = &implT:: template itemfun<memfunT,arg1T,arg2T,arg3T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &, const arg3T &) = &implT::template itemfun<memfunT, arg1T, arg2T, arg3T>;
             return p->send(owner(key), itemfun, key, memfun, arg1, arg2, arg3);
         }
 
-
         /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T)" to item (non-blocking comm if remote)
 
         /// If item does not exist it is made with the default constructor.
@@ -1151,14 +1262,15 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4)
+        {
             check_initialized();
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&, const arg3T&, const arg4T&) = &implT:: template itemfun<memfunT,arg1T,arg2T,arg3T,arg4T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &, const arg3T &, const arg4T &) = &implT::template itemfun<memfunT, arg1T, arg2T, arg3T, arg4T>;
             return p->send(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4);
         }
 
-
         /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T)" to item (non-blocking comm if remote)
 
         /// If item does not exist it is made with the default constructor.
@@ -1169,14 +1281,15 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5)
+        {
             check_initialized();
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&, const arg3T&, const arg4T&, const arg5T&) = &implT:: template itemfun<memfunT,arg1T,arg2T,arg3T,arg4T,arg5T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &, const arg3T &, const arg4T &, const arg5T &) = &implT::template itemfun<memfunT, arg1T, arg2T, arg3T, arg4T, arg5T>;
             return p->send(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5);
         }
 
-
         /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T)" to item (non-blocking comm if remote)
 
         /// If item does not exist it is made with the default constructor.
@@ -1187,14 +1300,15 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T, typename arg6T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6)
+        {
             check_initialized();
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&, const arg3T&, const arg4T&, const arg5T&, const arg6T&) = &implT:: template itemfun<memfunT,arg1T,arg2T,arg3T,arg4T,arg5T,arg6T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &, const arg3T &, const arg4T &, const arg5T &, const arg6T &) = &implT::template itemfun<memfunT, arg1T, arg2T, arg3T, arg4T, arg5T, arg6T>;
             return p->send(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5, arg6);
         }
 
-
         /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T,arg7T)" to item (non-blocking comm if remote)
 
         /// If item does not exist it is made with the default constructor.
@@ -1205,91 +1319,98 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T, typename arg6T, typename arg7T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4,
-		     const arg5T& arg5, const arg6T& arg6, const arg7T& arg7) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4,
+             const arg5T &arg5, const arg6T &arg6, const arg7T &arg7)
+        {
             check_initialized();
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const arg1T&, const arg2T&, const arg3T&, const arg4T&, const arg5T&, const arg6T&, const arg7T&) = &implT:: template itemfun<memfunT,arg1T,arg2T,arg3T,arg4T,arg5T,arg6T,arg7T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const arg1T &, const arg2T &, const arg3T &, const arg4T &, const arg5T &, const arg6T &, const arg7T &) = &implT::template itemfun<memfunT, arg1T, arg2T, arg3T, arg4T, arg5T, arg6T, arg7T>;
             return p->send(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
         }
 
-
         /// Sends message "resultT memfun() const" to item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun) const {
-            return const_cast<containerT*>(this)->send(key,memfun);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun) const
+        {
+            return const_cast<containerT *>(this)->send(key, memfun);
         }
 
         /// Sends message "resultT memfun(arg1T) const" to item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1) const {
-            return const_cast<containerT*>(this)->send(key,memfun,arg1);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1) const
+        {
+            return const_cast<containerT *>(this)->send(key, memfun, arg1);
         }
 
         /// Sends message "resultT memfun(arg1T,arg2T) const" to item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2) const {
-            return const_cast<containerT*>(this)->send(key,memfun,arg1,arg2);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2) const
+        {
+            return const_cast<containerT *>(this)->send(key, memfun, arg1, arg2);
         }
 
-
         /// Sends message "resultT memfun(arg1T,arg2T,arg3T) const" to item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3) const {
-            return const_cast<containerT*>(this)->send(key,memfun,arg1,arg2,arg3);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3) const
+        {
+            return const_cast<containerT *>(this)->send(key, memfun, arg1, arg2, arg3);
         }
 
         /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T) const" to item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4) const {
-            return const_cast<containerT*>(this)->send(key,memfun,arg1,arg2,arg3,arg4);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4) const
+        {
+            return const_cast<containerT *>(this)->send(key, memfun, arg1, arg2, arg3, arg4);
         }
 
         /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T) const" to item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5) const {
-            return const_cast<containerT*>(this)->send(key,memfun,arg1,arg2,arg3,arg4,arg5);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5) const
+        {
+            return const_cast<containerT *>(this)->send(key, memfun, arg1, arg2, arg3, arg4, arg5);
         }
 
         /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T) const" to item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T, typename arg6T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3,
-			 const arg4T& arg4, const arg5T& arg5, const arg6T& arg6) const {
-            return const_cast<containerT*>(this)->send(key,memfun,arg1,arg2,arg3,arg4,arg5,arg6);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3,
+             const arg4T &arg4, const arg5T &arg5, const arg6T &arg6) const
+        {
+            return const_cast<containerT *>(this)->send(key, memfun, arg1, arg2, arg3, arg4, arg5, arg6);
         }
 
         /// Sends message "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T,arg7T) const" to item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T, typename arg6T, typename arg7T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        send(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3,
-			 const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const arg7T& arg7) const {
-            return const_cast<containerT*>(this)->send(key,memfun,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        send(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3,
+             const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const arg7T &arg7) const
+        {
+            return const_cast<containerT *>(this)->send(key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
         }
 
-
         /// Adds task "resultT memfun()" in process owning item (non-blocking comm if remote)
 
         /// If item does not exist it is made with the default constructor.
@@ -1301,10 +1422,12 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const TaskAttributes& attr = TaskAttributes()) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const TaskAttributes &attr = TaskAttributes())
+        {
             check_initialized();
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT) = &implT:: template itemfun<memfunT>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT) = &implT::template itemfun<memfunT>;
             return p->task(owner(key), itemfun, key, memfun, attr);
         }
 
@@ -1319,11 +1442,13 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const TaskAttributes& attr = TaskAttributes()) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const TaskAttributes &attr = TaskAttributes())
+        {
             check_initialized();
             typedef REMFUTURE(arg1T) a1T;
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&) = &implT:: template itemfun<memfunT,a1T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const a1T &) = &implT::template itemfun<memfunT, a1T>;
             return p->task(owner(key), itemfun, key, memfun, arg1, attr);
         }
 
@@ -1338,12 +1463,14 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const TaskAttributes& attr = TaskAttributes()) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const TaskAttributes &attr = TaskAttributes())
+        {
             check_initialized();
             typedef REMFUTURE(arg1T) a1T;
             typedef REMFUTURE(arg2T) a2T;
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&) = &implT:: template itemfun<memfunT,a1T,a2T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &) = &implT::template itemfun<memfunT, a1T, a2T>;
             return p->task(owner(key), itemfun, key, memfun, arg1, arg2, attr);
         }
 
@@ -1358,13 +1485,15 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const TaskAttributes& attr = TaskAttributes()) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const TaskAttributes &attr = TaskAttributes())
+        {
             check_initialized();
             typedef REMFUTURE(arg1T) a1T;
             typedef REMFUTURE(arg2T) a2T;
             typedef REMFUTURE(arg3T) a3T;
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&, const a3T&) = &implT:: template itemfun<memfunT,a1T,a2T,a3T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &, const a3T &) = &implT::template itemfun<memfunT, a1T, a2T, a3T>;
             return p->task(owner(key), itemfun, key, memfun, arg1, arg2, arg3, attr);
         }
 
@@ -1379,14 +1508,16 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const TaskAttributes& attr = TaskAttributes()) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const TaskAttributes &attr = TaskAttributes())
+        {
             check_initialized();
             typedef REMFUTURE(arg1T) a1T;
             typedef REMFUTURE(arg2T) a2T;
             typedef REMFUTURE(arg3T) a3T;
             typedef REMFUTURE(arg4T) a4T;
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&, const a3T&, const a4T&) = &implT:: template itemfun<memfunT,a1T,a2T,a3T,a4T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &, const a3T &, const a4T &) = &implT::template itemfun<memfunT, a1T, a2T, a3T, a4T>;
             return p->task(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, attr);
         }
 
@@ -1401,15 +1532,17 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const TaskAttributes& attr = TaskAttributes()) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const TaskAttributes &attr = TaskAttributes())
+        {
             check_initialized();
             typedef REMFUTURE(arg1T) a1T;
             typedef REMFUTURE(arg2T) a2T;
             typedef REMFUTURE(arg3T) a3T;
             typedef REMFUTURE(arg4T) a4T;
             typedef REMFUTURE(arg5T) a5T;
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&, const a3T&, const a4T&, const a5T&) = &implT:: template itemfun<memfunT,a1T,a2T,a3T,a4T,a5T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &, const a3T &, const a4T &, const a5T &) = &implT::template itemfun<memfunT, a1T, a2T, a3T, a4T, a5T>;
             return p->task(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5, attr);
         }
 
@@ -1424,8 +1557,9 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T, typename arg6T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const TaskAttributes& attr = TaskAttributes()) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const TaskAttributes &attr = TaskAttributes())
+        {
             check_initialized();
             typedef REMFUTURE(arg1T) a1T;
             typedef REMFUTURE(arg2T) a2T;
@@ -1433,7 +1567,8 @@ namespace madness {
             typedef REMFUTURE(arg4T) a4T;
             typedef REMFUTURE(arg5T) a5T;
             typedef REMFUTURE(arg6T) a6T;
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&, const a3T&, const a4T&, const a5T&, const a6T&) = &implT:: template itemfun<memfunT,a1T,a2T,a3T,a4T,a5T,a6T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &, const a3T &, const a4T &, const a5T &, const a6T &) = &implT::template itemfun<memfunT, a1T, a2T, a3T, a4T, a5T, a6T>;
             return p->task(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, attr);
         }
 
@@ -1448,8 +1583,9 @@ namespace madness {
         ///
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T, typename arg6T, typename arg7T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const arg7T& arg7, const TaskAttributes& attr = TaskAttributes()) {
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const arg7T &arg7, const TaskAttributes &attr = TaskAttributes())
+        {
             check_initialized();
             typedef REMFUTURE(arg1T) a1T;
             typedef REMFUTURE(arg2T) a2T;
@@ -1458,7 +1594,8 @@ namespace madness {
             typedef REMFUTURE(arg5T) a5T;
             typedef REMFUTURE(arg6T) a6T;
             typedef REMFUTURE(arg7T) a7T;
-            MEMFUN_RETURNT(memfunT)(implT::*itemfun)(const keyT&, memfunT, const a1T&, const a2T&, const a3T&, const a4T&, const a5T&, const a6T&, const a7T&) = &implT:: template itemfun<memfunT,a1T,a2T,a3T,a4T,a5T,a6T,a7T>;
+            MEMFUN_RETURNT(memfunT)
+            (implT::*itemfun)(const keyT &, memfunT, const a1T &, const a2T &, const a3T &, const a4T &, const a5T &, const a6T &, const a7T &) = &implT::template itemfun<memfunT, a1T, a2T, a3T, a4T, a5T, a6T, a7T>;
             return p->task(owner(key), itemfun, key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, arg7, attr);
         }
 
@@ -1466,80 +1603,88 @@ namespace madness {
 
         /// The method executes with a write lock on the item.
         template <typename memfunT>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun,  const TaskAttributes& attr = TaskAttributes()) const {
-            return const_cast<containerT*>(this)->task(key,memfun,attr);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const TaskAttributes &attr = TaskAttributes()) const
+        {
+            return const_cast<containerT *>(this)->task(key, memfun, attr);
         }
 
         /// Adds task "resultT memfun(arg1T) const" in process owning item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1,  const TaskAttributes& attr = TaskAttributes()) const {
-            return const_cast<containerT*>(this)->task(key,memfun,arg1,attr);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const TaskAttributes &attr = TaskAttributes()) const
+        {
+            return const_cast<containerT *>(this)->task(key, memfun, arg1, attr);
         }
 
         /// Adds task "resultT memfun(arg1T,arg2T) const" in process owning item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2,  const TaskAttributes& attr = TaskAttributes()) const {
-            return const_cast<containerT*>(this)->task(key,memfun,arg1,arg2,attr);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const TaskAttributes &attr = TaskAttributes()) const
+        {
+            return const_cast<containerT *>(this)->task(key, memfun, arg1, arg2, attr);
         }
 
         /// Adds task "resultT memfun(arg1T,arg2T,arg3T) const" in process owning item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const TaskAttributes& attr = TaskAttributes()) const {
-            return const_cast<containerT*>(this)->task(key,memfun,arg1,arg2,arg3,attr);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const TaskAttributes &attr = TaskAttributes()) const
+        {
+            return const_cast<containerT *>(this)->task(key, memfun, arg1, arg2, arg3, attr);
         }
 
         /// Adds task "resultT memfun(arg1T,arg2T,arg3T, arg4T) const" in process owning item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const TaskAttributes& attr = TaskAttributes()) const {
-            return const_cast<containerT*>(this)->task(key,memfun,arg1,arg2,arg3,arg4,attr);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const TaskAttributes &attr = TaskAttributes()) const
+        {
+            return const_cast<containerT *>(this)->task(key, memfun, arg1, arg2, arg3, arg4, attr);
         }
 
         /// Adds task "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T) const" in process owning item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const TaskAttributes& attr = TaskAttributes()) const {
-            return const_cast<containerT*>(this)->task(key,memfun,arg1,arg2,arg3,arg4,arg5,attr);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const TaskAttributes &attr = TaskAttributes()) const
+        {
+            return const_cast<containerT *>(this)->task(key, memfun, arg1, arg2, arg3, arg4, arg5, attr);
         }
 
         /// Adds task "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T) const" in process owning item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T, typename arg6T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const TaskAttributes& attr = TaskAttributes()) const {
-            return const_cast<containerT*>(this)->task(key,memfun,arg1,arg2,arg3,arg4,arg5,arg6,attr);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const TaskAttributes &attr = TaskAttributes()) const
+        {
+            return const_cast<containerT *>(this)->task(key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, attr);
         }
 
         /// Adds task "resultT memfun(arg1T,arg2T,arg3T,arg4T,arg5T,arg6T,arg7T) const" in process owning item (non-blocking comm if remote)
 
         /// The method executes with a write lock on the item.
         template <typename memfunT, typename arg1T, typename arg2T, typename arg3T, typename arg4T, typename arg5T, typename arg6T, typename arg7T>
-        Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) >
-        task(const keyT& key, memfunT memfun, const arg1T& arg1, const arg2T& arg2, const arg3T& arg3, const arg4T& arg4, const arg5T& arg5, const arg6T& arg6, const arg7T& arg7, const TaskAttributes& attr = TaskAttributes()) const {
-            return const_cast<containerT*>(this)->task(key,memfun,arg1,arg2,arg3,arg4,arg5,arg6,arg7,attr);
+        Future<REMFUTURE(MEMFUN_RETURNT(memfunT))>
+        task(const keyT &key, memfunT memfun, const arg1T &arg1, const arg2T &arg2, const arg3T &arg3, const arg4T &arg4, const arg5T &arg5, const arg6T &arg6, const arg7T &arg7, const TaskAttributes &attr = TaskAttributes()) const
+        {
+            return const_cast<containerT *>(this)->task(key, memfun, arg1, arg2, arg3, arg4, arg5, arg6, arg7, attr);
         }
 
-
         /// (de)Serialize --- *Local* data only to/from anything *except* Buffer*Archive and Parallel*Archive
 
         /// Advisable for *you* to fence before and after this to ensure consistency
         template <typename Archive>
-        void serialize(const Archive& ar) {
+        void serialize(const Archive &ar)
+        {
             //
             // !! If you change the format of this stream make sure that
             // !! the parallel in/out archive below is compatible
@@ -1548,18 +1693,23 @@ namespace madness {
             unsigned long count = 0;
             check_initialized();
 
-            if (Archive::is_output_archive) {
+            if (Archive::is_output_archive)
+            {
                 ar & magic;
-                for (iterator it=begin(); it!=end(); ++it) count++;
+                for (iterator it = begin(); it != end(); ++it)
+                    count++;
                 ar & count;
-                for (iterator it=begin(); it!=end(); ++it) ar & *it;
+                for (iterator it = begin(); it != end(); ++it)
+                    ar &*it;
             }
-            else {
+            else
+            {
                 long cookie = 0l;
                 ar & cookie;
                 MADNESS_ASSERT(cookie == magic);
                 ar & count;
-                while (count--) {
+                while (count--)
+                {
                     pairT datum;
                     ar & datum;
                     replace(datum);
@@ -1570,49 +1720,55 @@ namespace madness {
         /// (de)Serialize --- !! ONLY for purpose of interprocess communication
 
         /// This just writes/reads the unique id to/from the Buffer*Archive.
-        void serialize(const archive::BufferOutputArchive& ar) {
+        void serialize(const archive::BufferOutputArchive &ar)
+        {
             check_initialized();
-            ar & static_cast<WorldObject<implT>*>(p.get());
+            ar &static_cast<WorldObject<implT> *>(p.get());
         }
 
         /// (de)Serialize --- !! ONLY for purpose of interprocess communication
 
         /// This just writes/reads the unique id to/from the Buffer*Archive.
-        void serialize(const archive::BufferInputArchive& ar) {
-            WorldObject<implT>* ptr = nullptr;
+        void serialize(const archive::BufferInputArchive &ar)
+        {
+            WorldObject<implT> *ptr = nullptr;
             ar & ptr;
             MADNESS_ASSERT(ptr);
 
 #ifdef MADNESS_DISABLE_SHARED_FROM_THIS
-            p.reset(static_cast<implT*>(ptr), [] (implT *p_) -> void {});
+            p.reset(static_cast<implT *>(ptr), [](implT *p_) -> void{});
 #else
-            p = static_cast<implT*>(ptr)->shared_from_this();
+            p = static_cast<implT *>(ptr)->shared_from_this();
 #endif // MADNESS_DISABLE_SHARED_FROM_THIS
         }
 
         /// Returns the associated unique id ... must be initialized
-        const uniqueidT& id() const {
+        const uniqueidT &id() const
+        {
             check_initialized();
             return p->id();
         }
 
         /// Destructor passes ownership of implementation to world for deferred cleanup
-        virtual ~WorldContainer() {
+        virtual ~WorldContainer()
+        {
             detail::deferred_cleanup(p->get_world(), p);
         }
 
-        friend void swap<>(WorldContainer&, WorldContainer&);
+        friend void swap<>(WorldContainer &, WorldContainer &);
     };
 
     /// Swaps the content of two WorldContainer objects. It should be called on all nodes.
 
     /// \ingroup worlddc
     template <typename keyT, typename valueT, typename hashfunT>
-    void swap(WorldContainer<keyT, valueT, hashfunT>& dc0, WorldContainer<keyT, valueT, hashfunT>& dc1) {
-      std::swap(dc0.p, dc1.p);
+    void swap(WorldContainer<keyT, valueT, hashfunT> &dc0, WorldContainer<keyT, valueT, hashfunT> &dc1)
+    {
+        std::swap(dc0.p, dc1.p);
     }
 
-    namespace archive {
+    namespace archive
+    {
 
         /// Write container to parallel archive
 
@@ -1620,119 +1776,189 @@ namespace madness {
         /// all threads on each process serialize some values into a buffer, which gets concatenated
         /// and finally serialized to localarchive (aka VectorOutputArchive).
         template <class keyT, class valueT>
-        struct ArchiveStoreImpl< ParallelOutputArchive<VectorOutputArchive>, WorldContainer<keyT,valueT> > {
-            static void store(const ParallelOutputArchive<VectorOutputArchive>& ar, const WorldContainer<keyT,valueT>& t) {
+        struct ArchiveStoreImpl<ParallelOutputArchive<VectorOutputArchive>, WorldContainer<keyT, valueT>>
+        {
+            static void store(const ParallelOutputArchive<VectorOutputArchive> &ar, const WorldContainer<keyT, valueT> &t)
+            {
                 using localarchiveT = VectorOutputArchive;
                 const long magic = -5881828; // Sitar Indian restaurant in Knoxville (negative to indicate parallel!)
-                typedef WorldContainer<keyT,valueT> dcT;
+                typedef WorldContainer<keyT, valueT> dcT;
                 using const_iterator = typename dcT::const_iterator;
+                int count = t.size(); // Must be INT for MPI and NOT const since we'll do a global sum eventually
 
-                // const size_t default_size = 100*1024*1024;
-                const size_t default_size = 8ul<<30;
+                // Strategy:
+                // 1. Serialize local data to a buffer in parallel over threads
+                //    a) Compute the size of the buffer needed by each task
+                //    b) Sum sizes and allocate the buffer of exact sizes needed for all threads
+                //    c) Serialize the data into the buffer in parallel over threads
+                // 2. Gather all buffers to process 0
 
-                World* world = ar.get_world();
-                world->gop.fence();
+                World *world = ar.get_world();
+                world->gop.fence(); // Global fence here
 
-                class op_serialize : public TaskInterface {
-                    const size_t ntasks;
-                    const size_t taskid;
-                    const dcT& t;
-                    std::vector<unsigned char>& v;
+                class op_inspector : public TaskInterface
+                {
+                    const_iterator start, end;
+                    size_t &size;
 
                 public:
-                    op_serialize(size_t ntasks, size_t taskid, const dcT& t, std::vector<unsigned char>& v)
-                        : ntasks(ntasks), taskid(taskid), t(t), v(v) {}
-                    void run(World& world) {
-                        std::size_t hint_size=(1ul<<30)/ntasks;
-                        VectorOutputArchive var(v,hint_size);
-                        const_iterator it=t.begin();
-                        size_t n = 0;
-                        /// threads serialize round-robin over the container
-                        while (it!=t.end()) {
-                            if ((n%ntasks) == taskid) {
-                                var & *it;
-                            }
-                            ++it;
-                            n++;
-                        }
+                    op_inspector(const_iterator start, const_iterator end, size_t &size)
+                        : start(start), end(end), size(size) {}
+                    void run(World &world)
+                    {
+                        BufferOutputArchive bo;
+                        for (const_iterator it = start; it != end; ++it)
+                            bo &*it;
+                        size = bo.size();
                     }
                 };
 
-                class op_concat : public TaskInterface {
-                    unsigned char* all_data;
-                    const std::vector<unsigned char>& v;
+                class op_executor : public TaskInterface
+                {
+                    const_iterator start, end;
+                    unsigned char *buf;
+                    const size_t size;
+
                 public:
-                    op_concat(unsigned char* all_data, const std::vector<unsigned char>& v)
-                        : all_data(all_data), v(v) {}
-                    void run(World& world) {
-                        memcpy(all_data, v.data(), v.size());
+                    op_executor(const_iterator start, const_iterator end, unsigned char *buf, size_t size)
+                        : start(start), end(end), buf(buf), size(size) {}
+                    void run(World &world)
+                    {
+                        BufferOutputArchive bo(buf, size);
+                        for (const_iterator it = start; it != end; ++it)
+                        {
+                            bo &*it;
+                        }
+                        MADNESS_CHECK(size == bo.size());
                     }
                 };
 
-                world->gop.fence();
-                double wall0=wall_time();
-                Mutex mutex;
-                size_t ntasks = std::max(size_t(1), ThreadPool::size());
+                // No need for LOCAL fence here since only master thread is busy
+                double wall0 = wall_time();
+                const size_t ntasks = std::min(size_t(count), std::max(size_t(1), ThreadPool::size()));
+                size_t local_size = 0;
+                double wall1 = wall0;
+                unsigned char *buf = nullptr; // stays null when the local container is empty (ntasks == 0)
+                if (ntasks > 0)
+                {
+                    const size_t max_items_per_task = (std::max(1, count) - 1) / ntasks + 1;
+                    // Per-task iterator ranges and byte counts (std::vector: runtime-sized C arrays are not standard C++)
+                    std::vector<const_iterator> starts(ntasks), ends(ntasks);
+                    std::vector<size_t> local_sizes(ntasks); // never resized, so element references handed to tasks stay valid
+                    const_iterator start = t.begin();
+                    size_t nleft = count;
+                    for (size_t taskid = 0; taskid < ntasks; taskid++)
+                    {
+                        const_iterator end = start;
+                        if (taskid == (ntasks - 1))
+                        {
+                            end = t.end();
+                        }
+                        else
+                        {
+                            size_t nitems = std::min(max_items_per_task, nleft);
+                            std::advance(end, nitems); // advance by the clamped count so we never step past t.end()
+                            nleft -= nitems;
+                        }
+                        starts[taskid] = start;
+                        ends[taskid] = end;
+                        world->taskq.add(new op_inspector(start, end, local_sizes[taskid])); // Be sure to pass iterators by value!!
+                        start = end;
+                    }
+                    world->taskq.fence(); // just need LOCAL fence
+                    wall1 = wall_time();
+                    // if (world->rank() == 0)
+                        // printf("time in op_inspector: %8.4fs\n", wall1 - wall0);
+                    wall0 = wall1;
+
+                    // total size over all threads
+                    for (size_t taskid = 0; taskid < ntasks; taskid++)
+                    {
+                        local_size += local_sizes[taskid];
+                        // print("taskid",taskid,"size",local_sizes[taskid]);
+                    }
 
-                std::vector<std::vector<unsigned char>> v(ntasks);
-                for (size_t taskid=0; taskid<ntasks; taskid++)
-                    world->taskq.add(new op_serialize(ntasks, taskid, t, v[taskid]));
-                world->gop.fence();
-                // total size of all vectors
-                size_t total_size = 0;
-                for (size_t taskid=0; taskid<ntasks; taskid++) total_size += v[taskid].size();
-                std::vector<unsigned char> vtotal(total_size);
-                
-                size_t offset = 0;
-                for (size_t taskid=0; taskid<ntasks; taskid++) {
-                    world->taskq.add(new op_concat(&vtotal[offset], v[taskid]));
-                    offset += v[taskid].size();
+                    // Allocate the buffer for all threads
+                    buf = new unsigned char[local_size];
+
+                    // Now execute the serialization
+                    size_t offset = 0;
+                    for (size_t taskid = 0; taskid < ntasks; taskid++)
+                    {
+                        world->taskq.add(new op_executor(starts[taskid], ends[taskid], buf + offset, local_sizes[taskid]));
+                        offset += local_sizes[taskid];
+                    }
+                    world->taskq.fence(); // just need LOCAL fence
+
+                    wall1 = wall_time();
+                    // if (world->rank() == 0)
+                        // printf("time in op_executor: %8.4fs\n", wall1 - wall0);
+                    wall0 = wall1;
                 }
-                v.clear();
+                // Verify that the serialization worked!!
+                // {
+                //     BufferInputArchive bi(buf, local_size);
+                //     for (int item=0; item<count; item++) {
+                //         std::pair<keyT, valueT> datum;
+                //         bi & datum;
+                //         print("deserializing",datum.first);
+                //     }
+                // }
 
-                double wall1=wall_time();
-                if (world->rank()==0) printf("time in the taskq: %8.4fs\n",wall1-wall0);
                 // Gather all buffers to process 0
                 // first gather all of the sizes and counts to a vector in process 0
-                int size = vtotal.size();
-                int count = t.size();
+                const int size = static_cast<int>(local_size); // MPI counts are int; assumes local_size fits in int
                 std::vector<int> sizes(world->size());
                 MPI_Gather(&size, 1, MPI_INT, sizes.data(), 1, MPI_INT, 0, world->mpi.comm().Get_mpi_comm());
                 world->gop.sum(count); // just need total number of elements
 
-                print("time 3",wall_time());
-                // build the cumulative sum of sizes
+                // print("time 3",wall_time());
+                // build the cumulative sum of sizes
                 std::vector<int> offsets(world->size());
                 offsets[0] = 0;
-                for (int i=1; i<world->size(); ++i) offsets[i] = offsets[i-1] + sizes[i-1];
-                MADNESS_CHECK(offsets.back() + sizes.back() == total_size);
+                for (int i = 1; i < world->size(); ++i)
+                    offsets[i] = offsets[i - 1] + sizes[i - 1];
+                size_t total_size = offsets.back() + sizes.back();
+                // if (world->rank() == 0)
+                    // print("total_size", total_size);
 
-                print("time 4",wall_time());
+                // print("time 4",wall_time());
                 // gather the vector of data v from each process to process 0
-                unsigned char* all_data=0;
-                if (world->rank() == 0) {
+                unsigned char *all_data = nullptr;
+                if (world->rank() == 0)
+                {
                     all_data = new unsigned char[total_size];
                 }
-                MPI_Gatherv(vtotal.data(), vtotal.size(), MPI_BYTE, all_data, sizes.data(), offsets.data(), MPI_BYTE, 0, world->mpi.comm().Get_mpi_comm());
+                MPI_Gatherv(buf, local_size, MPI_BYTE, all_data, sizes.data(), offsets.data(), MPI_BYTE, 0, world->mpi.comm().Get_mpi_comm());
+
+                wall1 = wall_time();
+                // if (world->rank() == 0)
+                    // printf("time in gather+gatherv: %8.4fs\n", wall1 - wall0);
+                wall0 = wall1;
+
+                delete[] buf;
 
-                print("time 5",wall_time());
-                if (world->rank() == 0) {
-                    auto& localar = ar.local_archive();
+                // print("time 5",wall_time());
+                if (world->rank() == 0)
+                {
+                    auto &localar = ar.local_archive();
                     localar & magic & 1; // 1 client
                     // localar & t;
-                    ArchivePrePostImpl<localarchiveT,dcT>::preamble_store(localar);
-                    localar & -magic & count;
+                    ArchivePrePostImpl<localarchiveT, dcT>::preamble_store(localar);
+                    localar & -magic & static_cast<unsigned long>(count);
                     localar.store(all_data, total_size);
-                    ArchivePrePostImpl<localarchiveT,dcT>::postamble_store(localar);
+                    ArchivePrePostImpl<localarchiveT, dcT>::postamble_store(localar);
+                    wall1 = wall_time();
+                    // if (world->rank() == 0)
+                        // printf("time in final copy on node 0: %8.4fs\n", wall1 - wall0);
 
                     delete[] all_data;
                 }
                 world->gop.fence();
-                print("time 6",wall_time());
+                // print("time 6",wall_time());
             }
         };
 
-
         /// Write container to parallel archive with optional fence
 
         /// \ingroup worlddc
@@ -1751,57 +1977,68 @@ namespace madness {
         /// subsequent modifications. Also, there is always at least
         /// some synchronization between a client and its IO server.
         template <class keyT, class valueT, class localarchiveT>
-        struct ArchiveStoreImpl< ParallelOutputArchive<localarchiveT>, WorldContainer<keyT,valueT> > {
-            static void store(const ParallelOutputArchive<localarchiveT>& ar, const WorldContainer<keyT,valueT>& t) {
+        struct ArchiveStoreImpl<ParallelOutputArchive<localarchiveT>, WorldContainer<keyT, valueT>>
+        {
+            static void store(const ParallelOutputArchive<localarchiveT> &ar, const WorldContainer<keyT, valueT> &t)
+            {
                 const long magic = -5881828; // Sitar Indian restaurant in Knoxville (negative to indicate parallel!)
-                typedef WorldContainer<keyT,valueT> dcT;
+                typedef WorldContainer<keyT, valueT> dcT;
                 // typedef typename dcT::const_iterator iterator; // unused?
                 typedef typename dcT::pairT pairT;
-                World* world = ar.get_world();
+                World *world = ar.get_world();
                 Tag tag = world->mpi.unique_tag();
                 ProcessID me = world->rank();
-                if (ar.dofence()) world->gop.fence();
-                if (ar.is_io_node()) {
-                    auto& localar = ar.local_archive();
+                if (ar.dofence())
+                    world->gop.fence();
+                if (ar.is_io_node())
+                {
+                    auto &localar = ar.local_archive();
                     localar & magic & ar.num_io_clients();
-                    for (ProcessID p=0; p<world->size(); ++p) {
-                        if (p == me) {
+                    for (ProcessID p = 0; p < world->size(); ++p)
+                    {
+                        if (p == me)
+                        {
                             localar & t;
                         }
-                        else if (ar.io_node(p) == me) {
-                            world->mpi.Send(int(1),p,tag); // Tell client to start sending
+                        else if (ar.io_node(p) == me)
+                        {
+                            world->mpi.Send(int(1), p, tag); // Tell client to start sending
                             archive::MPIInputArchive source(*world, p);
                             long cookie = 0l;
                             unsigned long count = 0ul;
 
-                            ArchivePrePostImpl<localarchiveT,dcT>::preamble_store(localar);
+                            ArchivePrePostImpl<localarchiveT, dcT>::preamble_store(localar);
 
                             source & cookie & count;
                             localar & cookie & count;
-                            while (count--) {
+                            while (count--)
+                            {
                                 pairT datum;
                                 source & datum;
                                 localar & datum;
                             }
 
-                            ArchivePrePostImpl<localarchiveT,dcT>::postamble_store(localar);
+                            ArchivePrePostImpl<localarchiveT, dcT>::postamble_store(localar);
                         }
                     }
                 }
-                else {
+                else
+                {
                     ProcessID p = ar.my_io_node();
                     int flag;
-                    world->mpi.Recv(flag,p,tag);
+                    world->mpi.Recv(flag, p, tag);
                     MPIOutputArchive dest(*world, p);
                     dest & t;
                     dest.flush();
                 }
-                if (ar.dofence()) world->gop.fence();
+                if (ar.dofence())
+                    world->gop.fence();
             }
         };
 
         template <class keyT, class valueT, class localarchiveT>
-        struct ArchiveLoadImpl< ParallelInputArchive<localarchiveT>, WorldContainer<keyT,valueT> > {
+        struct ArchiveLoadImpl<ParallelInputArchive<localarchiveT>, WorldContainer<keyT, valueT>>
+        {
             /// Read container from parallel archive
 
             /// \ingroup worlddc
@@ -1811,24 +2048,29 @@ namespace madness {
             /// can always run a separate job to copy to a different number.
             ///
             /// The IO node simply reads all data and inserts entries.
-            static void load(const ParallelInputArchive<localarchiveT>& ar, WorldContainer<keyT,valueT>& t) {
+            static void load(const ParallelInputArchive<localarchiveT> &ar, WorldContainer<keyT, valueT> &t)
+            {
                 const long magic = -5881828; // Sitar Indian restaurant in Knoxville (negative to indicate parallel!)
                 // typedef WorldContainer<keyT,valueT> dcT; // unused
                 // typedef typename dcT::iterator iterator; // unused
                 // typedef typename dcT::pairT pairT; // unused
-                World* world = ar.get_world();
-                if (ar.dofence()) world->gop.fence();
-                if (ar.is_io_node()) {
+                World *world = ar.get_world();
+                if (ar.dofence())
+                    world->gop.fence();
+                if (ar.is_io_node())
+                {
                     long cookie = 0l;
                     int nclient = 0;
-                    auto& localar = ar.local_archive();
+                    auto &localar = ar.local_archive();
                     localar & cookie & nclient;
                     MADNESS_CHECK(cookie == magic);
-                    while (nclient--) {
+                    while (nclient--)
+                    {
                         localar & t;
                     }
                 }
-                if (ar.dofence()) world->gop.fence();
+                if (ar.dofence())
+                    world->gop.fence();
             }
         };
     }