diff --git a/src/BLR/BLRMatrix.cpp b/src/BLR/BLRMatrix.cpp index 51b5b580..8a529796 100644 --- a/src/BLR/BLRMatrix.cpp +++ b/src/BLR/BLRMatrix.cpp @@ -1403,7 +1403,8 @@ namespace strumpack { B11.piv_.resize(B11.rows()); auto rb = B11.rowblocks(); auto rb2 = B22.rowblocks(); - const std::size_t CP = 1; // code below is now simplified for CP == 1 + const std::size_t CP = 1; + // code below is now simplified for CP == 1 #pragma omp parallel if(!omp_in_parallel()) #pragma omp single nowait { @@ -1412,7 +1413,8 @@ namespace strumpack { B21.fill_col(0., i, CP); blockcol(i, true, CP); for (std::size_t k=0; k tpiv (B11.piv_.begin()+B11.tileroff(k), B11.piv_.begin()+B11.tileroff(k+1)); @@ -1449,16 +1451,6 @@ namespace strumpack { B12.fill_col(0., i, CP); B22.fill_col(0., i, CP); blockcol(i, false, CP); -#pragma omp taskloop - for (std::size_t k=0; k tpiv - (B11.piv_.begin()+B11.tileroff(k), - B11.piv_.begin()+B11.tileroff(k+1)); - B12.tile(k, i).laswp(tpiv, true); - trsm(Side::L, UpLo::L, Trans::N, Diag::U, - scalar_t(1.), B11.tile(k, k), B12.tile(k, i)); - } for (std::size_t k=0; k tpiv + (B11.piv_.begin()+B11.tileroff(k), + B11.piv_.begin()+B11.tileroff(k+1)); + B12.tile(k, i).laswp(tpiv, true); + trsm(Side::L, UpLo::L, Trans::N, Diag::U, + scalar_t(1.), B11.tile(k, k), B12.tile(k, i)); + } #pragma omp taskloop for (std::size_t k=0; kextend_add_to_blr_col - (F11blr_, F12blr_, F21blr_, F22blr_, this, F11blr_.tilecoff(i), - F11blr_.tilecoff(std::min(i+CP, F11blr_.colblocks())), - task_depth, opts); + (F11blr_, F12blr_, F21blr_, F22blr_, this, lo, hi, task_depth, opts); if (rchild_) rchild_->extend_add_to_blr_col - (F11blr_, F12blr_, F21blr_, F22blr_, this, F11blr_.tilecoff(i), - F11blr_.tilecoff(std::min(i+CP, F11blr_.colblocks())), - task_depth, opts); + (F11blr_, F12blr_, F21blr_, F22blr_, this, lo, hi, task_depth, opts); } else { + auto lo = F22blr_.tilecoff(i) + dim_sep(); + auto hi = F22blr_.tilecoff(std::min(i+CP, F22blr_.colblocks())) + dim_sep(); if (lchild_) lchild_->extend_add_to_blr_col - (F11blr_, F12blr_, F21blr_, F22blr_, this, - F22blr_.tilecoff(i) + dim_sep(), - F22blr_.tilecoff(std::min(i+CP, F22blr_.colblocks())) + dim_sep(), - task_depth, opts); + (F11blr_, F12blr_, F21blr_, F22blr_, this, lo, hi, task_depth, opts); if (rchild_) rchild_->extend_add_to_blr_col - (F11blr_, F12blr_, F21blr_, F22blr_, this, - F22blr_.tilecoff(i) + dim_sep(), - F22blr_.tilecoff(std::min(i+CP, F22blr_.colblocks())) + dim_sep(), - task_depth, opts); + (F11blr_, F12blr_, F21blr_, F22blr_, this, lo, hi, task_depth, opts); } }