Skip to content

Commit

Permalink
Bugfix in BLR COLWISE.
Browse files: browse the repository at this point in the history
  • Loading branch information
pghysels committed Jul 19, 2024
1 parent cf768ce commit e7f2c85
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 26 deletions.
26 changes: 14 additions & 12 deletions src/BLR/BLRMatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1403,7 +1403,8 @@ namespace strumpack {
B11.piv_.resize(B11.rows());
auto rb = B11.rowblocks();
auto rb2 = B22.rowblocks();
const std::size_t CP = 1; // code below is now simplified for CP == 1
const std::size_t CP = 1;
// code below is now simplified for CP == 1
#pragma omp parallel if(!omp_in_parallel())
#pragma omp single nowait
{
Expand All @@ -1412,7 +1413,8 @@ namespace strumpack {
B21.fill_col(0., i, CP);
blockcol(i, true, CP);
for (std::size_t k=0; k<i; k++) {
if (admissible(k, i)) B11.compress_tile(k, i, opts);
if (admissible(k, i))
B11.compress_tile(k, i, opts);
std::vector<int> tpiv
(B11.piv_.begin()+B11.tileroff(k),
B11.piv_.begin()+B11.tileroff(k+1));
Expand Down Expand Up @@ -1449,16 +1451,6 @@ namespace strumpack {
B12.fill_col(0., i, CP);
B22.fill_col(0., i, CP);
blockcol(i, false, CP);
#pragma omp taskloop
for (std::size_t k=0; k<rb; k++) {
B12.compress_tile(k, i, opts);
std::vector<int> tpiv
(B11.piv_.begin()+B11.tileroff(k),
B11.piv_.begin()+B11.tileroff(k+1));
B12.tile(k, i).laswp(tpiv, true);
trsm(Side::L, UpLo::L, Trans::N, Diag::U,
scalar_t(1.), B11.tile(k, k), B12.tile(k, i));
}
for (std::size_t k=0; k<rb; k++) {
#pragma omp taskloop
for (std::size_t lk=k+1; lk<rb+rb2; lk++)
Expand All @@ -1471,6 +1463,16 @@ namespace strumpack {
B21.tile(lk-rb, k), B12.tile(k, i), scalar_t(1.),
B22.tile_dense(lk-rb, i).D());
}
#pragma omp taskloop
for (std::size_t k=0; k<rb; k++) {
B12.compress_tile(k, i, opts);
std::vector<int> tpiv
(B11.piv_.begin()+B11.tileroff(k),
B11.piv_.begin()+B11.tileroff(k+1));
B12.tile(k, i).laswp(tpiv, true);
trsm(Side::L, UpLo::L, Trans::N, Diag::U,
scalar_t(1.), B11.tile(k, k), B12.tile(k, i));
}
#pragma omp taskloop
for (std::size_t k=0; k<rb2; k++)
if (i != k)
Expand Down
22 changes: 8 additions & 14 deletions src/sparse/fronts/FrontBLR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,29 +100,23 @@ namespace strumpack {
F12blr_(e.r, e.c) = e.v;
}
if (part) {
auto lo = F11blr_.tilecoff(i);
auto hi = F11blr_.tilecoff(std::min(i+CP, F11blr_.colblocks()));
if (lchild_)
lchild_->extend_add_to_blr_col
(F11blr_, F12blr_, F21blr_, F22blr_, this, F11blr_.tilecoff(i),
F11blr_.tilecoff(std::min(i+CP, F11blr_.colblocks())),
task_depth, opts);
(F11blr_, F12blr_, F21blr_, F22blr_, this, lo, hi, task_depth, opts);
if (rchild_)
rchild_->extend_add_to_blr_col
(F11blr_, F12blr_, F21blr_, F22blr_, this, F11blr_.tilecoff(i),
F11blr_.tilecoff(std::min(i+CP, F11blr_.colblocks())),
task_depth, opts);
(F11blr_, F12blr_, F21blr_, F22blr_, this, lo, hi, task_depth, opts);
} else {
auto lo = F22blr_.tilecoff(i) + dim_sep();
auto hi = F22blr_.tilecoff(std::min(i+CP, F22blr_.colblocks())) + dim_sep();
if (lchild_)
lchild_->extend_add_to_blr_col
(F11blr_, F12blr_, F21blr_, F22blr_, this,
F22blr_.tilecoff(i) + dim_sep(),
F22blr_.tilecoff(std::min(i+CP, F22blr_.colblocks())) + dim_sep(),
task_depth, opts);
(F11blr_, F12blr_, F21blr_, F22blr_, this, lo, hi, task_depth, opts);
if (rchild_)
rchild_->extend_add_to_blr_col
(F11blr_, F12blr_, F21blr_, F22blr_, this,
F22blr_.tilecoff(i) + dim_sep(),
F22blr_.tilecoff(std::min(i+CP, F22blr_.colblocks())) + dim_sep(),
task_depth, opts);
(F11blr_, F12blr_, F21blr_, F22blr_, this, lo, hi, task_depth, opts);
}
}

Expand Down

0 comments on commit e7f2c85

Please sign in to comment.