8 #ifndef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
9 #define MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
19 #include <Xpetra_MultiVectorFactory.hpp>
21 #include <Xpetra_CrsMatrixWrap.hpp>
28 #include "MueLu_Utilities.hpp"
33 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
45 int numProcs = comm->getSize();
46 int myRank = comm->getRank();
48 size_t nDofsPerNode = 1;
49 if (A->IsView(
"stridedMaps")) {
51 nDofsPerNode = Teuchos::rcp_dynamic_cast<
const StridedMap>(permRowMapStrided)->getFixedBlockSize();
54 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
55 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
56 std::vector<MT> Weights;
60 for (
size_t row = 0; row < A->getRowMap()->getLocalNumElements(); row++) {
63 if (permRowMap->isNodeGlobalElement(grow) ==
true)
continue;
65 size_t nnz = A->getNumEntriesInLocalRow(row);
70 A->getLocalRowView(row, indices, vals);
73 "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
79 for (
size_t j = 0; j < Teuchos::as<size_t>(indices.
size()); j++) {
83 gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
87 if (grow == gMaxValIdx)
88 keepDiagonalEntries.push_back(std::make_pair(grow, grow));
93 for (
size_t row = 0; row < permRowMap->getLocalNumElements(); row++) {
95 LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
96 size_t nnz = A->getNumEntriesInLocalRow(lArow);
101 A->getLocalRowView(lArow, indices, vals);
104 "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
110 for (
size_t j = 0; j < Teuchos::as<size_t>(indices.
size()); j++) {
114 gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
119 permutedDiagCandidates.push_back(std::make_pair(grow, gMaxValIdx));
120 Weights.push_back(maxVal / (norm1 * Teuchos::as<MT>(nnz)));
122 std::cout <<
"ATTENTION: row " << grow <<
" has only zero entries -> singular matrix!" << std::endl;
127 std::vector<int> permutation;
138 gColVec->putScalar(SC_ZERO);
139 gDomVec->putScalar(SC_ZERO);
142 for (
typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
143 gColVec->sumIntoGlobalValue((*p).second, 1.0);
147 gDomVec->doExport(*gColVec, *exporter,
Xpetra::ADD);
150 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered;
151 std::map<GlobalOrdinal, MT> gColId2Weight;
155 for (
size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
157 std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
161 LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
167 ddata[lcol] += SC_ONE;
169 permutedDiagCandidatesFiltered.push_back(std::make_pair(grow, gcol));
170 gColId2Weight[gcol] = Weights[permutation[i]];
175 gDomVec->doExport(*gColVec, *exporter,
Xpetra::ADD);
183 std::vector<GlobalOrdinal> multipleColRequests;
187 std::queue<GlobalOrdinal> unusedColIdx;
189 for (
size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
197 multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
199 unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
204 LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
214 if (globalMultColRequests > 0) {
220 std::vector<GlobalOrdinal> numMyMultColRequests(numProcs, 0);
221 std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs, 0);
222 numMyMultColRequests[myRank] = localMultColRequests;
224 numMyMultColRequests.data(),
225 numGlobalMultColRequests.data());
229 for (
int i = 0; i < myRank - 1; i++)
230 nMyOffset += numGlobalMultColRequests[i];
233 std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests, zero);
234 std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests, zero);
237 for (
size_t i = 0; i < multipleColRequests.size(); i++) {
238 procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i];
243 procMultRequestedColIds.data(),
244 global_procMultRequestedColIds.data());
247 for (
size_t k = 0; k < global_procMultRequestedColIds.size(); k++) {
250 std::vector<MT> MyWeightForColId(numProcs, MT_ZERO);
251 std::vector<MT> GlobalWeightForColId(numProcs, MT_ZERO);
253 if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
254 MyWeightForColId[myRank] = gColId2Weight[globColId];
256 MyWeightForColId[myRank] = MT_ZERO;
260 MyWeightForColId.data(),
261 GlobalWeightForColId.data());
263 if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
266 MT winnerValue = MT_ZERO;
267 int winnerProcRank = 0;
268 for (
int proc = 0; proc < numProcs; proc++) {
269 if (GlobalWeightForColId[proc] > winnerValue) {
270 winnerValue = GlobalWeightForColId[proc];
271 winnerProcRank = proc;
278 if (myRank != winnerProcRank) {
280 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
281 while (p != permutedDiagCandidatesFiltered.end()) {
282 if ((*p).second == globColId)
283 p = permutedDiagCandidatesFiltered.erase(p);
295 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
296 RowColPairs.insert(RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
297 RowColPairs.insert(RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());
302 gColVec->putScalar(SC_ZERO);
303 gDomVec->putScalar(SC_ZERO);
304 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
305 while (pl != RowColPairs.end()) {
309 gColVec->sumIntoGlobalValue(jk, 1.0);
312 gDomVec->doExport(*gColVec, *exporter,
Xpetra::ADD);
313 for (
size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
315 if (arrDomVec[sz] > 1.0) {
316 GetOStream(
Runtime0) <<
"RowColPairs has multiple column [" << sz <<
"]=" << arrDomVec[sz] << std::endl;
317 }
else if (arrDomVec[sz] == SC_ZERO) {
318 GetOStream(
Runtime0) <<
"RowColPairs has empty column [" << sz <<
"]=" << arrDomVec[sz] << std::endl;
355 Pperm->putScalar(SC_ZERO);
356 Qperm->putScalar(SC_ZERO);
357 lQperm->putScalar(SC_ZERO);
366 RowIdStatus->putScalar(SC_ZERO);
367 ColIdStatus->putScalar(SC_ZERO);
368 lColIdStatus->putScalar(SC_ZERO);
369 ColIdUsed->putScalar(SC_ZERO);
384 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.
begin();
385 while (p != RowColPairs.end()) {
392 if (RowIdStatusArray[lik] == SC_ZERO) {
393 RowIdStatusArray[lik] = SC_ONE;
394 lColIdStatusArray[ljk] = SC_ONE;
395 Pperm->replaceLocalValue(lik, ik);
396 lQperm->replaceLocalValue(ljk, ik);
397 ColIdUsed->replaceGlobalValue(ik, SC_ONE);
398 p = RowColPairs.erase(p);
401 if (floor(ik / nDofsPerNode) != floor(jk / nDofsPerNode)) {
402 lWideRangeColPermutations++;
410 ColIdStatus->doExport(*lColIdStatus, *QpermExporter,
Xpetra::ABSMAX);
413 if (RowColPairs.size() > 0) GetOStream(
Warnings0) <<
"MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl;
418 size_t cntFreeRowIdx = 0;
419 std::queue<GlobalOrdinal> qFreeGRowIdx;
420 for (
size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
421 if (RowIdStatusArray[lik] == SC_ZERO) {
423 qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
428 for (
size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
429 if (RowIdStatusArray[lik] == SC_ZERO) {
430 RowIdStatusArray[lik] = SC_ONE;
431 Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
433 if (floor(qFreeGRowIdx.front() / nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik) / nDofsPerNode)) {
434 lWideRangeRowPermutations++;
441 size_t cntUnusedColIdx = 0;
442 std::queue<GlobalOrdinal> qUnusedGColIdx;
443 size_t cntFreeColIdx = 0;
444 std::queue<GlobalOrdinal> qFreeGColIdx;
450 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
451 if (ColIdStatusArray[ljk] == SC_ZERO) {
453 qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
457 for (
size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
458 if (ColIdUsedArray[ljk] == SC_ZERO) {
460 qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
465 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
467 if (cntUnusedColIdx == 0)
break;
469 if (ColIdStatusArray[ljk] == SC_ZERO) {
470 ColIdStatusArray[ljk] = SC_ONE;
471 Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front());
472 ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(), SC_ONE);
474 if (floor(qUnusedGColIdx.front() / nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk) / nDofsPerNode)) {
475 lWideRangeColPermutations++;
477 qUnusedGColIdx.pop();
489 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
490 if (ColIdStatusArray[ljk] == SC_ZERO) {
498 MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
500 std::cout <<
"global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
504 if (global_cntFreeColIdx > 0) {
508 MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
510 std::cout <<
"global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
514 std::vector<LocalOrdinal> local_UnusedColIdxOnProc(numProcs);
515 std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
516 local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
518 local_UnusedColIdxOnProc.data(),
519 global_UnusedColIdxOnProc.data());
522 std::cout <<
"PROC " << myRank <<
" global num unused indices per proc: ";
523 for (
size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
524 std::cout <<
" " << global_UnusedColIdxOnProc[ljk];
526 std::cout << std::endl;
530 std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
531 std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
533 for (
int proc = 0; proc < myRank; proc++) {
534 global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
536 for (
GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter + local_cntUnusedColIdx; k++) {
537 local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
538 qUnusedGColIdx.pop();
541 local_UnusedColIdxVector.data(),
542 global_UnusedColIdxVector.data());
544 std::cout <<
"PROC " << myRank <<
" global UnusedGColIdx: ";
545 for (
size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
546 std::cout <<
" " << global_UnusedColIdxVector[ljk];
548 std::cout << std::endl;
553 std::vector<LocalOrdinal> local_EmptyColIdxOnProc(numProcs);
554 std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
555 local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
557 local_EmptyColIdxOnProc.data(),
558 global_EmptyColIdxOnProc.data());
561 std::cout <<
"PROC " << myRank <<
" global num of needed column indices: ";
562 for (
size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
563 std::cout <<
" " << global_EmptyColIdxOnProc[ljk];
565 std::cout << std::endl;
571 for (
int proc = 0; proc < myRank; proc++) {
572 global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
576 GetOStream(
Statistics0) <<
"PROC " << myRank <<
" is allowd to use the following column gids: ";
577 for (
GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
578 GetOStream(
Statistics0) << global_UnusedColIdxVector[k] <<
" ";
587 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
588 if (ColIdStatusArray[ljk] == SC_ZERO) {
589 ColIdStatusArray[ljk] = SC_ONE;
590 Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
591 ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter], SC_ONE);
593 if (floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter] / nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk) / nDofsPerNode)) {
594 lWideRangeColPermutations++;
612 for (
size_t row = 0; row < A->getLocalNumRows(); row++) {
619 permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.
view(0, indoutP.
size()), valout.
view(0, valout.
size()));
620 permQTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutQ.
view(0, indoutQ.
size()), valout.
view(0, valout.
size()));
624 permPTmatrix->fillComplete();
625 permQTmatrix->fillComplete();
629 for (
size_t row = 0; row < permPTmatrix->getLocalNumRows(); row++) {
630 if (permPTmatrix->getNumEntriesInLocalRow(row) != 1)
631 GetOStream(
Warnings0) <<
"#entries in row " << row <<
" of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
632 if (permPmatrix->getNumEntriesInLocalRow(row) != 1)
633 GetOStream(
Warnings0) <<
"#entries in row " << row <<
" of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
634 if (permQTmatrix->getNumEntriesInLocalRow(row) != 1)
635 GetOStream(
Warnings0) <<
"#entries in row " << row <<
" of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
653 permPApermQt->getLocalDiagCopy(*diagVec);
656 for (
size_t i = 0; i < diagVec->getMap()->getLocalNumElements(); ++i) {
657 if (diagVecData[i] != SC_ZERO)
658 invDiagVecData[i] = SC_ONE / diagVecData[i];
660 invDiagVecData[i] = SC_ONE;
661 GetOStream(
Statistics0) <<
"MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
665 for (
size_t row = 0; row < A->getLocalNumRows(); row++) {
668 diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.
view(0, valout.
size()));
671 diagScalingOp->fillComplete();
674 currentLevel.
Set(
"A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory );
676 currentLevel.
Set(
"permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory );
677 currentLevel.
Set(
"permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory );
678 currentLevel.
Set(
"permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory );
679 currentLevel.
Set(
"permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory );
684 permPmatrix->getLocalDiagCopy(*diagPVec);
689 for (
size_t i = 0; i < diagPVec->getMap()->getLocalNumElements(); ++i) {
690 if (diagPVecData[i] == SC_ZERO) {
691 lNumRowPermutations++;
697 MueLu_sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);
702 permQTmatrix->getLocalDiagCopy(*diagQTVec);
707 for (
size_t i = 0; i < diagQTVec->getMap()->getLocalNumElements(); ++i) {
708 if (diagQTVecData[i] == SC_ZERO) {
709 lNumColPermutations++;
715 MueLu_sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);
717 currentLevel.
Set(
"#RowPermutations", gNumRowPermutations, genFactory );
718 currentLevel.
Set(
"#ColPermutations", gNumColPermutations, genFactory );
719 currentLevel.
Set(
"#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory );
720 currentLevel.
Set(
"#WideRangeColPermutations", gWideRangeColPermutations, genFactory );
722 GetOStream(
Statistics0) <<
"#Row permutations/max possible permutations: " << gNumRowPermutations <<
"/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
723 GetOStream(
Statistics0) <<
"#Column permutations/max possible permutations: " << gNumColPermutations <<
"/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
724 GetOStream(
Runtime1) <<
"#wide range row permutations: " << gWideRangeRowPermutations <<
" #wide range column permutations: " << gWideRangeColPermutations << std::endl;
Important warning messages (one line)
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > &permRowMap, Level ¤tLevel, const FactoryBase *genFactory) const
build permutation operators
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void sortingPermutation(const std::vector< Scalar > &values, std::vector< LocalOrdinal > &v)
One-liner description of what is happening.
Print even more statistics.
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static magnitudeType magnitude(T a)
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > ¶ms=null)
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)
ArrayView< T > view(size_type lowerOffset, size_type size) const