17 #ifndef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
18 #define MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
28 #include <Xpetra_MultiVectorFactory.hpp>
30 #include <Xpetra_CrsMatrixWrap.hpp>
37 #include "MueLu_Utilities.hpp"
42 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
54 int numProcs = comm->getSize();
55 int myRank = comm->getRank();
57 size_t nDofsPerNode = 1;
58 if (A->IsView(
"stridedMaps")) {
60 nDofsPerNode = Teuchos::rcp_dynamic_cast<
const StridedMap>(permRowMapStrided)->getFixedBlockSize();
63 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
64 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
65 std::vector<MT> Weights;
69 for (
size_t row = 0; row < A->getRowMap()->getLocalNumElements(); row++) {
72 if (permRowMap->isNodeGlobalElement(grow) ==
true)
continue;
74 size_t nnz = A->getNumEntriesInLocalRow(row);
79 A->getLocalRowView(row, indices, vals);
82 "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
88 for (
size_t j = 0; j < Teuchos::as<size_t>(indices.
size()); j++) {
92 gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
96 if (grow == gMaxValIdx)
97 keepDiagonalEntries.push_back(std::make_pair(grow, grow));
102 for (
size_t row = 0; row < permRowMap->getLocalNumElements(); row++) {
104 LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
105 size_t nnz = A->getNumEntriesInLocalRow(lArow);
110 A->getLocalRowView(lArow, indices, vals);
113 "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
119 for (
size_t j = 0; j < Teuchos::as<size_t>(indices.
size()); j++) {
123 gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
128 permutedDiagCandidates.push_back(std::make_pair(grow, gMaxValIdx));
129 Weights.push_back(maxVal / (norm1 * Teuchos::as<MT>(nnz)));
131 std::cout <<
"ATTENTION: row " << grow <<
" has only zero entries -> singular matrix!" << std::endl;
136 std::vector<int> permutation;
147 gColVec->putScalar(SC_ZERO);
148 gDomVec->putScalar(SC_ZERO);
151 for (
typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
152 gColVec->sumIntoGlobalValue((*p).second, 1.0);
156 gDomVec->doExport(*gColVec, *exporter,
Xpetra::ADD);
159 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered;
160 std::map<GlobalOrdinal, MT> gColId2Weight;
164 for (
size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
166 std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
170 LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
176 ddata[lcol] += SC_ONE;
178 permutedDiagCandidatesFiltered.push_back(std::make_pair(grow, gcol));
179 gColId2Weight[gcol] = Weights[permutation[i]];
184 gDomVec->doExport(*gColVec, *exporter,
Xpetra::ADD);
192 std::vector<GlobalOrdinal> multipleColRequests;
196 std::queue<GlobalOrdinal> unusedColIdx;
198 for (
size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
206 multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
208 unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
213 LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
223 if (globalMultColRequests > 0) {
229 std::vector<GlobalOrdinal> numMyMultColRequests(numProcs, 0);
230 std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs, 0);
231 numMyMultColRequests[myRank] = localMultColRequests;
233 numMyMultColRequests.data(),
234 numGlobalMultColRequests.data());
238 for (
int i = 0; i < myRank - 1; i++)
239 nMyOffset += numGlobalMultColRequests[i];
242 std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests, zero);
243 std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests, zero);
246 for (
size_t i = 0; i < multipleColRequests.size(); i++) {
247 procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i];
252 procMultRequestedColIds.data(),
253 global_procMultRequestedColIds.data());
256 for (
size_t k = 0; k < global_procMultRequestedColIds.size(); k++) {
259 std::vector<MT> MyWeightForColId(numProcs, MT_ZERO);
260 std::vector<MT> GlobalWeightForColId(numProcs, MT_ZERO);
262 if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
263 MyWeightForColId[myRank] = gColId2Weight[globColId];
265 MyWeightForColId[myRank] = MT_ZERO;
269 MyWeightForColId.data(),
270 GlobalWeightForColId.data());
272 if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
275 MT winnerValue = MT_ZERO;
276 int winnerProcRank = 0;
277 for (
int proc = 0; proc < numProcs; proc++) {
278 if (GlobalWeightForColId[proc] > winnerValue) {
279 winnerValue = GlobalWeightForColId[proc];
280 winnerProcRank = proc;
287 if (myRank != winnerProcRank) {
289 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
290 while (p != permutedDiagCandidatesFiltered.end()) {
291 if ((*p).second == globColId)
292 p = permutedDiagCandidatesFiltered.erase(p);
304 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
305 RowColPairs.insert(RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
306 RowColPairs.insert(RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());
311 gColVec->putScalar(SC_ZERO);
312 gDomVec->putScalar(SC_ZERO);
313 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
314 while (pl != RowColPairs.end()) {
318 gColVec->sumIntoGlobalValue(jk, 1.0);
321 gDomVec->doExport(*gColVec, *exporter,
Xpetra::ADD);
322 for (
size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
324 if (arrDomVec[sz] > 1.0) {
325 GetOStream(
Runtime0) <<
"RowColPairs has multiple column [" << sz <<
"]=" << arrDomVec[sz] << std::endl;
326 }
else if (arrDomVec[sz] == SC_ZERO) {
327 GetOStream(
Runtime0) <<
"RowColPairs has empty column [" << sz <<
"]=" << arrDomVec[sz] << std::endl;
364 Pperm->putScalar(SC_ZERO);
365 Qperm->putScalar(SC_ZERO);
366 lQperm->putScalar(SC_ZERO);
375 RowIdStatus->putScalar(SC_ZERO);
376 ColIdStatus->putScalar(SC_ZERO);
377 lColIdStatus->putScalar(SC_ZERO);
378 ColIdUsed->putScalar(SC_ZERO);
393 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.
begin();
394 while (p != RowColPairs.end()) {
401 if (RowIdStatusArray[lik] == SC_ZERO) {
402 RowIdStatusArray[lik] = SC_ONE;
403 lColIdStatusArray[ljk] = SC_ONE;
404 Pperm->replaceLocalValue(lik, ik);
405 lQperm->replaceLocalValue(ljk, ik);
406 ColIdUsed->replaceGlobalValue(ik, SC_ONE);
407 p = RowColPairs.erase(p);
410 if (floor(ik / nDofsPerNode) != floor(jk / nDofsPerNode)) {
411 lWideRangeColPermutations++;
419 ColIdStatus->doExport(*lColIdStatus, *QpermExporter,
Xpetra::ABSMAX);
422 if (RowColPairs.size() > 0) GetOStream(
Warnings0) <<
"MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl;
427 size_t cntFreeRowIdx = 0;
428 std::queue<GlobalOrdinal> qFreeGRowIdx;
429 for (
size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
430 if (RowIdStatusArray[lik] == SC_ZERO) {
432 qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
437 for (
size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
438 if (RowIdStatusArray[lik] == SC_ZERO) {
439 RowIdStatusArray[lik] = SC_ONE;
440 Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
442 if (floor(qFreeGRowIdx.front() / nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik) / nDofsPerNode)) {
443 lWideRangeRowPermutations++;
450 size_t cntUnusedColIdx = 0;
451 std::queue<GlobalOrdinal> qUnusedGColIdx;
452 size_t cntFreeColIdx = 0;
453 std::queue<GlobalOrdinal> qFreeGColIdx;
459 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
460 if (ColIdStatusArray[ljk] == SC_ZERO) {
462 qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
466 for (
size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
467 if (ColIdUsedArray[ljk] == SC_ZERO) {
469 qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
474 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
476 if (cntUnusedColIdx == 0)
break;
478 if (ColIdStatusArray[ljk] == SC_ZERO) {
479 ColIdStatusArray[ljk] = SC_ONE;
480 Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front());
481 ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(), SC_ONE);
483 if (floor(qUnusedGColIdx.front() / nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk) / nDofsPerNode)) {
484 lWideRangeColPermutations++;
486 qUnusedGColIdx.pop();
498 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
499 if (ColIdStatusArray[ljk] == SC_ZERO) {
507 MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
509 std::cout <<
"global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
513 if (global_cntFreeColIdx > 0) {
517 MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
519 std::cout <<
"global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
523 std::vector<LocalOrdinal> local_UnusedColIdxOnProc(numProcs);
524 std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
525 local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
527 local_UnusedColIdxOnProc.data(),
528 global_UnusedColIdxOnProc.data());
531 std::cout <<
"PROC " << myRank <<
" global num unused indices per proc: ";
532 for (
size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
533 std::cout <<
" " << global_UnusedColIdxOnProc[ljk];
535 std::cout << std::endl;
539 std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
540 std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
542 for (
int proc = 0; proc < myRank; proc++) {
543 global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
545 for (
GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter + local_cntUnusedColIdx; k++) {
546 local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
547 qUnusedGColIdx.pop();
550 local_UnusedColIdxVector.data(),
551 global_UnusedColIdxVector.data());
553 std::cout <<
"PROC " << myRank <<
" global UnusedGColIdx: ";
554 for (
size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
555 std::cout <<
" " << global_UnusedColIdxVector[ljk];
557 std::cout << std::endl;
562 std::vector<LocalOrdinal> local_EmptyColIdxOnProc(numProcs);
563 std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
564 local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
566 local_EmptyColIdxOnProc.data(),
567 global_EmptyColIdxOnProc.data());
570 std::cout <<
"PROC " << myRank <<
" global num of needed column indices: ";
571 for (
size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
572 std::cout <<
" " << global_EmptyColIdxOnProc[ljk];
574 std::cout << std::endl;
580 for (
int proc = 0; proc < myRank; proc++) {
581 global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
585 GetOStream(
Statistics0) <<
"PROC " << myRank <<
" is allowd to use the following column gids: ";
586 for (
GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
587 GetOStream(
Statistics0) << global_UnusedColIdxVector[k] <<
" ";
596 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
597 if (ColIdStatusArray[ljk] == SC_ZERO) {
598 ColIdStatusArray[ljk] = SC_ONE;
599 Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
600 ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter], SC_ONE);
602 if (floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter] / nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk) / nDofsPerNode)) {
603 lWideRangeColPermutations++;
621 for (
size_t row = 0; row < A->getLocalNumRows(); row++) {
628 permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.
view(0, indoutP.
size()), valout.
view(0, valout.
size()));
629 permQTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutQ.
view(0, indoutQ.
size()), valout.
view(0, valout.
size()));
633 permPTmatrix->fillComplete();
634 permQTmatrix->fillComplete();
638 for (
size_t row = 0; row < permPTmatrix->getLocalNumRows(); row++) {
639 if (permPTmatrix->getNumEntriesInLocalRow(row) != 1)
640 GetOStream(
Warnings0) <<
"#entries in row " << row <<
" of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
641 if (permPmatrix->getNumEntriesInLocalRow(row) != 1)
642 GetOStream(
Warnings0) <<
"#entries in row " << row <<
" of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
643 if (permQTmatrix->getNumEntriesInLocalRow(row) != 1)
644 GetOStream(
Warnings0) <<
"#entries in row " << row <<
" of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
662 permPApermQt->getLocalDiagCopy(*diagVec);
665 for (
size_t i = 0; i < diagVec->getMap()->getLocalNumElements(); ++i) {
666 if (diagVecData[i] != SC_ZERO)
667 invDiagVecData[i] = SC_ONE / diagVecData[i];
669 invDiagVecData[i] = SC_ONE;
670 GetOStream(
Statistics0) <<
"MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
674 for (
size_t row = 0; row < A->getLocalNumRows(); row++) {
677 diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.
view(0, valout.
size()));
680 diagScalingOp->fillComplete();
683 currentLevel.
Set(
"A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory );
685 currentLevel.
Set(
"permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory );
686 currentLevel.
Set(
"permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory );
687 currentLevel.
Set(
"permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory );
688 currentLevel.
Set(
"permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory );
693 permPmatrix->getLocalDiagCopy(*diagPVec);
698 for (
size_t i = 0; i < diagPVec->getMap()->getLocalNumElements(); ++i) {
699 if (diagPVecData[i] == SC_ZERO) {
700 lNumRowPermutations++;
706 MueLu_sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);
711 permQTmatrix->getLocalDiagCopy(*diagQTVec);
716 for (
size_t i = 0; i < diagQTVec->getMap()->getLocalNumElements(); ++i) {
717 if (diagQTVecData[i] == SC_ZERO) {
718 lNumColPermutations++;
724 MueLu_sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);
726 currentLevel.
Set(
"#RowPermutations", gNumRowPermutations, genFactory );
727 currentLevel.
Set(
"#ColPermutations", gNumColPermutations, genFactory );
728 currentLevel.
Set(
"#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory );
729 currentLevel.
Set(
"#WideRangeColPermutations", gWideRangeColPermutations, genFactory );
731 GetOStream(
Statistics0) <<
"#Row permutations/max possible permutations: " << gNumRowPermutations <<
"/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
732 GetOStream(
Statistics0) <<
"#Column permutations/max possible permutations: " << gNumColPermutations <<
"/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
733 GetOStream(
Runtime1) <<
"#wide range row permutations: " << gWideRangeRowPermutations <<
" #wide range column permutations: " << gWideRangeColPermutations << std::endl;
Important warning messages (one line)
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > &permRowMap, Level ¤tLevel, const FactoryBase *genFactory) const
build permutation operators
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void sortingPermutation(const std::vector< Scalar > &values, std::vector< LocalOrdinal > &v)
One-liner description of what is happening.
Print even more statistics.
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static magnitudeType magnitude(T a)
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > ¶ms=null)
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)
ArrayView< T > view(size_type lowerOffset, size_type size) const