8 #ifndef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
9 #define MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
15 #include <Xpetra_MultiVector.hpp>
16 #include <Xpetra_Matrix.hpp>
17 #include <Xpetra_CrsGraph.hpp>
18 #include <Xpetra_Vector.hpp>
19 #include <Xpetra_MultiVectorFactory.hpp>
20 #include <Xpetra_VectorFactory.hpp>
21 #include <Xpetra_CrsMatrixWrap.hpp>
22 #include <Xpetra_Export.hpp>
23 #include <Xpetra_ExportFactory.hpp>
24 #include <Xpetra_Import.hpp>
25 #include <Xpetra_ImportFactory.hpp>
26 #include <Xpetra_MatrixMatrix.hpp>
28 #include "MueLu_Utilities.hpp"
33 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
46 int numProcs = comm->getSize();
47 int myRank = comm->getRank();
49 size_t nDofsPerNode = 1;
50 if (A->IsView(
"stridedMaps")) {
52 nDofsPerNode = Teuchos::rcp_dynamic_cast<
const StridedMap>(permRowMapStrided)->getFixedBlockSize();
55 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
56 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
57 std::vector<MT> Weights;
61 for (
size_t row = 0; row < A->getRowMap()->getNodeNumElements(); row++) {
64 if(permRowMap->isNodeGlobalElement(grow) ==
true)
continue;
66 size_t nnz = A->getNumEntriesInLocalRow(row);
71 A->getLocalRowView(row, indices, vals);
74 "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
80 for (
size_t j = 0; j < Teuchos::as<size_t>(indices.
size()); j++) {
84 gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
88 if(grow == gMaxValIdx)
89 keepDiagonalEntries.push_back(std::make_pair(grow,grow));
94 for (
size_t row = 0; row < permRowMap->getNodeNumElements(); row++) {
96 LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
97 size_t nnz = A->getNumEntriesInLocalRow(lArow);
102 A->getLocalRowView(lArow, indices, vals);
105 "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
111 for (
size_t j = 0; j < Teuchos::as<size_t>(indices.
size()); j++) {
115 gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
120 permutedDiagCandidates.push_back(std::make_pair(grow,gMaxValIdx));
121 Weights.push_back(maxVal/(norm1*Teuchos::as<MT>(nnz)));
123 std::cout <<
"ATTENTION: row " << grow <<
" has only zero entries -> singular matrix!" << std::endl;
129 std::vector<int> permutation;
140 gColVec->putScalar(SC_ZERO);
141 gDomVec->putScalar(SC_ZERO);
144 for (
typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
145 gColVec->sumIntoGlobalValue((*p).second,1.0);
149 gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
150 gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT);
152 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered;
153 std::map<GlobalOrdinal, MT> gColId2Weight;
156 for(
size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
158 std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
162 LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
168 ddata[lcol] += SC_ONE;
170 permutedDiagCandidatesFiltered.push_back(std::make_pair(grow,gcol));
171 gColId2Weight[gcol] = Weights[permutation[i]];
175 gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
176 gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT);
183 std::vector<GlobalOrdinal> multipleColRequests;
187 std::queue<GlobalOrdinal> unusedColIdx;
189 for(
size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
197 multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
199 unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
204 LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
214 if(globalMultColRequests > 0) {
220 std::vector<GlobalOrdinal> numMyMultColRequests(numProcs, 0);
221 std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs, 0);
222 numMyMultColRequests[myRank] = localMultColRequests;
224 numMyMultColRequests.data(),
225 numGlobalMultColRequests.data());
229 for (
int i=0; i<myRank-1; i++)
230 nMyOffset += numGlobalMultColRequests[i];
233 std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests, zero);
234 std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests, zero);
237 for(
size_t i = 0; i < multipleColRequests.size(); i++) {
238 procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i];
243 procMultRequestedColIds.data(),
244 global_procMultRequestedColIds.data());
247 for (
size_t k = 0; k<global_procMultRequestedColIds.size(); k++) {
250 std::vector<MT> MyWeightForColId(numProcs, MT_ZERO);
251 std::vector<MT> GlobalWeightForColId(numProcs, MT_ZERO);
253 if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
254 MyWeightForColId[myRank] = gColId2Weight[globColId];
256 MyWeightForColId[myRank] = MT_ZERO;
260 MyWeightForColId.data(),
261 GlobalWeightForColId.data());
263 if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
266 MT winnerValue = MT_ZERO;
267 int winnerProcRank = 0;
268 for (
int proc = 0; proc < numProcs; proc++) {
269 if(GlobalWeightForColId[proc] > winnerValue) {
270 winnerValue = GlobalWeightForColId[proc];
271 winnerProcRank = proc;
278 if(myRank != winnerProcRank) {
280 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
281 while(p != permutedDiagCandidatesFiltered.end() )
283 if((*p).second == globColId)
284 p = permutedDiagCandidatesFiltered.erase(p);
296 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
297 RowColPairs.insert( RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
298 RowColPairs.insert( RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());
303 gColVec->putScalar(SC_ZERO);
304 gDomVec->putScalar(SC_ZERO);
305 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
306 while(pl != RowColPairs.end() )
311 gColVec->sumIntoGlobalValue(jk,1.0);
314 gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
315 for(
size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
317 if(arrDomVec[sz] > 1.0) {
318 GetOStream(
Runtime0) <<
"RowColPairs has multiple column [" << sz <<
"]=" << arrDomVec[sz] << std::endl;
319 }
else if(arrDomVec[sz] == SC_ZERO) {
320 GetOStream(
Runtime0) <<
"RowColPairs has empty column [" << sz <<
"]=" << arrDomVec[sz] << std::endl;
360 Pperm->putScalar(SC_ZERO);
361 Qperm->putScalar(SC_ZERO);
362 lQperm->putScalar(SC_ZERO);
375 RowIdStatus->putScalar(SC_ZERO);
376 ColIdStatus->putScalar(SC_ZERO);
377 lColIdStatus->putScalar(SC_ZERO);
378 ColIdUsed->putScalar(SC_ZERO);
389 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.
begin();
390 while(p != RowColPairs.end() )
398 if(RowIdStatusArray[lik] == SC_ZERO) {
399 RowIdStatusArray[lik] = SC_ONE;
400 lColIdStatusArray[ljk] = SC_ONE;
401 Pperm->replaceLocalValue(lik, ik);
402 lQperm->replaceLocalValue(ljk, ik);
403 ColIdUsed->replaceGlobalValue(ik,SC_ONE);
404 p = RowColPairs.erase(p);
407 if(floor(ik/nDofsPerNode) != floor(jk/nDofsPerNode)) {
408 lWideRangeColPermutations++;
416 Qperm->doExport(*lQperm, *QpermExporter, Xpetra::ABSMAX);
417 ColIdStatus->doExport(*lColIdStatus, *QpermExporter, Xpetra::ABSMAX);
420 if(RowColPairs.size()>0) GetOStream(
Warnings0) <<
"MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl;
425 size_t cntFreeRowIdx = 0;
426 std::queue<GlobalOrdinal> qFreeGRowIdx;
427 for (
size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
428 if(RowIdStatusArray[lik] == SC_ZERO) {
430 qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
435 for (
size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
436 if(RowIdStatusArray[lik] == SC_ZERO) {
437 RowIdStatusArray[lik] = SC_ONE;
438 Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
440 if(floor(qFreeGRowIdx.front()/nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik)/nDofsPerNode)) {
441 lWideRangeRowPermutations++;
448 size_t cntFreeColIdx = 0;
449 std::queue<GlobalOrdinal> qFreeGColIdx;
450 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
451 if(ColIdStatusArray[ljk] == SC_ZERO) {
453 qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
457 size_t cntUnusedColIdx = 0;
458 std::queue<GlobalOrdinal> qUnusedGColIdx;
459 for (
size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
460 if(ColIdUsedArray[ljk] == SC_ZERO) {
462 qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
467 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
469 if(cntUnusedColIdx == 0)
break;
471 if(ColIdStatusArray[ljk] == SC_ZERO) {
472 ColIdStatusArray[ljk] = SC_ONE;
473 Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front());
474 ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(),SC_ONE);
476 if(floor(qUnusedGColIdx.front()/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
477 lWideRangeColPermutations++;
479 qUnusedGColIdx.pop();
491 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
492 if(ColIdStatusArray[ljk] == SC_ZERO) {
499 MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
501 std::cout <<
"global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
505 if(global_cntFreeColIdx > 0) {
510 MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
512 std::cout <<
"global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
516 std::vector<LocalOrdinal> local_UnusedColIdxOnProc (numProcs);
517 std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
518 local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
520 local_UnusedColIdxOnProc.data(),
521 global_UnusedColIdxOnProc.data());
524 std::cout <<
"PROC " << myRank <<
" global num unused indices per proc: ";
525 for (
size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
526 std::cout <<
" " << global_UnusedColIdxOnProc[ljk];
528 std::cout << std::endl;
532 std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
533 std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
535 for(
int proc=0; proc<myRank; proc++) {
536 global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
538 for(
GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter+local_cntUnusedColIdx; k++) {
539 local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
540 qUnusedGColIdx.pop();
543 local_UnusedColIdxVector.data(),
544 global_UnusedColIdxVector.data());
546 std::cout <<
"PROC " << myRank <<
" global UnusedGColIdx: ";
547 for (
size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
548 std::cout <<
" " << global_UnusedColIdxVector[ljk];
550 std::cout << std::endl;
557 std::vector<LocalOrdinal> local_EmptyColIdxOnProc (numProcs);
558 std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
559 local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
561 local_EmptyColIdxOnProc.data(),
562 global_EmptyColIdxOnProc.data());
565 std::cout <<
"PROC " << myRank <<
" global num of needed column indices: ";
566 for (
size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
567 std::cout <<
" " << global_EmptyColIdxOnProc[ljk];
569 std::cout << std::endl;
575 for(
int proc=0; proc<myRank; proc++) {
576 global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
580 GetOStream(
Statistics0) <<
"PROC " << myRank <<
" is allowd to use the following column gids: ";
581 for(
GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
582 GetOStream(
Statistics0) << global_UnusedColIdxVector[k] <<
" ";
589 for (
size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
591 if(ColIdStatusArray[ljk] == SC_ZERO) {
592 ColIdStatusArray[ljk] = SC_ONE;
593 Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
594 ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter],SC_ONE);
596 if(floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
597 lWideRangeColPermutations++;
611 for(
size_t row=0; row<A->getNodeNumRows(); row++) {
618 permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.
view(0,indoutP.
size()), valout.
view(0,valout.
size()));
619 permQTmatrix->insertGlobalValues (A->getRowMap()->getGlobalElement(row), indoutQ.
view(0,indoutQ.
size()), valout.
view(0,valout.
size()));
622 permPTmatrix->fillComplete();
623 permQTmatrix->fillComplete();
627 for(
size_t row=0; row<permPTmatrix->getNodeNumRows(); row++) {
628 if(permPTmatrix->getNumEntriesInLocalRow(row) != 1)
629 GetOStream(
Warnings0) <<
"#entries in row " << row <<
" of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
630 if(permPmatrix->getNumEntriesInLocalRow(row) != 1)
631 GetOStream(
Warnings0) <<
"#entries in row " << row <<
" of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
632 if(permQTmatrix->getNumEntriesInLocalRow(row) != 1)
633 GetOStream(
Warnings0) <<
"#entries in row " << row <<
" of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
637 Teuchos::RCP<Matrix> ApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A,
false, *permQTmatrix,
false, GetOStream(
Statistics2));
638 Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix,
false, *ApermQt,
false, GetOStream(
Statistics2));
652 permPApermQt->getLocalDiagCopy(*diagVec);
653 for(
size_t i = 0; i<diagVec->getMap()->getNodeNumElements(); ++i) {
654 if(diagVecData[i] != SC_ZERO)
655 invDiagVecData[i] = SC_ONE / diagVecData[i];
657 invDiagVecData[i] = SC_ONE;
658 GetOStream(
Statistics0) <<
"MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
664 for(
size_t row=0; row<A->getNodeNumRows(); row++) {
667 diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.
view(0,valout.
size()));
669 diagScalingOp->fillComplete();
671 Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp,
false, *permPApermQt,
false, GetOStream(
Statistics2));
672 currentLevel.
Set(
"A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory);
674 currentLevel.
Set(
"permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory);
675 currentLevel.
Set(
"permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory);
676 currentLevel.
Set(
"permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory);
677 currentLevel.
Set(
"permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory);
682 permPmatrix->getLocalDiagCopy(*diagPVec);
686 for(
size_t i = 0; i<diagPVec->getMap()->getNodeNumElements(); ++i) {
687 if(diagPVecData[i] == SC_ZERO) {
688 lNumRowPermutations++;
693 MueLu_sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);
698 permQTmatrix->getLocalDiagCopy(*diagQTVec);
702 for(
size_t i = 0; i<diagQTVec->getMap()->getNodeNumElements(); ++i) {
703 if(diagQTVecData[i] == SC_ZERO) {
704 lNumColPermutations++;
709 MueLu_sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);
711 currentLevel.
Set(
"#RowPermutations", gNumRowPermutations, genFactory);
712 currentLevel.
Set(
"#ColPermutations", gNumColPermutations, genFactory);
713 currentLevel.
Set(
"#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory);
714 currentLevel.
Set(
"#WideRangeColPermutations", gWideRangeColPermutations, genFactory);
716 GetOStream(
Statistics0) <<
"#Row permutations/max possible permutations: " << gNumRowPermutations <<
"/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
717 GetOStream(
Statistics0) <<
"#Column permutations/max possible permutations: " << gNumColPermutations <<
"/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
718 GetOStream(
Runtime1) <<
"#wide range row permutations: " << gWideRangeRowPermutations <<
" #wide range column permutations: " << gWideRangeColPermutations << std::endl;
Important warning messages (one line)
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > &permRowMap, Level ¤tLevel, const FactoryBase *genFactory) const
build permutation operators
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void sortingPermutation(const std::vector< Scalar > &values, std::vector< LocalOrdinal > &v)
One-liner description of what is happening.
Print even more statistics.
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static magnitudeType magnitude(T a)
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)
ArrayView< T > view(size_type lowerOffset, size_type size) const