MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AlgebraicPermutationStrategy_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 /*
11  * MueLu_AlgebraicPermutationStrategy_def.hpp
12  *
13  * Created on: Feb 20, 2013
14  * Author: tobias
15  */
16 
17 #ifndef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
18 #define MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
19 
20 #include <queue>
21 
22 #include <Teuchos_ScalarTraits.hpp>
23 
24 #include <Xpetra_MultiVector.hpp>
25 #include <Xpetra_Matrix.hpp>
26 #include <Xpetra_CrsGraph.hpp>
27 #include <Xpetra_Vector.hpp>
28 #include <Xpetra_MultiVectorFactory.hpp>
29 #include <Xpetra_VectorFactory.hpp>
30 #include <Xpetra_CrsMatrixWrap.hpp>
31 #include <Xpetra_Export.hpp>
32 #include <Xpetra_ExportFactory.hpp>
33 #include <Xpetra_Import.hpp>
34 #include <Xpetra_ImportFactory.hpp>
35 #include <Xpetra_MatrixMatrix.hpp>
36 
37 #include "MueLu_Utilities.hpp"
39 
40 namespace MueLu {
41 
42 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
45  Level& currentLevel, const FactoryBase* genFactory) const {
48 
50  const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
51  const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
52 
53  const Teuchos::RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
54  int numProcs = comm->getSize();
55  int myRank = comm->getRank();
56 
57  size_t nDofsPerNode = 1;
58  if (A->IsView("stridedMaps")) {
59  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
60  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
61  }
62 
63  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
64  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
65  std::vector<MT> Weights;
66 
67  // loop over all local rows in matrix A and keep diagonal entries if corresponding
68  // matrix rows are not contained in permRowMap
69  for (size_t row = 0; row < A->getRowMap()->getLocalNumElements(); row++) {
70  GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row);
71 
72  if (permRowMap->isNodeGlobalElement(grow) == true) continue;
73 
74  size_t nnz = A->getNumEntriesInLocalRow(row);
75 
76  // extract local row information from matrix
79  A->getLocalRowView(row, indices, vals);
80 
81  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError,
82  "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
83 
84  // find column entry with max absolute value
85  GlobalOrdinal gMaxValIdx = 0;
86  MT norm1 = MT_ZERO;
87  MT maxVal = MT_ZERO;
88  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
90  if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
92  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
93  }
94  }
95 
96  if (grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!!
97  keepDiagonalEntries.push_back(std::make_pair(grow, grow));
98  }
99 
101  // handle rows that are marked to be relevant for permutations
102  for (size_t row = 0; row < permRowMap->getLocalNumElements(); row++) {
103  GlobalOrdinal grow = permRowMap->getGlobalElement(row);
104  LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
105  size_t nnz = A->getNumEntriesInLocalRow(lArow);
106 
107  // extract local row information from matrix
110  A->getLocalRowView(lArow, indices, vals);
111 
112  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError,
113  "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
114 
115  // find column entry with max absolute value
116  GlobalOrdinal gMaxValIdx = 0;
117  MT norm1 = MT_ZERO;
118  MT maxVal = MT_ZERO;
119  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
121  if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
123  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
124  }
125  }
126 
127  if (maxVal > Teuchos::ScalarTraits<MT>::zero()) { // keep only max Entries \neq 0.0
128  permutedDiagCandidates.push_back(std::make_pair(grow, gMaxValIdx));
129  Weights.push_back(maxVal / (norm1 * Teuchos::as<MT>(nnz)));
130  } else {
131  std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" << std::endl;
132  }
133  }
134 
135  // sort Weights in descending order
136  std::vector<int> permutation;
137  sortingPermutation(Weights, permutation);
138 
139  // create new vector with exactly one possible entry for each column
140 
141  // each processor which requests the global column id gcid adds 1 to gColVec
142  // gColVec will be summed up over all processors and communicated to gDomVec
143  // which is based on the non-overlapping domain map of A.
144 
145  Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap());
146  Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap());
147  gColVec->putScalar(SC_ZERO);
148  gDomVec->putScalar(SC_ZERO);
149 
150  // put in all keep diagonal entries
151  for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
152  gColVec->sumIntoGlobalValue((*p).second, 1.0);
153  }
154 
155  Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap());
156  gDomVec->doExport(*gColVec, *exporter, Xpetra::ADD); // communicate blocked gcolids to all procs
157  gColVec->doImport(*gDomVec, *exporter, Xpetra::INSERT);
158 
159  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory
160  std::map<GlobalOrdinal, MT> gColId2Weight;
161 
162  {
163  Teuchos::ArrayRCP<Scalar> ddata = gColVec->getDataNonConst(0);
164  for (size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
165  // loop over all candidates
166  std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
167  GlobalOrdinal grow = pp.first;
168  GlobalOrdinal gcol = pp.second;
169 
170  LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
171  if (Teuchos::ScalarTraits<Scalar>::real(ddata[lcol]) > MT_ZERO) {
172  continue; // skip lcol: column already handled by another row
173  }
174 
175  // mark column as already taken
176  ddata[lcol] += SC_ONE;
177 
178  permutedDiagCandidatesFiltered.push_back(std::make_pair(grow, gcol));
179  gColId2Weight[gcol] = Weights[permutation[i]];
180  }
181  }
182 
183  // communicate how often each column index is requested by the different procs
184  gDomVec->doExport(*gColVec, *exporter, Xpetra::ADD);
185  gColVec->doImport(*gDomVec, *exporter, Xpetra::INSERT); // probably not needed // TODO check me
186 
187  //*****************************************************************************************
188  // first communicate ALL global ids of column indices which are requested by more
189  // than one proc to all other procs
190  // detect which global col indices are requested by more than one proc
191  // and store them in the multipleColRequests vector
192  std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also
193  // requested by another processor. This is possible, since they are stored
194  // in gDomVec which is based on the nonoverlapping domain map. That is, each
195  // global col id is handled by exactly one proc.
196  std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor
197 
198  for (size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
199  Teuchos::ArrayRCP<const Scalar> arrDomVec = gDomVec->getData(0);
200  //
201  // FIXME (mfh 30 Oct 2015) I _think_ it's OK to check just the
202  // real part, because this is a count. (Shouldn't MueLu use
203  // mag_type instead of Scalar here to save space?)
204  //
205  if (Teuchos::ScalarTraits<Scalar>::real(arrDomVec[sz]) > MT_ONE) {
206  multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
207  } else if (Teuchos::ScalarTraits<Scalar>::real(arrDomVec[sz]) == MT_ZERO) {
208  unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
209  }
210  }
211 
212  // communicate the global number of column indices which are requested by more than one proc
213  LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
214  LocalOrdinal globalMultColRequests = 0;
215 
216  // sum up all entries in multipleColRequests over all processors
217  //
218  // FIXME (lbv 11 Feb 2019) why cast localMultColRequests as LocalOrdinal
219  // when it was properly declared as such just a line above?
220  //
221  MueLu_sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests);
222 
223  if (globalMultColRequests > 0) {
224  // special handling: two processors request the same global column id.
225  // decide which processor gets it
226 
227  // distribute number of multipleColRequests to all processors
228  // each processor stores how many column ids for exchange are handled by the cur proc
229  std::vector<GlobalOrdinal> numMyMultColRequests(numProcs, 0);
230  std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs, 0);
231  numMyMultColRequests[myRank] = localMultColRequests;
232  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
233  numMyMultColRequests.data(),
234  numGlobalMultColRequests.data());
235 
236  // communicate multipleColRequests entries to all processors
237  int nMyOffset = 0;
238  for (int i = 0; i < myRank - 1; i++)
239  nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights
240 
241  const GlobalOrdinal zero = 0;
242  std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests, zero);
243  std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests, zero);
244 
245  // loop over all local column GIDs that are also requested by other procs
246  for (size_t i = 0; i < multipleColRequests.size(); i++) {
247  procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ?
248  }
249 
250  // template ordinal, package (double)
251  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests),
252  procMultRequestedColIds.data(),
253  global_procMultRequestedColIds.data());
254 
255  // loop over global_procOverlappingWeights and eliminate wrong entries...
256  for (size_t k = 0; k < global_procMultRequestedColIds.size(); k++) {
257  GlobalOrdinal globColId = global_procMultRequestedColIds[k];
258 
259  std::vector<MT> MyWeightForColId(numProcs, MT_ZERO);
260  std::vector<MT> GlobalWeightForColId(numProcs, MT_ZERO);
261 
262  if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
263  MyWeightForColId[myRank] = gColId2Weight[globColId];
264  } else {
265  MyWeightForColId[myRank] = MT_ZERO;
266  }
267 
268  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
269  MyWeightForColId.data(),
270  GlobalWeightForColId.data());
271 
272  if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
273  // note: 2 procs could have the same weight for a column index.
274  // pick the first one.
275  MT winnerValue = MT_ZERO;
276  int winnerProcRank = 0;
277  for (int proc = 0; proc < numProcs; proc++) {
278  if (GlobalWeightForColId[proc] > winnerValue) {
279  winnerValue = GlobalWeightForColId[proc];
280  winnerProcRank = proc;
281  }
282  }
283 
284  // winnerProcRank is the winner for handling globColId.
285  // winnerProcRank is unique (even if two procs have the same weight for a column index)
286 
287  if (myRank != winnerProcRank) {
288  // remove corresponding entry from permutedDiagCandidatesFiltered
289  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
290  while (p != permutedDiagCandidatesFiltered.end()) {
291  if ((*p).second == globColId)
292  p = permutedDiagCandidatesFiltered.erase(p);
293  else
294  p++;
295  }
296  }
297 
298  } // end if isNodeGlobalElement
299  } // end loop over global_procOverlappingWeights and eliminate wrong entries...
300  } // end if globalMultColRequests > 0
301 
302  // put together all pairs:
303  // size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size();
304  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
305  RowColPairs.insert(RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
306  RowColPairs.insert(RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());
307 
308 #ifdef DEBUG_OUTPUT
309  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
310  // plausibility check
311  gColVec->putScalar(SC_ZERO);
312  gDomVec->putScalar(SC_ZERO);
313  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
314  while (pl != RowColPairs.end()) {
315  // GlobalOrdinal ik = (*pl).first;
316  GlobalOrdinal jk = (*pl).second;
317 
318  gColVec->sumIntoGlobalValue(jk, 1.0);
319  pl++;
320  }
321  gDomVec->doExport(*gColVec, *exporter, Xpetra::ADD);
322  for (size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
323  Teuchos::ArrayRCP<const Scalar> arrDomVec = gDomVec->getData(0);
324  if (arrDomVec[sz] > 1.0) {
325  GetOStream(Runtime0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl;
326  } else if (arrDomVec[sz] == SC_ZERO) {
327  GetOStream(Runtime0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl;
328  }
329  }
330  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
331 #endif
332 
334  // assumption: on each processor RowColPairs now contains
335  // a valid set of (row,column) pairs, where the row entries
336  // are a subset of the processor's rows and the column entries
337  // are unique throughout all processors.
338  // Note: the RowColPairs are only defined for a subset of all rows,
339  // so there might be rows without an entry in RowColPairs.
340  // It can be, that some rows seem to be missing in RowColPairs, since
341  // the entry in that row with maximum absolute value has been reserved
342  // by another row already (e.g. as already diagonal dominant row outside
343  // of perRowMap).
344  // In fact, the RowColPairs vector only defines the (row,column) pairs
345  // that will be definitely moved to the diagonal after permutation.
346 
347 #ifdef DEBUG_OUTPUT
348  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) {
349  // std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl;
350  // }
351  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p)
352  // {
354  // std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl;
355  // }
356  // std::cout << "\n";
357 #endif
358 
359  // vectors to store permutation information
360  Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap());
361  Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
362  Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
363 
364  Pperm->putScalar(SC_ZERO);
365  Qperm->putScalar(SC_ZERO);
366  lQperm->putScalar(SC_ZERO);
367 
368  // setup exporter for Qperm
369  Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap());
370 
371  Teuchos::RCP<Vector> RowIdStatus = VectorFactory::Build(A->getRowMap());
372  Teuchos::RCP<Vector> ColIdStatus = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
373  Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
374  Teuchos::RCP<Vector> ColIdUsed = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use
375  RowIdStatus->putScalar(SC_ZERO);
376  ColIdStatus->putScalar(SC_ZERO);
377  lColIdStatus->putScalar(SC_ZERO);
378  ColIdUsed->putScalar(SC_ZERO); // no column ids are used
379 
380  // count wide-range permutations
381  // a wide-range permutation is defined as a permutation of rows/columns which do not
382  // belong to the same node
383  LocalOrdinal lWideRangeRowPermutations = 0;
384  GlobalOrdinal gWideRangeRowPermutations = 0;
385  LocalOrdinal lWideRangeColPermutations = 0;
386  GlobalOrdinal gWideRangeColPermutations = 0;
387 
388  // run 1: mark all "identity" permutations
389  {
390  Teuchos::ArrayRCP<Scalar> RowIdStatusArray = RowIdStatus->getDataNonConst(0);
391  Teuchos::ArrayRCP<Scalar> lColIdStatusArray = lColIdStatus->getDataNonConst(0);
392 
393  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
394  while (p != RowColPairs.end()) {
395  GlobalOrdinal ik = (*p).first;
396  GlobalOrdinal jk = (*p).second;
397 
398  LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
399  LocalOrdinal ljk = A->getColMap()->getLocalElement(jk);
400 
401  if (RowIdStatusArray[lik] == SC_ZERO) {
402  RowIdStatusArray[lik] = SC_ONE; // use this row id
403  lColIdStatusArray[ljk] = SC_ONE; // use this column id
404  Pperm->replaceLocalValue(lik, ik);
405  lQperm->replaceLocalValue(ljk, ik); // use column map
406  ColIdUsed->replaceGlobalValue(ik, SC_ONE); // ik is now used
407  p = RowColPairs.erase(p);
408 
409  // detect wide range permutations
410  if (floor(ik / nDofsPerNode) != floor(jk / nDofsPerNode)) {
411  lWideRangeColPermutations++;
412  }
413  } else
414  p++;
415  }
416 
417  // communicate column map -> domain map
418  Qperm->doExport(*lQperm, *QpermExporter, Xpetra::ABSMAX);
419  ColIdStatus->doExport(*lColIdStatus, *QpermExporter, Xpetra::ABSMAX);
420 
421  // plausibility check
422  if (RowColPairs.size() > 0) GetOStream(Warnings0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl; // TODO fix me
423 
424  // close Pperm
425 
426  // count, how many row permutations are missing on current proc
427  size_t cntFreeRowIdx = 0;
428  std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows
429  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
430  if (RowIdStatusArray[lik] == SC_ZERO) {
431  cntFreeRowIdx++;
432  qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
433  }
434  }
435 
436  // fix Pperm
437  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
438  if (RowIdStatusArray[lik] == SC_ZERO) {
439  RowIdStatusArray[lik] = SC_ONE; // use this row id
440  Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
441  // detect wide range permutations
442  if (floor(qFreeGRowIdx.front() / nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik) / nDofsPerNode)) {
443  lWideRangeRowPermutations++;
444  }
445  qFreeGRowIdx.pop();
446  }
447  }
448  }
449 
450  size_t cntUnusedColIdx = 0;
451  std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns
452  size_t cntFreeColIdx = 0;
453  std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns
454  {
455  Teuchos::ArrayRCP<Scalar> ColIdStatusArray = ColIdStatus->getDataNonConst(0);
456  Teuchos::ArrayRCP<Scalar> ColIdUsedArray = ColIdUsed->getDataNonConst(0); // not sure about this
457 
458  // close Qperm (free permutation entries in Qperm)
459  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
460  if (ColIdStatusArray[ljk] == SC_ZERO) {
461  cntFreeColIdx++;
462  qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
463  }
464  }
465 
466  for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
467  if (ColIdUsedArray[ljk] == SC_ZERO) {
468  cntUnusedColIdx++;
469  qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
470  }
471  }
472 
473  // fix Qperm with local entries
474  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
475  // stop if no (local) unused column idx are left
476  if (cntUnusedColIdx == 0) break;
477 
478  if (ColIdStatusArray[ljk] == SC_ZERO) {
479  ColIdStatusArray[ljk] = SC_ONE; // use this row id
480  Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map)
481  ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(), SC_ONE); // ljk is now used, too
482  // detect wide range permutations
483  if (floor(qUnusedGColIdx.front() / nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk) / nDofsPerNode)) {
484  lWideRangeColPermutations++;
485  }
486  qUnusedGColIdx.pop();
487  cntUnusedColIdx--;
488  cntFreeColIdx--;
489  }
490  }
491 
492  // Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally
493  // ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);
494 
495  // count, how many unused column idx are needed on current processor
496  // to complete Qperm
497  cntFreeColIdx = 0;
498  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop
499  if (ColIdStatusArray[ljk] == SC_ZERO) {
500  cntFreeColIdx++;
501  }
502  }
503  }
504 
505  GlobalOrdinal global_cntFreeColIdx = 0;
506  LocalOrdinal local_cntFreeColIdx = cntFreeColIdx;
507  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
508 #ifdef DEBUG_OUTPUT
509  std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
510 #endif
511 
512  // avoid global communication if possible
513  if (global_cntFreeColIdx > 0) {
514  // 1) count how many unused column ids are left
515  GlobalOrdinal global_cntUnusedColIdx = 0;
516  LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx;
517  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
518 #ifdef DEBUG_OUTPUT
519  std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
520 #endif
521 
522  // 2) communicate how many unused column ids are available on procs
523  std::vector<LocalOrdinal> local_UnusedColIdxOnProc(numProcs);
524  std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
525  local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
526  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
527  local_UnusedColIdxOnProc.data(),
528  global_UnusedColIdxOnProc.data());
529 
530 #ifdef DEBUG_OUTPUT
531  std::cout << "PROC " << myRank << " global num unused indices per proc: ";
532  for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
533  std::cout << " " << global_UnusedColIdxOnProc[ljk];
534  }
535  std::cout << std::endl;
536 #endif
537 
538  // 3) build array of length global_cntUnusedColIdx to globally replicate unused column idx
539  std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
540  std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
541  GlobalOrdinal global_cntUnusedColIdxStartIter = 0;
542  for (int proc = 0; proc < myRank; proc++) {
543  global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
544  }
545  for (GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter + local_cntUnusedColIdx; k++) {
546  local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
547  qUnusedGColIdx.pop();
548  }
549  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx),
550  local_UnusedColIdxVector.data(),
551  global_UnusedColIdxVector.data());
552 #ifdef DEBUG_OUTPUT
553  std::cout << "PROC " << myRank << " global UnusedGColIdx: ";
554  for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
555  std::cout << " " << global_UnusedColIdxVector[ljk];
556  }
557  std::cout << std::endl;
558 #endif
559 
560  // 4) communicate, how many column idx are needed on each processor
561  // to complete Qperm
562  std::vector<LocalOrdinal> local_EmptyColIdxOnProc(numProcs);
563  std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
564  local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
565  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
566  local_EmptyColIdxOnProc.data(),
567  global_EmptyColIdxOnProc.data());
568 
569 #ifdef DEBUG_OUTPUT
570  std::cout << "PROC " << myRank << " global num of needed column indices: ";
571  for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
572  std::cout << " " << global_EmptyColIdxOnProc[ljk];
573  }
574  std::cout << std::endl;
575 #endif
576 
577  // 5) determine first index in global_UnusedColIdxVector for unused column indices,
578  // that are marked to be used by this processor
579  GlobalOrdinal global_UnusedColStartIdx = 0;
580  for (int proc = 0; proc < myRank; proc++) {
581  global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
582  }
583 
584 #ifdef DEBUG_OUTPUT
585  GetOStream(Statistics0) << "PROC " << myRank << " is allowd to use the following column gids: ";
586  for (GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
587  GetOStream(Statistics0) << global_UnusedColIdxVector[k] << " ";
588  }
589  GetOStream(Statistics0) << std::endl;
590 #endif
591 
592  // 6.) fix Qperm with global entries
593  {
594  Teuchos::ArrayRCP<Scalar> ColIdStatusArray = ColIdStatus->getDataNonConst(0);
595  GlobalOrdinal array_iter = 0;
596  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
597  if (ColIdStatusArray[ljk] == SC_ZERO) {
598  ColIdStatusArray[ljk] = SC_ONE; // use this row id
599  Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
600  ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter], SC_ONE);
601  // detect wide range permutations
602  if (floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter] / nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk) / nDofsPerNode)) {
603  lWideRangeColPermutations++;
604  }
605  array_iter++;
606  // cntUnusedColIdx--; // check me
607  }
608  }
609  }
610  } // end if global_cntFreeColIdx > 0
612 
613  // create new empty Matrix
614  Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1));
615  Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1));
616 
617  {
618  Teuchos::ArrayRCP<Scalar> PpermData = Pperm->getDataNonConst(0);
619  Teuchos::ArrayRCP<Scalar> QpermData = Qperm->getDataNonConst(0);
620 
621  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
622  // FIXME (mfh 30 Oct 2015): Teuchos::as doesn't know how to
623  // convert from complex Scalar to GO, so we have to take the real
624  // part first. I think that's the right thing to do in this case.
625  Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1, Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(PpermData[row]))); // column idx for Perm^T
626  Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1, Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(QpermData[row]))); // column idx for Qperm
627  Teuchos::ArrayRCP<Scalar> valout(1, SC_ONE);
628  permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0, indoutP.size()), valout.view(0, valout.size()));
629  permQTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutQ.view(0, indoutQ.size()), valout.view(0, valout.size()));
630  }
631  }
632 
633  permPTmatrix->fillComplete();
634  permQTmatrix->fillComplete();
635 
636  Teuchos::RCP<Matrix> permPmatrix = Utilities::Transpose(*permPTmatrix, true);
637 
638  for (size_t row = 0; row < permPTmatrix->getLocalNumRows(); row++) {
639  if (permPTmatrix->getNumEntriesInLocalRow(row) != 1)
640  GetOStream(Warnings0) << "#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
641  if (permPmatrix->getNumEntriesInLocalRow(row) != 1)
642  GetOStream(Warnings0) << "#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
643  if (permQTmatrix->getNumEntriesInLocalRow(row) != 1)
644  GetOStream(Warnings0) << "#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
645  }
646 
647  // build permP * A * permQT
649  Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix, false, *ApermQt, false, GetOStream(Statistics2));
650 
651  /*
652  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("A.mat", *A);
653  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permP.mat", *permPmatrix);
654  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permQt.mat", *permQTmatrix);
655  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permPApermQt.mat", *permPApermQt);
656  */
657  // build scaling matrix
658  Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(), true);
659  Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(), true);
660  Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(), 1));
661  {
662  permPApermQt->getLocalDiagCopy(*diagVec);
663  Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
664  Teuchos::ArrayRCP<Scalar> invDiagVecData = invDiagVec->getDataNonConst(0);
665  for (size_t i = 0; i < diagVec->getMap()->getLocalNumElements(); ++i) {
666  if (diagVecData[i] != SC_ZERO)
667  invDiagVecData[i] = SC_ONE / diagVecData[i];
668  else {
669  invDiagVecData[i] = SC_ONE;
670  GetOStream(Statistics0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
671  }
672  }
673 
674  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
675  Teuchos::ArrayRCP<GlobalOrdinal> indout(1, permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
676  Teuchos::ArrayRCP<Scalar> valout(1, invDiagVecData[row]);
677  diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size()));
678  }
679  }
680  diagScalingOp->fillComplete();
681 
682  Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp, false, *permPApermQt, false, GetOStream(Statistics2));
683  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory /*this*/);
684 
685  currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory /*this*/); // TODO careful with this!!!
686  currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory /*this*/);
687  currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory /*this*/);
688  currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory /*this*/);
689 
691  // count zeros on diagonal in P -> number of row permutations
692  Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(), true);
693  permPmatrix->getLocalDiagCopy(*diagPVec);
694  LocalOrdinal lNumRowPermutations = 0;
695  GlobalOrdinal gNumRowPermutations = 0;
696  {
697  Teuchos::ArrayRCP<const Scalar> diagPVecData = diagPVec->getData(0);
698  for (size_t i = 0; i < diagPVec->getMap()->getLocalNumElements(); ++i) {
699  if (diagPVecData[i] == SC_ZERO) {
700  lNumRowPermutations++;
701  }
702  }
703  }
704 
705  // sum up all entries in multipleColRequests over all processors
706  MueLu_sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);
707 
709  // count zeros on diagonal in Q^T -> number of column permutations
710  Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(), true);
711  permQTmatrix->getLocalDiagCopy(*diagQTVec);
712  LocalOrdinal lNumColPermutations = 0;
713  GlobalOrdinal gNumColPermutations = 0;
714  {
715  Teuchos::ArrayRCP<const Scalar> diagQTVecData = diagQTVec->getData(0);
716  for (size_t i = 0; i < diagQTVec->getMap()->getLocalNumElements(); ++i) {
717  if (diagQTVecData[i] == SC_ZERO) {
718  lNumColPermutations++;
719  }
720  }
721  }
722 
723  // sum up all entries in multipleColRequests over all processors
724  MueLu_sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);
725 
726  currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory /*this*/);
727  currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory /*this*/);
728  currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory /*this*/);
729  currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory /*this*/);
730 
731  GetOStream(Statistics0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
732  GetOStream(Statistics0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
733  GetOStream(Runtime1) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl;
734 }
735 
736 } // namespace MueLu
737 
738 #endif /* MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_ */
Important warning messages (one line)
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > &permRowMap, Level &currentLevel, const FactoryBase *genFactory) const
build permutation operators
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void sortingPermutation(const std::vector< Scalar > &values, std::vector< LocalOrdinal > &v)
size_type size() const
One-liner description of what is happening.
size_type size() const
Print even more statistics.
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static magnitudeType magnitude(T a)
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)
iterator begin() const
ArrayView< T > view(size_type lowerOffset, size_type size) const