MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AlgebraicPermutationStrategy_def.hpp
Go to the documentation of this file.
1 /*
2  * MueLu_AlgebraicPermutationStrategy_def.hpp
3  *
4  * Created on: Feb 20, 2013
5  * Author: tobias
6  */
7 
8 #ifndef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
9 #define MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
10 
11 #include <queue>
12 
13 #include <Teuchos_ScalarTraits.hpp>
14 
15 #include <Xpetra_MultiVector.hpp>
16 #include <Xpetra_Matrix.hpp>
17 #include <Xpetra_CrsGraph.hpp>
18 #include <Xpetra_Vector.hpp>
19 #include <Xpetra_MultiVectorFactory.hpp>
20 #include <Xpetra_VectorFactory.hpp>
21 #include <Xpetra_CrsMatrixWrap.hpp>
22 #include <Xpetra_Export.hpp>
23 #include <Xpetra_ExportFactory.hpp>
24 #include <Xpetra_Import.hpp>
25 #include <Xpetra_ImportFactory.hpp>
26 #include <Xpetra_MatrixMatrix.hpp>
27 
28 #include "MueLu_Utilities.hpp"
30 
31 namespace MueLu {
32 
33 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
36  Level& currentLevel, const FactoryBase* genFactory) const {
39 
41  const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
42  const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
43 
44  const Teuchos::RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
45  int numProcs = comm->getSize();
46  int myRank = comm->getRank();
47 
48  size_t nDofsPerNode = 1;
49  if (A->IsView("stridedMaps")) {
50  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
51  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
52  }
53 
54  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
55  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
56  std::vector<MT> Weights;
57 
58  // loop over all local rows in matrix A and keep diagonal entries if corresponding
59  // matrix rows are not contained in permRowMap
60  for (size_t row = 0; row < A->getRowMap()->getLocalNumElements(); row++) {
61  GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row);
62 
63  if (permRowMap->isNodeGlobalElement(grow) == true) continue;
64 
65  size_t nnz = A->getNumEntriesInLocalRow(row);
66 
67  // extract local row information from matrix
70  A->getLocalRowView(row, indices, vals);
71 
72  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError,
73  "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
74 
75  // find column entry with max absolute value
76  GlobalOrdinal gMaxValIdx = 0;
77  MT norm1 = MT_ZERO;
78  MT maxVal = MT_ZERO;
79  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
81  if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
83  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
84  }
85  }
86 
87  if (grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!!
88  keepDiagonalEntries.push_back(std::make_pair(grow, grow));
89  }
90 
92  // handle rows that are marked to be relevant for permutations
93  for (size_t row = 0; row < permRowMap->getLocalNumElements(); row++) {
94  GlobalOrdinal grow = permRowMap->getGlobalElement(row);
95  LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
96  size_t nnz = A->getNumEntriesInLocalRow(lArow);
97 
98  // extract local row information from matrix
101  A->getLocalRowView(lArow, indices, vals);
102 
103  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError,
104  "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
105 
106  // find column entry with max absolute value
107  GlobalOrdinal gMaxValIdx = 0;
108  MT norm1 = MT_ZERO;
109  MT maxVal = MT_ZERO;
110  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
112  if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
114  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
115  }
116  }
117 
118  if (maxVal > Teuchos::ScalarTraits<MT>::zero()) { // keep only max Entries \neq 0.0
119  permutedDiagCandidates.push_back(std::make_pair(grow, gMaxValIdx));
120  Weights.push_back(maxVal / (norm1 * Teuchos::as<MT>(nnz)));
121  } else {
122  std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" << std::endl;
123  }
124  }
125 
126  // sort Weights in descending order
127  std::vector<int> permutation;
128  sortingPermutation(Weights, permutation);
129 
130  // create new vector with exactly one possible entry for each column
131 
132  // each processor which requests the global column id gcid adds 1 to gColVec
133  // gColVec will be summed up over all processors and communicated to gDomVec
134  // which is based on the non-overlapping domain map of A.
135 
136  Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap());
137  Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap());
138  gColVec->putScalar(SC_ZERO);
139  gDomVec->putScalar(SC_ZERO);
140 
141  // put in all keep diagonal entries
142  for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
143  gColVec->sumIntoGlobalValue((*p).second, 1.0);
144  }
145 
146  Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap());
147  gDomVec->doExport(*gColVec, *exporter, Xpetra::ADD); // communicate blocked gcolids to all procs
148  gColVec->doImport(*gDomVec, *exporter, Xpetra::INSERT);
149 
150  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory
151  std::map<GlobalOrdinal, MT> gColId2Weight;
152 
153  {
154  Teuchos::ArrayRCP<Scalar> ddata = gColVec->getDataNonConst(0);
155  for (size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
156  // loop over all candidates
157  std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
158  GlobalOrdinal grow = pp.first;
159  GlobalOrdinal gcol = pp.second;
160 
161  LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
162  if (Teuchos::ScalarTraits<Scalar>::real(ddata[lcol]) > MT_ZERO) {
163  continue; // skip lcol: column already handled by another row
164  }
165 
166  // mark column as already taken
167  ddata[lcol] += SC_ONE;
168 
169  permutedDiagCandidatesFiltered.push_back(std::make_pair(grow, gcol));
170  gColId2Weight[gcol] = Weights[permutation[i]];
171  }
172  }
173 
174  // communicate how often each column index is requested by the different procs
175  gDomVec->doExport(*gColVec, *exporter, Xpetra::ADD);
176  gColVec->doImport(*gDomVec, *exporter, Xpetra::INSERT); // probably not needed // TODO check me
177 
178  //*****************************************************************************************
179  // first communicate ALL global ids of column indices which are requested by more
180  // than one proc to all other procs
181  // detect which global col indices are requested by more than one proc
182  // and store them in the multipleColRequests vector
183  std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also
184  // requested by another processor. This is possible, since they are stored
185  // in gDomVec which is based on the nonoverlapping domain map. That is, each
186  // global col id is handled by exactly one proc.
187  std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor
188 
189  for (size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
190  Teuchos::ArrayRCP<const Scalar> arrDomVec = gDomVec->getData(0);
191  //
192  // FIXME (mfh 30 Oct 2015) I _think_ it's OK to check just the
193  // real part, because this is a count. (Shouldn't MueLu use
194  // mag_type instead of Scalar here to save space?)
195  //
196  if (Teuchos::ScalarTraits<Scalar>::real(arrDomVec[sz]) > MT_ONE) {
197  multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
198  } else if (Teuchos::ScalarTraits<Scalar>::real(arrDomVec[sz]) == MT_ZERO) {
199  unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
200  }
201  }
202 
203  // communicate the global number of column indices which are requested by more than one proc
204  LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
205  LocalOrdinal globalMultColRequests = 0;
206 
207  // sum up all entries in multipleColRequests over all processors
208  //
209  // FIXME (lbv 11 Feb 2019) why cast localMultColRequests as LocalOrdinal
210  // when it was properly declared as such just a line above?
211  //
212  MueLu_sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests);
213 
214  if (globalMultColRequests > 0) {
215  // special handling: two processors request the same global column id.
216  // decide which processor gets it
217 
218  // distribute number of multipleColRequests to all processors
219  // each processor stores how many column ids for exchange are handled by the cur proc
220  std::vector<GlobalOrdinal> numMyMultColRequests(numProcs, 0);
221  std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs, 0);
222  numMyMultColRequests[myRank] = localMultColRequests;
223  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
224  numMyMultColRequests.data(),
225  numGlobalMultColRequests.data());
226 
227  // communicate multipleColRequests entries to all processors
228  int nMyOffset = 0;
229  for (int i = 0; i < myRank - 1; i++)
230  nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights
231 
232  const GlobalOrdinal zero = 0;
233  std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests, zero);
234  std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests, zero);
235 
236  // loop over all local column GIDs that are also requested by other procs
237  for (size_t i = 0; i < multipleColRequests.size(); i++) {
238  procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ?
239  }
240 
241  // template ordinal, package (double)
242  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests),
243  procMultRequestedColIds.data(),
244  global_procMultRequestedColIds.data());
245 
246  // loop over global_procOverlappingWeights and eliminate wrong entries...
247  for (size_t k = 0; k < global_procMultRequestedColIds.size(); k++) {
248  GlobalOrdinal globColId = global_procMultRequestedColIds[k];
249 
250  std::vector<MT> MyWeightForColId(numProcs, MT_ZERO);
251  std::vector<MT> GlobalWeightForColId(numProcs, MT_ZERO);
252 
253  if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
254  MyWeightForColId[myRank] = gColId2Weight[globColId];
255  } else {
256  MyWeightForColId[myRank] = MT_ZERO;
257  }
258 
259  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
260  MyWeightForColId.data(),
261  GlobalWeightForColId.data());
262 
263  if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
264  // note: 2 procs could have the same weight for a column index.
265  // pick the first one.
266  MT winnerValue = MT_ZERO;
267  int winnerProcRank = 0;
268  for (int proc = 0; proc < numProcs; proc++) {
269  if (GlobalWeightForColId[proc] > winnerValue) {
270  winnerValue = GlobalWeightForColId[proc];
271  winnerProcRank = proc;
272  }
273  }
274 
275  // winnerProcRank is the winner for handling globColId.
276  // winnerProcRank is unique (even if two procs have the same weight for a column index)
277 
278  if (myRank != winnerProcRank) {
279  // remove corresponding entry from permutedDiagCandidatesFiltered
280  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
281  while (p != permutedDiagCandidatesFiltered.end()) {
282  if ((*p).second == globColId)
283  p = permutedDiagCandidatesFiltered.erase(p);
284  else
285  p++;
286  }
287  }
288 
289  } // end if isNodeGlobalElement
290  } // end loop over global_procOverlappingWeights and eliminate wrong entries...
291  } // end if globalMultColRequests > 0
292 
293  // put together all pairs:
294  // size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size();
295  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
296  RowColPairs.insert(RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
297  RowColPairs.insert(RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());
298 
299 #ifdef DEBUG_OUTPUT
300  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
301  // plausibility check
302  gColVec->putScalar(SC_ZERO);
303  gDomVec->putScalar(SC_ZERO);
304  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
305  while (pl != RowColPairs.end()) {
306  // GlobalOrdinal ik = (*pl).first;
307  GlobalOrdinal jk = (*pl).second;
308 
309  gColVec->sumIntoGlobalValue(jk, 1.0);
310  pl++;
311  }
312  gDomVec->doExport(*gColVec, *exporter, Xpetra::ADD);
313  for (size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
314  Teuchos::ArrayRCP<const Scalar> arrDomVec = gDomVec->getData(0);
315  if (arrDomVec[sz] > 1.0) {
316  GetOStream(Runtime0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl;
317  } else if (arrDomVec[sz] == SC_ZERO) {
318  GetOStream(Runtime0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl;
319  }
320  }
321  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
322 #endif
323 
325  // assumption: on each processor RowColPairs now contains
326  // a valid set of (row,column) pairs, where the row entries
327  // are a subset of the processor's rows and the column entries
328  // are unique throughout all processors.
329  // Note: the RowColPairs are only defined for a subset of all rows,
330  // so there might be rows without an entry in RowColPairs.
331  // It can be, that some rows seem to be missing in RowColPairs, since
332  // the entry in that row with maximum absolute value has been reserved
333  // by another row already (e.g. as already diagonal dominant row outside
334  // of perRowMap).
335  // In fact, the RowColPairs vector only defines the (row,column) pairs
336  // that will be definitely moved to the diagonal after permutation.
337 
338 #ifdef DEBUG_OUTPUT
339  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) {
340  // std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl;
341  // }
342  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p)
343  // {
345  // std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl;
346  // }
347  // std::cout << "\n";
348 #endif
349 
350  // vectors to store permutation information
351  Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap());
352  Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
353  Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
354 
355  Pperm->putScalar(SC_ZERO);
356  Qperm->putScalar(SC_ZERO);
357  lQperm->putScalar(SC_ZERO);
358 
359  // setup exporter for Qperm
360  Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap());
361 
362  Teuchos::RCP<Vector> RowIdStatus = VectorFactory::Build(A->getRowMap());
363  Teuchos::RCP<Vector> ColIdStatus = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
364  Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
365  Teuchos::RCP<Vector> ColIdUsed = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use
366  RowIdStatus->putScalar(SC_ZERO);
367  ColIdStatus->putScalar(SC_ZERO);
368  lColIdStatus->putScalar(SC_ZERO);
369  ColIdUsed->putScalar(SC_ZERO); // no column ids are used
370 
371  // count wide-range permutations
372  // a wide-range permutation is defined as a permutation of rows/columns which do not
373  // belong to the same node
374  LocalOrdinal lWideRangeRowPermutations = 0;
375  GlobalOrdinal gWideRangeRowPermutations = 0;
376  LocalOrdinal lWideRangeColPermutations = 0;
377  GlobalOrdinal gWideRangeColPermutations = 0;
378 
379  // run 1: mark all "identity" permutations
380  {
381  Teuchos::ArrayRCP<Scalar> RowIdStatusArray = RowIdStatus->getDataNonConst(0);
382  Teuchos::ArrayRCP<Scalar> lColIdStatusArray = lColIdStatus->getDataNonConst(0);
383 
384  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
385  while (p != RowColPairs.end()) {
386  GlobalOrdinal ik = (*p).first;
387  GlobalOrdinal jk = (*p).second;
388 
389  LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
390  LocalOrdinal ljk = A->getColMap()->getLocalElement(jk);
391 
392  if (RowIdStatusArray[lik] == SC_ZERO) {
393  RowIdStatusArray[lik] = SC_ONE; // use this row id
394  lColIdStatusArray[ljk] = SC_ONE; // use this column id
395  Pperm->replaceLocalValue(lik, ik);
396  lQperm->replaceLocalValue(ljk, ik); // use column map
397  ColIdUsed->replaceGlobalValue(ik, SC_ONE); // ik is now used
398  p = RowColPairs.erase(p);
399 
400  // detect wide range permutations
401  if (floor(ik / nDofsPerNode) != floor(jk / nDofsPerNode)) {
402  lWideRangeColPermutations++;
403  }
404  } else
405  p++;
406  }
407 
408  // communicate column map -> domain map
409  Qperm->doExport(*lQperm, *QpermExporter, Xpetra::ABSMAX);
410  ColIdStatus->doExport(*lColIdStatus, *QpermExporter, Xpetra::ABSMAX);
411 
412  // plausibility check
413  if (RowColPairs.size() > 0) GetOStream(Warnings0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl; // TODO fix me
414 
415  // close Pperm
416 
417  // count, how many row permutations are missing on current proc
418  size_t cntFreeRowIdx = 0;
419  std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows
420  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
421  if (RowIdStatusArray[lik] == SC_ZERO) {
422  cntFreeRowIdx++;
423  qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
424  }
425  }
426 
427  // fix Pperm
428  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
429  if (RowIdStatusArray[lik] == SC_ZERO) {
430  RowIdStatusArray[lik] = SC_ONE; // use this row id
431  Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
432  // detect wide range permutations
433  if (floor(qFreeGRowIdx.front() / nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik) / nDofsPerNode)) {
434  lWideRangeRowPermutations++;
435  }
436  qFreeGRowIdx.pop();
437  }
438  }
439  }
440 
441  size_t cntUnusedColIdx = 0;
442  std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns
443  size_t cntFreeColIdx = 0;
444  std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns
445  {
446  Teuchos::ArrayRCP<Scalar> ColIdStatusArray = ColIdStatus->getDataNonConst(0);
447  Teuchos::ArrayRCP<Scalar> ColIdUsedArray = ColIdUsed->getDataNonConst(0); // not sure about this
448 
449  // close Qperm (free permutation entries in Qperm)
450  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
451  if (ColIdStatusArray[ljk] == SC_ZERO) {
452  cntFreeColIdx++;
453  qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
454  }
455  }
456 
457  for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
458  if (ColIdUsedArray[ljk] == SC_ZERO) {
459  cntUnusedColIdx++;
460  qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
461  }
462  }
463 
464  // fix Qperm with local entries
465  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
466  // stop if no (local) unused column idx are left
467  if (cntUnusedColIdx == 0) break;
468 
469  if (ColIdStatusArray[ljk] == SC_ZERO) {
470  ColIdStatusArray[ljk] = SC_ONE; // use this row id
471  Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map)
472  ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(), SC_ONE); // ljk is now used, too
473  // detect wide range permutations
474  if (floor(qUnusedGColIdx.front() / nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk) / nDofsPerNode)) {
475  lWideRangeColPermutations++;
476  }
477  qUnusedGColIdx.pop();
478  cntUnusedColIdx--;
479  cntFreeColIdx--;
480  }
481  }
482 
483  // Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally
484  // ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);
485 
486  // count, how many unused column idx are needed on current processor
487  // to complete Qperm
488  cntFreeColIdx = 0;
489  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop
490  if (ColIdStatusArray[ljk] == SC_ZERO) {
491  cntFreeColIdx++;
492  }
493  }
494  }
495 
496  GlobalOrdinal global_cntFreeColIdx = 0;
497  LocalOrdinal local_cntFreeColIdx = cntFreeColIdx;
498  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
499 #ifdef DEBUG_OUTPUT
500  std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
501 #endif
502 
503  // avoid global communication if possible
504  if (global_cntFreeColIdx > 0) {
505  // 1) count how many unused column ids are left
506  GlobalOrdinal global_cntUnusedColIdx = 0;
507  LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx;
508  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
509 #ifdef DEBUG_OUTPUT
510  std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
511 #endif
512 
513  // 2) communicate how many unused column ids are available on procs
514  std::vector<LocalOrdinal> local_UnusedColIdxOnProc(numProcs);
515  std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
516  local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
517  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
518  local_UnusedColIdxOnProc.data(),
519  global_UnusedColIdxOnProc.data());
520 
521 #ifdef DEBUG_OUTPUT
522  std::cout << "PROC " << myRank << " global num unused indices per proc: ";
523  for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
524  std::cout << " " << global_UnusedColIdxOnProc[ljk];
525  }
526  std::cout << std::endl;
527 #endif
528 
529  // 3) build array of length global_cntUnusedColIdx to globally replicate unused column idx
530  std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
531  std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
532  GlobalOrdinal global_cntUnusedColIdxStartIter = 0;
533  for (int proc = 0; proc < myRank; proc++) {
534  global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
535  }
536  for (GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter + local_cntUnusedColIdx; k++) {
537  local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
538  qUnusedGColIdx.pop();
539  }
540  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx),
541  local_UnusedColIdxVector.data(),
542  global_UnusedColIdxVector.data());
543 #ifdef DEBUG_OUTPUT
544  std::cout << "PROC " << myRank << " global UnusedGColIdx: ";
545  for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
546  std::cout << " " << global_UnusedColIdxVector[ljk];
547  }
548  std::cout << std::endl;
549 #endif
550 
551  // 4) communicate, how many column idx are needed on each processor
552  // to complete Qperm
553  std::vector<LocalOrdinal> local_EmptyColIdxOnProc(numProcs);
554  std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
555  local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
556  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
557  local_EmptyColIdxOnProc.data(),
558  global_EmptyColIdxOnProc.data());
559 
560 #ifdef DEBUG_OUTPUT
561  std::cout << "PROC " << myRank << " global num of needed column indices: ";
562  for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
563  std::cout << " " << global_EmptyColIdxOnProc[ljk];
564  }
565  std::cout << std::endl;
566 #endif
567 
568  // 5) determine first index in global_UnusedColIdxVector for unused column indices,
569  // that are marked to be used by this processor
570  GlobalOrdinal global_UnusedColStartIdx = 0;
571  for (int proc = 0; proc < myRank; proc++) {
572  global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
573  }
574 
575 #ifdef DEBUG_OUTPUT
576  GetOStream(Statistics0) << "PROC " << myRank << " is allowd to use the following column gids: ";
577  for (GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
578  GetOStream(Statistics0) << global_UnusedColIdxVector[k] << " ";
579  }
580  GetOStream(Statistics0) << std::endl;
581 #endif
582 
583  // 6.) fix Qperm with global entries
584  {
585  Teuchos::ArrayRCP<Scalar> ColIdStatusArray = ColIdStatus->getDataNonConst(0);
586  GlobalOrdinal array_iter = 0;
587  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
588  if (ColIdStatusArray[ljk] == SC_ZERO) {
589  ColIdStatusArray[ljk] = SC_ONE; // use this row id
590  Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
591  ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter], SC_ONE);
592  // detect wide range permutations
593  if (floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter] / nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk) / nDofsPerNode)) {
594  lWideRangeColPermutations++;
595  }
596  array_iter++;
597  // cntUnusedColIdx--; // check me
598  }
599  }
600  }
601  } // end if global_cntFreeColIdx > 0
603 
604  // create new empty Matrix
605  Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1));
606  Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1));
607 
608  {
609  Teuchos::ArrayRCP<Scalar> PpermData = Pperm->getDataNonConst(0);
610  Teuchos::ArrayRCP<Scalar> QpermData = Qperm->getDataNonConst(0);
611 
612  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
613  // FIXME (mfh 30 Oct 2015): Teuchos::as doesn't know how to
614  // convert from complex Scalar to GO, so we have to take the real
615  // part first. I think that's the right thing to do in this case.
616  Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1, Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(PpermData[row]))); // column idx for Perm^T
617  Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1, Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(QpermData[row]))); // column idx for Qperm
618  Teuchos::ArrayRCP<Scalar> valout(1, SC_ONE);
619  permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0, indoutP.size()), valout.view(0, valout.size()));
620  permQTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutQ.view(0, indoutQ.size()), valout.view(0, valout.size()));
621  }
622  }
623 
624  permPTmatrix->fillComplete();
625  permQTmatrix->fillComplete();
626 
627  Teuchos::RCP<Matrix> permPmatrix = Utilities::Transpose(*permPTmatrix, true);
628 
629  for (size_t row = 0; row < permPTmatrix->getLocalNumRows(); row++) {
630  if (permPTmatrix->getNumEntriesInLocalRow(row) != 1)
631  GetOStream(Warnings0) << "#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
632  if (permPmatrix->getNumEntriesInLocalRow(row) != 1)
633  GetOStream(Warnings0) << "#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
634  if (permQTmatrix->getNumEntriesInLocalRow(row) != 1)
635  GetOStream(Warnings0) << "#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
636  }
637 
638  // build permP * A * permQT
640  Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix, false, *ApermQt, false, GetOStream(Statistics2));
641 
642  /*
643  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("A.mat", *A);
644  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permP.mat", *permPmatrix);
645  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permQt.mat", *permQTmatrix);
646  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permPApermQt.mat", *permPApermQt);
647  */
648  // build scaling matrix
649  Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(), true);
650  Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(), true);
651  Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(), 1));
652  {
653  permPApermQt->getLocalDiagCopy(*diagVec);
654  Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
655  Teuchos::ArrayRCP<Scalar> invDiagVecData = invDiagVec->getDataNonConst(0);
656  for (size_t i = 0; i < diagVec->getMap()->getLocalNumElements(); ++i) {
657  if (diagVecData[i] != SC_ZERO)
658  invDiagVecData[i] = SC_ONE / diagVecData[i];
659  else {
660  invDiagVecData[i] = SC_ONE;
661  GetOStream(Statistics0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
662  }
663  }
664 
665  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
666  Teuchos::ArrayRCP<GlobalOrdinal> indout(1, permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
667  Teuchos::ArrayRCP<Scalar> valout(1, invDiagVecData[row]);
668  diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size()));
669  }
670  }
671  diagScalingOp->fillComplete();
672 
673  Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp, false, *permPApermQt, false, GetOStream(Statistics2));
674  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory /*this*/);
675 
676  currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory /*this*/); // TODO careful with this!!!
677  currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory /*this*/);
678  currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory /*this*/);
679  currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory /*this*/);
680 
682  // count zeros on diagonal in P -> number of row permutations
683  Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(), true);
684  permPmatrix->getLocalDiagCopy(*diagPVec);
685  LocalOrdinal lNumRowPermutations = 0;
686  GlobalOrdinal gNumRowPermutations = 0;
687  {
688  Teuchos::ArrayRCP<const Scalar> diagPVecData = diagPVec->getData(0);
689  for (size_t i = 0; i < diagPVec->getMap()->getLocalNumElements(); ++i) {
690  if (diagPVecData[i] == SC_ZERO) {
691  lNumRowPermutations++;
692  }
693  }
694  }
695 
696  // sum up all entries in multipleColRequests over all processors
697  MueLu_sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);
698 
700  // count zeros on diagonal in Q^T -> number of column permutations
701  Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(), true);
702  permQTmatrix->getLocalDiagCopy(*diagQTVec);
703  LocalOrdinal lNumColPermutations = 0;
704  GlobalOrdinal gNumColPermutations = 0;
705  {
706  Teuchos::ArrayRCP<const Scalar> diagQTVecData = diagQTVec->getData(0);
707  for (size_t i = 0; i < diagQTVec->getMap()->getLocalNumElements(); ++i) {
708  if (diagQTVecData[i] == SC_ZERO) {
709  lNumColPermutations++;
710  }
711  }
712  }
713 
714  // sum up all entries in multipleColRequests over all processors
715  MueLu_sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);
716 
717  currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory /*this*/);
718  currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory /*this*/);
719  currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory /*this*/);
720  currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory /*this*/);
721 
722  GetOStream(Statistics0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
723  GetOStream(Statistics0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
724  GetOStream(Runtime1) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl;
725 }
726 
727 } // namespace MueLu
728 
729 #endif /* MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_ */
Important warning messages (one line)
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > &permRowMap, Level &currentLevel, const FactoryBase *genFactory) const
build permutation operators
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void sortingPermutation(const std::vector< Scalar > &values, std::vector< LocalOrdinal > &v)
size_type size() const
One-liner description of what is happening.
size_type size() const
Print even more statistics.
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static magnitudeType magnitude(T a)
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)
iterator begin() const
ArrayView< T > view(size_type lowerOffset, size_type size) const