MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_LocalPermutationStrategy_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 /*
11  * MueLu_LocalPermutationStrategy_def.hpp
12  *
13  * Created on: Feb 19, 2013
14  * Author: tobias
15  */
16 
17 #ifndef MUELU_LOCALPERMUTATIONSTRATEGY_DEF_HPP_
18 #define MUELU_LOCALPERMUTATIONSTRATEGY_DEF_HPP_
19 
20 #include <algorithm>
21 
22 #include <Xpetra_MultiVector.hpp>
23 #include <Xpetra_Matrix.hpp>
24 #include <Xpetra_MatrixMatrix.hpp>
25 #include <Xpetra_CrsGraph.hpp>
26 #include <Xpetra_Vector.hpp>
27 #include <Xpetra_VectorFactory.hpp>
28 #include <Xpetra_CrsMatrixWrap.hpp>
29 
30 #include "MueLu_Utilities.hpp"
32 
33 namespace MueLu {
34 
35 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
37  permWidth_ = nDofsPerNode;
38 
39  result_permvecs_.clear();
40 
41  // build permutation string
42  std::stringstream ss;
43  for (size_t t = 0; t < nDofsPerNode; t++)
44  ss << t;
45  std::string cs = ss.str();
46  // std::vector<std::string> result_perms;
47  do {
48  // result_perms.push_back(cs);
49 
50  std::vector<int> newPerm(cs.length(), -1);
51  for (size_t len = 0; len < cs.length(); len++) {
52  newPerm[len] = Teuchos::as<int>(cs[len] - '0');
53  }
54  result_permvecs_.push_back(newPerm);
55 
56  } while (std::next_permutation(cs.begin(), cs.end()));
57 }
58 
59 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
62 
63  size_t nDofsPerNode = 1;
64  if (A->IsView("stridedMaps")) {
65  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
66  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
67  }
68 
69  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
70 
72  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
73 
74  // check whether we have to (re)build the permutation vector
75  if (permWidth_ != nDofsPerNode)
76  BuildPermutations(nDofsPerNode);
77 
78  // declare local variables used inside loop
79  LocalOrdinal lonDofsPerNode = Teuchos::as<LocalOrdinal>(nDofsPerNode);
82  Teuchos::SerialDenseMatrix<LocalOrdinal, Scalar> subBlockMatrix(nDofsPerNode, nDofsPerNode, true);
83  std::vector<GlobalOrdinal> growIds(nDofsPerNode);
84 
85  // loop over local nodes
86  // TODO what about nOffset?
87  LocalOrdinal numLocalNodes = A->getRowMap()->getLocalNumElements() / nDofsPerNode;
88  for (LocalOrdinal node = 0; node < numLocalNodes; ++node) {
89  // zero out block matrix
90  subBlockMatrix.putScalar();
91 
92  // loop over all DOFs in current node
93  // Note: were assuming constant number of Dofs per node here!
94  // TODO This is more complicated for variable dofs per node
95  for (LocalOrdinal lrdof = 0; lrdof < lonDofsPerNode; ++lrdof) {
96  GlobalOrdinal grow = getGlobalDofId(A, node, lrdof);
97  growIds[lrdof] = grow;
98 
99  // extract local row information from matrix
100  A->getLocalRowView(A->getRowMap()->getLocalElement(grow), indices, vals);
101 
102  // find column entry with max absolute value
104  MT maxVal = 0.0;
105  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
106  if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
108  }
109  }
110 
111  GlobalOrdinal grnodeid = globalDofId2globalNodeId(A, grow);
112 
113  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
114  GlobalOrdinal gcol = A->getColMap()->getGlobalElement(indices[j]);
115  GlobalOrdinal gcnodeid = globalDofId2globalNodeId(A, gcol); // -> global node id
116  if (grnodeid == gcnodeid) {
117  if (maxVal != Teuchos::ScalarTraits<MT>::zero()) {
118  subBlockMatrix(lrdof, gcol % nDofsPerNode) = vals[j] / maxVal;
119  } else {
120  subBlockMatrix(lrdof, gcol % nDofsPerNode) = vals[j]; // there is a problem
121  std::cout << "maxVal never should be zero!!!!" << std::endl;
122  }
123  }
124  }
125  }
126 
127  // now we have the sub block matrix
128 
129  // build permutation string
130  /*std::stringstream ss;
131  for(size_t t = 0; t<nDofsPerNode; t++)
132  ss << t;
133  std::string cs = ss.str();
134  std::vector<std::string> result_perms;
135  do {
136  result_perms.push_back(cs);
137  //std::cout << result_perms.back() << std::endl;
138  } while (std::next_permutation(cs.begin(),cs.end()));*/
139 
140  std::vector<Scalar> performance_vector = std::vector<Scalar>(result_permvecs_.size());
141  for (size_t t = 0; t < result_permvecs_.size(); t++) {
142  std::vector<int> vv = result_permvecs_[t];
143  Scalar value = 1.0;
144  for (size_t j = 0; j < vv.size(); j++) {
145  value = value * subBlockMatrix(j, vv[j]);
146  }
147  performance_vector[t] = value;
148  }
149  /*for(size_t t = 0; t < result_perms.size(); t++) {
150  std::string s = result_perms[t];
151  Scalar value = 1.0;
152  for(size_t len=0; len<s.length(); len++) {
153  int col = Teuchos::as<int>(s[len]-'0');
154  value = value * subBlockMatrix(len,col);
155  }
156  performance_vector[t] = value;
157  }*/
158 
159  // find permutation with maximum performance value
161  MT maxVal = Teuchos::ScalarTraits<MT>::zero();
162  size_t maxPerformancePermutationIdx = 0;
163  for (size_t j = 0; j < Teuchos::as<size_t>(performance_vector.size()); j++) {
164  if (Teuchos::ScalarTraits<Scalar>::magnitude(performance_vector[j]) > maxVal) {
165  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(performance_vector[j]);
166  maxPerformancePermutationIdx = j;
167  }
168  }
169 
170  // build RowColPairs for best permutation
171  std::vector<int> bestPerformancePermutation = result_permvecs_[maxPerformancePermutationIdx];
172  for (size_t t = 0; t < nDofsPerNode; t++) {
173  RowColPairs.push_back(std::make_pair(growIds[t], growIds[bestPerformancePermutation[t]]));
174  }
175 
176  } // end loop over local nodes
177 
178  // build Pperm and Qperm vectors
179  Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap());
180  Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap());
181 
182  Pperm->putScalar(SC_ZERO);
183  Qperm->putScalar(SC_ZERO);
184 
185  Teuchos::ArrayRCP<Scalar> PpermData = Pperm->getDataNonConst(0);
186  Teuchos::ArrayRCP<Scalar> QpermData = Qperm->getDataNonConst(0);
187 
188  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
189  while (p != RowColPairs.end()) {
190  GlobalOrdinal ik = (*p).first;
191  GlobalOrdinal jk = (*p).second;
192 
193  LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
194  LocalOrdinal ljk = A->getDomainMap()->getLocalElement(jk);
195 
196  Pperm->replaceLocalValue(lik, ik);
197  Qperm->replaceLocalValue(ljk, ik);
198 
199  p = RowColPairs.erase(p);
200  }
201 
202  if (RowColPairs.size() > 0) GetOStream(Warnings0) << "MueLu::LocalPermutationStrategy: There are Row/col pairs left!" << std::endl;
203 
204  // Qperm should be fine
205  // build matrices
206 
207  // create new empty Matrix
208  Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1));
209  Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1));
210 
211  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
212  Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1, Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(PpermData[row]))); // column idx for Perm^T
213  Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1, Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(QpermData[row]))); // column idx for Qperm
215  permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0, indoutP.size()), valout.view(0, valout.size()));
216  permQTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutQ.view(0, indoutQ.size()), valout.view(0, valout.size()));
217  }
218 
219  permPTmatrix->fillComplete();
220  permQTmatrix->fillComplete();
221 
222  Teuchos::RCP<Matrix> permPmatrix = Utilities::Transpose(*permPTmatrix, true);
223 
224  /*for(size_t row=0; row<permPTmatrix->getLocalNumRows(); row++) {
225  if(permPTmatrix->getNumEntriesInLocalRow(row) != 1)
226  GetOStream(Warnings0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
227  if(permPmatrix->getNumEntriesInLocalRow(row) != 1)
228  GetOStream(Warnings0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
229  if(permQTmatrix->getNumEntriesInLocalRow(row) != 1)
230  GetOStream(Warnings0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
231  }*/
232 
233  // build permP * A * permQT
234  Teuchos::RCP<Matrix> ApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *permQTmatrix, false, GetOStream(Statistics2), true, true);
235  Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix, false, *ApermQt, false, GetOStream(Statistics2), true, true);
236 
237  /*
238  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("A.mat", *A);
239  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permP.mat", *permPmatrix);
240  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permQt.mat", *permQTmatrix);
241  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permPApermQt.mat", *permPApermQt);
242  */
243  // build scaling matrix
244  Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(), true);
245  Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(), true);
246  Teuchos::ArrayRCP<Scalar> invDiagVecData = invDiagVec->getDataNonConst(0);
247 
248  LO lCntZeroDiagonals = 0;
249  permPApermQt->getLocalDiagCopy(*diagVec);
250  Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
251  for (size_t i = 0; i < diagVec->getMap()->getLocalNumElements(); ++i) {
252  if (diagVecData[i] != SC_ZERO)
253  invDiagVecData[i] = Teuchos::ScalarTraits<Scalar>::one() / diagVecData[i];
254  else {
255  invDiagVecData[i] = Teuchos::ScalarTraits<Scalar>::one();
256  lCntZeroDiagonals++;
257  // GetOStream(Statistics0) << "MueLu::LocalPermutationStrategy: found zero on diagonal in row " << i << std::endl;
258  }
259  }
260 
261 #if 0
262  GO gCntZeroDiagonals = 0;
263  GO glCntZeroDiagonals = Teuchos::as<GlobalOrdinal>(lCntZeroDiagonals); /* LO->GO conversion */
264  MueLu_sumAll(comm,glCntZeroDiagonals,gCntZeroDiagonals);
265  GetOStream(Statistics0) << "MueLu::LocalPermutationStrategy: found " << gCntZeroDiagonals << " zeros on diagonal" << std::endl;
266 #endif
267 
268  Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(), 1));
269 
270  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
271  Teuchos::ArrayRCP<GlobalOrdinal> indout(1, permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
272  Teuchos::ArrayRCP<Scalar> valout(1, invDiagVecData[row]);
273  diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size()));
274  }
275  diagScalingOp->fillComplete();
276 
277  Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp, false, *permPApermQt, false, GetOStream(Statistics2), true, true);
278  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory);
279 
280  currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory);
281  currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory);
282  currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory);
283  currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory);
284 
286  // count zeros on diagonal in P -> number of row permutations
287  Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(), true);
288  permPmatrix->getLocalDiagCopy(*diagPVec);
289  Teuchos::ArrayRCP<const Scalar> diagPVecData = diagPVec->getData(0);
290  GlobalOrdinal lNumRowPermutations = 0;
291  GlobalOrdinal gNumRowPermutations = 0;
292  for (size_t i = 0; i < diagPVec->getMap()->getLocalNumElements(); ++i) {
293  if (diagPVecData[i] == SC_ZERO) {
294  lNumRowPermutations++;
295  }
296  }
297 
298  // sum up all entries in multipleColRequests over all processors
299  MueLu_sumAll(diagPVec->getMap()->getComm(), lNumRowPermutations, gNumRowPermutations);
300 
302  // count zeros on diagonal in Q^T -> number of column permutations
303  Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(), true);
304  permQTmatrix->getLocalDiagCopy(*diagQTVec);
305  Teuchos::ArrayRCP<const Scalar> diagQTVecData = diagQTVec->getData(0);
306  GlobalOrdinal lNumColPermutations = 0;
307  GlobalOrdinal gNumColPermutations = 0;
308  for (size_t i = 0; i < diagQTVec->getMap()->getLocalNumElements(); ++i) {
309  if (diagQTVecData[i] == SC_ZERO) {
310  lNumColPermutations++;
311  }
312  }
313 
314  // sum up all entries in multipleColRequests over all processors
315  MueLu_sumAll(diagQTVec->getMap()->getComm(), lNumColPermutations, gNumColPermutations);
316 
317  currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory /*this*/);
318  currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory /*this*/);
319  currentLevel.Set("#WideRangeRowPermutations", 0, genFactory /*this*/);
320  currentLevel.Set("#WideRangeColPermutations", 0, genFactory /*this*/);
321 
322  GetOStream(Statistics0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
323  GetOStream(Statistics0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
324 }
325 
326 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
328  size_t nDofsPerNode = 1;
329  if (A->IsView("stridedMaps")) {
330  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
331  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
332  }
333 
334  LocalOrdinal localDofId = localNodeId * nDofsPerNode + localDof;
335 
336  return A->getRowMap()->getGlobalElement(localDofId);
337 }
338 
339 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
341  size_t nDofsPerNode = 1;
342  if (A->IsView("stridedMaps")) {
343  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
344  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
345  }
346 
347  return (GlobalOrdinal)grid / (GlobalOrdinal)nDofsPerNode; // TODO what about nOffset???
348 }
349 
350 } // namespace MueLu
351 
352 #endif /* MUELU_LOCALPERMUTATIONSTRATEGY_DEF_HPP_ */
Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > permRowMap, Level &currentLevel, const FactoryBase *genFactory) const
build permutation operators
GlobalOrdinal GO
LocalOrdinal LO
size_type size() const
Print even more statistics.
GlobalOrdinal getGlobalDofId(const Teuchos::RCP< Matrix > &A, LocalOrdinal localNodeId, LocalOrdinal localDof) const
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
void BuildPermutations(size_t nDofsPerNode) const
int putScalar(const ScalarType value=Teuchos::ScalarTraits< ScalarType >::zero())
GlobalOrdinal globalDofId2globalNodeId(const Teuchos::RCP< Matrix > &A, GlobalOrdinal grid) const
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static magnitudeType magnitude(T a)
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Scalar SC
iterator begin() const
ArrayView< T > view(size_type lowerOffset, size_type size) const