MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_LocalPermutationStrategy_def.hpp
Go to the documentation of this file.
1 /*
2  * MueLu_LocalPermutationStrategy_def.hpp
3  *
4  * Created on: Feb 19, 2013
5  * Author: tobias
6  */
7 
8 #ifndef MUELU_LOCALPERMUTATIONSTRATEGY_DEF_HPP_
9 #define MUELU_LOCALPERMUTATIONSTRATEGY_DEF_HPP_
10 
11 #include <algorithm>
12 
13 #include <Xpetra_MultiVector.hpp>
14 #include <Xpetra_Matrix.hpp>
15 #include <Xpetra_MatrixMatrix.hpp>
16 #include <Xpetra_CrsGraph.hpp>
17 #include <Xpetra_Vector.hpp>
18 #include <Xpetra_VectorFactory.hpp>
19 #include <Xpetra_CrsMatrixWrap.hpp>
20 
21 #include "MueLu_Utilities.hpp"
23 
24 namespace MueLu {
25 
26 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
28  permWidth_ = nDofsPerNode;
29 
30  result_permvecs_.clear();
31 
32  // build permutation string
33  std::stringstream ss;
34  for (size_t t = 0; t < nDofsPerNode; t++)
35  ss << t;
36  std::string cs = ss.str();
37  // std::vector<std::string> result_perms;
38  do {
39  // result_perms.push_back(cs);
40 
41  std::vector<int> newPerm(cs.length(), -1);
42  for (size_t len = 0; len < cs.length(); len++) {
43  newPerm[len] = Teuchos::as<int>(cs[len] - '0');
44  }
45  result_permvecs_.push_back(newPerm);
46 
47  } while (std::next_permutation(cs.begin(), cs.end()));
48 }
49 
50 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
53 
54  size_t nDofsPerNode = 1;
55  if (A->IsView("stridedMaps")) {
56  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
57  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
58  }
59 
60  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
61 
63  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
64 
65  // check whether we have to (re)build the permutation vector
66  if (permWidth_ != nDofsPerNode)
67  BuildPermutations(nDofsPerNode);
68 
69  // declare local variables used inside loop
70  LocalOrdinal lonDofsPerNode = Teuchos::as<LocalOrdinal>(nDofsPerNode);
73  Teuchos::SerialDenseMatrix<LocalOrdinal, Scalar> subBlockMatrix(nDofsPerNode, nDofsPerNode, true);
74  std::vector<GlobalOrdinal> growIds(nDofsPerNode);
75 
76  // loop over local nodes
77  // TODO what about nOffset?
78  LocalOrdinal numLocalNodes = A->getRowMap()->getLocalNumElements() / nDofsPerNode;
79  for (LocalOrdinal node = 0; node < numLocalNodes; ++node) {
80  // zero out block matrix
81  subBlockMatrix.putScalar();
82 
83  // loop over all DOFs in current node
84  // Note: were assuming constant number of Dofs per node here!
85  // TODO This is more complicated for variable dofs per node
86  for (LocalOrdinal lrdof = 0; lrdof < lonDofsPerNode; ++lrdof) {
87  GlobalOrdinal grow = getGlobalDofId(A, node, lrdof);
88  growIds[lrdof] = grow;
89 
90  // extract local row information from matrix
91  A->getLocalRowView(A->getRowMap()->getLocalElement(grow), indices, vals);
92 
93  // find column entry with max absolute value
95  MT maxVal = 0.0;
96  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
97  if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
99  }
100  }
101 
102  GlobalOrdinal grnodeid = globalDofId2globalNodeId(A, grow);
103 
104  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
105  GlobalOrdinal gcol = A->getColMap()->getGlobalElement(indices[j]);
106  GlobalOrdinal gcnodeid = globalDofId2globalNodeId(A, gcol); // -> global node id
107  if (grnodeid == gcnodeid) {
108  if (maxVal != Teuchos::ScalarTraits<MT>::zero()) {
109  subBlockMatrix(lrdof, gcol % nDofsPerNode) = vals[j] / maxVal;
110  } else {
111  subBlockMatrix(lrdof, gcol % nDofsPerNode) = vals[j]; // there is a problem
112  std::cout << "maxVal never should be zero!!!!" << std::endl;
113  }
114  }
115  }
116  }
117 
118  // now we have the sub block matrix
119 
120  // build permutation string
121  /*std::stringstream ss;
122  for(size_t t = 0; t<nDofsPerNode; t++)
123  ss << t;
124  std::string cs = ss.str();
125  std::vector<std::string> result_perms;
126  do {
127  result_perms.push_back(cs);
128  //std::cout << result_perms.back() << std::endl;
129  } while (std::next_permutation(cs.begin(),cs.end()));*/
130 
131  std::vector<Scalar> performance_vector = std::vector<Scalar>(result_permvecs_.size());
132  for (size_t t = 0; t < result_permvecs_.size(); t++) {
133  std::vector<int> vv = result_permvecs_[t];
134  Scalar value = 1.0;
135  for (size_t j = 0; j < vv.size(); j++) {
136  value = value * subBlockMatrix(j, vv[j]);
137  }
138  performance_vector[t] = value;
139  }
140  /*for(size_t t = 0; t < result_perms.size(); t++) {
141  std::string s = result_perms[t];
142  Scalar value = 1.0;
143  for(size_t len=0; len<s.length(); len++) {
144  int col = Teuchos::as<int>(s[len]-'0');
145  value = value * subBlockMatrix(len,col);
146  }
147  performance_vector[t] = value;
148  }*/
149 
150  // find permutation with maximum performance value
152  MT maxVal = Teuchos::ScalarTraits<MT>::zero();
153  size_t maxPerformancePermutationIdx = 0;
154  for (size_t j = 0; j < Teuchos::as<size_t>(performance_vector.size()); j++) {
155  if (Teuchos::ScalarTraits<Scalar>::magnitude(performance_vector[j]) > maxVal) {
156  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(performance_vector[j]);
157  maxPerformancePermutationIdx = j;
158  }
159  }
160 
161  // build RowColPairs for best permutation
162  std::vector<int> bestPerformancePermutation = result_permvecs_[maxPerformancePermutationIdx];
163  for (size_t t = 0; t < nDofsPerNode; t++) {
164  RowColPairs.push_back(std::make_pair(growIds[t], growIds[bestPerformancePermutation[t]]));
165  }
166 
167  } // end loop over local nodes
168 
169  // build Pperm and Qperm vectors
170  Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap());
171  Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap());
172 
173  Pperm->putScalar(SC_ZERO);
174  Qperm->putScalar(SC_ZERO);
175 
176  Teuchos::ArrayRCP<Scalar> PpermData = Pperm->getDataNonConst(0);
177  Teuchos::ArrayRCP<Scalar> QpermData = Qperm->getDataNonConst(0);
178 
179  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
180  while (p != RowColPairs.end()) {
181  GlobalOrdinal ik = (*p).first;
182  GlobalOrdinal jk = (*p).second;
183 
184  LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
185  LocalOrdinal ljk = A->getDomainMap()->getLocalElement(jk);
186 
187  Pperm->replaceLocalValue(lik, ik);
188  Qperm->replaceLocalValue(ljk, ik);
189 
190  p = RowColPairs.erase(p);
191  }
192 
193  if (RowColPairs.size() > 0) GetOStream(Warnings0) << "MueLu::LocalPermutationStrategy: There are Row/col pairs left!" << std::endl;
194 
195  // Qperm should be fine
196  // build matrices
197 
198  // create new empty Matrix
199  Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1));
200  Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1));
201 
202  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
203  Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1, Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(PpermData[row]))); // column idx for Perm^T
204  Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1, Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(QpermData[row]))); // column idx for Qperm
206  permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0, indoutP.size()), valout.view(0, valout.size()));
207  permQTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutQ.view(0, indoutQ.size()), valout.view(0, valout.size()));
208  }
209 
210  permPTmatrix->fillComplete();
211  permQTmatrix->fillComplete();
212 
213  Teuchos::RCP<Matrix> permPmatrix = Utilities::Transpose(*permPTmatrix, true);
214 
215  /*for(size_t row=0; row<permPTmatrix->getLocalNumRows(); row++) {
216  if(permPTmatrix->getNumEntriesInLocalRow(row) != 1)
217  GetOStream(Warnings0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
218  if(permPmatrix->getNumEntriesInLocalRow(row) != 1)
219  GetOStream(Warnings0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
220  if(permQTmatrix->getNumEntriesInLocalRow(row) != 1)
221  GetOStream(Warnings0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
222  }*/
223 
224  // build permP * A * permQT
225  Teuchos::RCP<Matrix> ApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *permQTmatrix, false, GetOStream(Statistics2), true, true);
226  Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix, false, *ApermQt, false, GetOStream(Statistics2), true, true);
227 
228  /*
229  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("A.mat", *A);
230  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permP.mat", *permPmatrix);
231  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permQt.mat", *permQTmatrix);
232  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permPApermQt.mat", *permPApermQt);
233  */
234  // build scaling matrix
235  Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(), true);
236  Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(), true);
237  Teuchos::ArrayRCP<Scalar> invDiagVecData = invDiagVec->getDataNonConst(0);
238 
239  LO lCntZeroDiagonals = 0;
240  permPApermQt->getLocalDiagCopy(*diagVec);
241  Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
242  for (size_t i = 0; i < diagVec->getMap()->getLocalNumElements(); ++i) {
243  if (diagVecData[i] != SC_ZERO)
244  invDiagVecData[i] = Teuchos::ScalarTraits<Scalar>::one() / diagVecData[i];
245  else {
246  invDiagVecData[i] = Teuchos::ScalarTraits<Scalar>::one();
247  lCntZeroDiagonals++;
248  // GetOStream(Statistics0) << "MueLu::LocalPermutationStrategy: found zero on diagonal in row " << i << std::endl;
249  }
250  }
251 
252 #if 0
253  GO gCntZeroDiagonals = 0;
254  GO glCntZeroDiagonals = Teuchos::as<GlobalOrdinal>(lCntZeroDiagonals); /* LO->GO conversion */
255  MueLu_sumAll(comm,glCntZeroDiagonals,gCntZeroDiagonals);
256  GetOStream(Statistics0) << "MueLu::LocalPermutationStrategy: found " << gCntZeroDiagonals << " zeros on diagonal" << std::endl;
257 #endif
258 
259  Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(), 1));
260 
261  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
262  Teuchos::ArrayRCP<GlobalOrdinal> indout(1, permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
263  Teuchos::ArrayRCP<Scalar> valout(1, invDiagVecData[row]);
264  diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size()));
265  }
266  diagScalingOp->fillComplete();
267 
268  Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp, false, *permPApermQt, false, GetOStream(Statistics2), true, true);
269  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory);
270 
271  currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory);
272  currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory);
273  currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory);
274  currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory);
275 
277  // count zeros on diagonal in P -> number of row permutations
278  Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(), true);
279  permPmatrix->getLocalDiagCopy(*diagPVec);
280  Teuchos::ArrayRCP<const Scalar> diagPVecData = diagPVec->getData(0);
281  GlobalOrdinal lNumRowPermutations = 0;
282  GlobalOrdinal gNumRowPermutations = 0;
283  for (size_t i = 0; i < diagPVec->getMap()->getLocalNumElements(); ++i) {
284  if (diagPVecData[i] == SC_ZERO) {
285  lNumRowPermutations++;
286  }
287  }
288 
289  // sum up all entries in multipleColRequests over all processors
290  MueLu_sumAll(diagPVec->getMap()->getComm(), lNumRowPermutations, gNumRowPermutations);
291 
293  // count zeros on diagonal in Q^T -> number of column permutations
294  Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(), true);
295  permQTmatrix->getLocalDiagCopy(*diagQTVec);
296  Teuchos::ArrayRCP<const Scalar> diagQTVecData = diagQTVec->getData(0);
297  GlobalOrdinal lNumColPermutations = 0;
298  GlobalOrdinal gNumColPermutations = 0;
299  for (size_t i = 0; i < diagQTVec->getMap()->getLocalNumElements(); ++i) {
300  if (diagQTVecData[i] == SC_ZERO) {
301  lNumColPermutations++;
302  }
303  }
304 
305  // sum up all entries in multipleColRequests over all processors
306  MueLu_sumAll(diagQTVec->getMap()->getComm(), lNumColPermutations, gNumColPermutations);
307 
308  currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory /*this*/);
309  currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory /*this*/);
310  currentLevel.Set("#WideRangeRowPermutations", 0, genFactory /*this*/);
311  currentLevel.Set("#WideRangeColPermutations", 0, genFactory /*this*/);
312 
313  GetOStream(Statistics0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
314  GetOStream(Statistics0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
315 }
316 
317 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
319  size_t nDofsPerNode = 1;
320  if (A->IsView("stridedMaps")) {
321  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
322  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
323  }
324 
325  LocalOrdinal localDofId = localNodeId * nDofsPerNode + localDof;
326 
327  return A->getRowMap()->getGlobalElement(localDofId);
328 }
329 
330 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
332  size_t nDofsPerNode = 1;
333  if (A->IsView("stridedMaps")) {
334  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
335  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
336  }
337 
338  return (GlobalOrdinal)grid / (GlobalOrdinal)nDofsPerNode; // TODO what about nOffset???
339 }
340 
341 } // namespace MueLu
342 
343 #endif /* MUELU_LOCALPERMUTATIONSTRATEGY_DEF_HPP_ */
Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > permRowMap, Level &currentLevel, const FactoryBase *genFactory) const
build permutation operators
GlobalOrdinal GO
LocalOrdinal LO
size_type size() const
Print even more statistics.
GlobalOrdinal getGlobalDofId(const Teuchos::RCP< Matrix > &A, LocalOrdinal localNodeId, LocalOrdinal localDof) const
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
void BuildPermutations(size_t nDofsPerNode) const
int putScalar(const ScalarType value=Teuchos::ScalarTraits< ScalarType >::zero())
GlobalOrdinal globalDofId2globalNodeId(const Teuchos::RCP< Matrix > &A, GlobalOrdinal grid) const
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static magnitudeType magnitude(T a)
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Scalar SC
iterator begin() const
ArrayView< T > view(size_type lowerOffset, size_type size) const