MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
47 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
48 
49 #include <Xpetra_CrsGraphFactory.hpp>
50 #include <Xpetra_CrsGraph.hpp>
51 #include <Xpetra_ImportFactory.hpp>
52 #include <Xpetra_MapFactory.hpp>
53 #include <Xpetra_Map.hpp>
54 #include <Xpetra_Matrix.hpp>
55 #include <Xpetra_MultiVectorFactory.hpp>
56 #include <Xpetra_MultiVector.hpp>
57 #include <Xpetra_StridedMap.hpp>
58 #include <Xpetra_VectorFactory.hpp>
59 #include <Xpetra_Vector.hpp>
60 
62 
63 #include "MueLu_AmalgamationFactory.hpp"
64 #include "MueLu_AmalgamationInfo.hpp"
65 #include "MueLu_Exceptions.hpp"
66 #include "MueLu_GraphBase.hpp"
67 #include "MueLu_Graph.hpp"
68 #include "MueLu_Level.hpp"
69 #include "MueLu_LWGraph.hpp"
70 #include "MueLu_MasterList.hpp"
71 #include "MueLu_Monitor.hpp"
72 #include "MueLu_PreDropFunctionBaseClass.hpp"
73 #include "MueLu_PreDropFunctionConstVal.hpp"
74 #include "MueLu_Utilities.hpp"
75 
76 #include <algorithm>
77 #include <cstdlib>
78 #include <string>
79 
80 // If defined, read environment variables.
81 // Should be removed once we are confident that this works.
82 //#define DJS_READ_ENV_VARIABLES
83 
84 namespace MueLu {
85 
86  namespace Details {
87  template<class real_type, class LO>
88  struct DropTol {
89 
90  DropTol() = default;
91  DropTol(DropTol const&) = default;
92  DropTol(DropTol &&) = default;
93 
94  DropTol& operator=(DropTol const&) = default;
95  DropTol& operator=(DropTol &&) = default;
96 
97  DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
98  : val{val_}, diag{diag_}, col{col_}, drop{drop_}
99  {}
100 
104  bool drop {true};
105  };
106  }
107 
108 
109  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
111  RCP<ParameterList> validParamList = rcp(new ParameterList());
112 
113 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
114  SET_VALID_ENTRY("aggregation: drop tol");
115  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
116  SET_VALID_ENTRY("aggregation: drop scheme");
117  {
119  validParamList->getEntry("aggregation: drop scheme").setValidator(
120  rcp(new validatorType(Teuchos::tuple<std::string>("classical", "distance laplacian"), "aggregation: drop scheme")));
121  }
122  SET_VALID_ENTRY("aggregation: distance laplacian algo");
123  SET_VALID_ENTRY("aggregation: classical algo");
124 #undef SET_VALID_ENTRY
125  validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access");
126 
127  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
128  validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
129  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for Coordinates");
130 
131  return validParamList;
132  }
133 
134  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
136 
137  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
139  Input(currentLevel, "A");
140  Input(currentLevel, "UnAmalgamationInfo");
141 
142  const ParameterList& pL = GetParameterList();
143  if (pL.get<bool>("lightweight wrap") == true) {
144  if (pL.get<std::string>("aggregation: drop scheme") == "distance laplacian")
145  Input(currentLevel, "Coordinates");
146 
147  }
148  }
149 
150  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
152 
153  FactoryMonitor m(*this, "Build", currentLevel);
154 
155  typedef Teuchos::ScalarTraits<SC> STS;
156  typedef typename STS::magnitudeType real_type;
157  typedef Xpetra::MultiVector<real_type,LO,GO,NO> RealValuedMultiVector;
158 
159  if (predrop_ != Teuchos::null)
160  GetOStream(Parameters0) << predrop_->description();
161 
162  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
163  RCP<AmalgamationInfo> amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
164 
165  const ParameterList & pL = GetParameterList();
166  bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
167 
168  GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
169 
170  // decide wether to use the fast-track code path for standard maps or the somewhat slower
171  // code path for non-standard maps
172  /*bool bNonStandardMaps = false;
173  if (A->IsView("stridedMaps") == true) {
174  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
175  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
176  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
177  if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
178  bNonStandardMaps = true;
179  }*/
180 
181  if (doExperimentalWrap) {
182  std::string algo = pL.get<std::string>("aggregation: drop scheme");
183 
184  TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
185  TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian)");
186 
187  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
188  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
189  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
190  real_type realThreshold = STS::magnitude(threshold);// CMS: Rename this to "magnitude threshold" sometime
192  // Remove this bit once we are confident that cut-based dropping works.
193 #ifdef HAVE_MUELU_DEBUG
194  int distanceLaplacianCutVerbose = 0;
195 #endif
196 #ifdef DJS_READ_ENV_VARIABLES
197  if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
198  distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
199  }
200 
201  if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
202  auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
203  realThreshold = 1e-4*tmp;
204  }
205 
206 # ifdef HAVE_MUELU_DEBUG
207  if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
208  distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
209  }
210 # endif
211 #endif
212 
214  enum decisionAlgoType {defaultAlgo, unscaled_cut, scaled_cut};
215 
216  decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
217  decisionAlgoType classicalAlgo = defaultAlgo;
218  if (algo == "distance laplacian") {
219  if (distanceLaplacianAlgoStr == "default")
220  distanceLaplacianAlgo = defaultAlgo;
221  else if (distanceLaplacianAlgoStr == "unscaled cut")
222  distanceLaplacianAlgo = unscaled_cut;
223  else if (distanceLaplacianAlgoStr == "scaled cut")
224  distanceLaplacianAlgo = scaled_cut;
225  else
226  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
227  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
228  } else if (algo == "classical") {
229  if (classicalAlgoStr == "default")
230  classicalAlgo = defaultAlgo;
231  else if (classicalAlgoStr == "unscaled cut")
232  classicalAlgo = unscaled_cut;
233  else if (classicalAlgoStr == "scaled cut")
234  classicalAlgo = scaled_cut;
235  else
236  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
237  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
238 
239  } else
240  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
241  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
242 
243  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
244 
245  GO numDropped = 0, numTotal = 0;
246  std::string graphType = "unamalgamated"; //for description purposes only
247  if (algo == "classical") {
248  if (predrop_ == null) {
249  // ap: this is a hack: had to declare predrop_ as mutable
250  predrop_ = rcp(new PreDropFunctionConstVal(threshold));
251  }
252 
253  if (predrop_ != null) {
254  RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
255  TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
256  "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
257  // If a user provided a predrop function, it overwrites the XML threshold parameter
258  SC newt = predropConstVal->GetThreshold();
259  if (newt != threshold) {
260  GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
261  threshold = newt;
262  }
263  }
264  // At this points we either have
265  // (predrop_ != null)
266  // Therefore, it is sufficient to check only threshold
267  if (A->GetFixedBlockSize() == 1 && threshold == STS::zero() && A->hasCrsGraph()) {
268  // Case 1: scalar problem, no dropping => just use matrix graph
269  RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
270  // Detect and record rows that correspond to Dirichlet boundary conditions
271  ArrayRCP<const bool > boundaryNodes;
272  boundaryNodes = MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold);
273  graph->SetBoundaryNodeMap(boundaryNodes);
274  numTotal = A->getNodeNumEntries();
275 
276  if (GetVerbLevel() & Statistics1) {
277  GO numLocalBoundaryNodes = 0;
278  GO numGlobalBoundaryNodes = 0;
279  for (LO i = 0; i < boundaryNodes.size(); ++i)
280  if (boundaryNodes[i])
281  numLocalBoundaryNodes++;
282  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
283  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
284  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
285  }
286 
287  Set(currentLevel, "DofsPerNode", 1);
288  Set(currentLevel, "Graph", graph);
289 
290  } else if ( (A->GetFixedBlockSize() == 1 && threshold != STS::zero()) ||
291  (A->GetFixedBlockSize() == 1 && threshold == STS::zero() && !A->hasCrsGraph())) {
292  // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
293  // graph's map information, e.g., whether index is local
294  // OR a matrix without a CrsGraph
295 
296  // allocate space for the local graph
297  ArrayRCP<LO> rows (A->getNodeNumRows()+1);
298  ArrayRCP<LO> columns(A->getNodeNumEntries());
299 
301  const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
302  ArrayRCP<const bool> boundaryNodes = MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold);
303 
304  LO realnnz = 0;
305  rows[0] = 0;
306  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
307  size_t nnz = A->getNumEntriesInLocalRow(row);
308  ArrayView<const LO> indices;
309  ArrayView<const SC> vals;
310  A->getLocalRowView(row, indices, vals);
311 
312  if(classicalAlgo == defaultAlgo) {
313  //FIXME the current predrop function uses the following
314  //FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
315  //FIXME but the threshold doesn't take into account the rows' diagonal entries
316  //FIXME For now, hardwiring the dropping in here
317 
318  LO rownnz = 0;
319  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
320  LO col = indices[colID];
321 
322  // we avoid a square root by using squared values
323  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
324  typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
325 
326  if (aij > aiiajj || row == col) {
327  columns[realnnz++] = col;
328  rownnz++;
329  } else
330  numDropped++;
331  }
332  rows[row+1] = realnnz;
333  }
334  else {
335  /* Cut Algorithm */
336  //CMS
337  using DropTol = Details::DropTol<real_type,LO>;
338  std::vector<DropTol> drop_vec;
339  drop_vec.reserve(nnz);
340  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
341  const real_type one = Teuchos::ScalarTraits<real_type>::one();
342  LO rownnz = 0;
343 
344  // find magnitudes
345  for (LO colID = 0; colID < (LO)nnz; colID++) {
346  LO col = indices[colID];
347  if (row == col) {
348  drop_vec.emplace_back( zero, one, colID, false);
349  continue;
350  }
351 
352  // Don't aggregate boundaries
353  if(boundaryNodes[colID]) continue;
354  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
355  typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
356  drop_vec.emplace_back(aij, aiiajj, colID, false);
357  }
358 
359  const size_t n = drop_vec.size();
360 
361  if (classicalAlgo == unscaled_cut) {
362  std::sort( drop_vec.begin(), drop_vec.end()
363  , [](DropTol const& a, DropTol const& b) {
364  return a.val > b.val;
365  });
366 
367  bool drop = false;
368  for (size_t i=1; i<n; ++i) {
369  if (!drop) {
370  auto const& x = drop_vec[i-1];
371  auto const& y = drop_vec[i];
372  auto a = x.val;
373  auto b = y.val;
374  if (a > realThreshold*b) {
375  drop = true;
376 #ifdef HAVE_MUELU_DEBUG
377  if (distanceLaplacianCutVerbose) {
378  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
379  }
380 #endif
381  }
382  }
383  drop_vec[i].drop = drop;
384  }
385  } else if (classicalAlgo == scaled_cut) {
386  std::sort( drop_vec.begin(), drop_vec.end()
387  , [](DropTol const& a, DropTol const& b) {
388  return a.val/a.diag > b.val/b.diag;
389  });
390  bool drop = false;
391  // printf("[%d] Scaled Cut: ",(int)row);
392  // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep");
393  for (size_t i=1; i<n; ++i) {
394  if (!drop) {
395  auto const& x = drop_vec[i-1];
396  auto const& y = drop_vec[i];
397  auto a = x.val/x.diag;
398  auto b = y.val/y.diag;
399  if (a > realThreshold*b) {
400  drop = true;
401 
402 #ifdef HAVE_MUELU_DEBUG
403  if (distanceLaplacianCutVerbose) {
404  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
405  }
406 #endif
407  }
408  // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep");
409 
410  }
411  drop_vec[i].drop = drop;
412  }
413  // printf("\n");
414  }
415  std::sort( drop_vec.begin(), drop_vec.end()
416  , [](DropTol const& a, DropTol const& b) {
417  return a.col < b.col;
418  }
419  );
420 
421  for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
422  LO col = indices[drop_vec[idxID].col];
423  // don't drop diagonal
424  if (row == col) {
425  columns[realnnz++] = col;
426  rownnz++;
427  continue;
428  }
429 
430  if (!drop_vec[idxID].drop) {
431  columns[realnnz++] = col;
432  rownnz++;
433  } else {
434  numDropped++;
435  }
436  }
437  // CMS
438  rows[row+1] = realnnz;
439 
440  }
441  }//end for row
442 
443  columns.resize(realnnz);
444  numTotal = A->getNodeNumEntries();
445  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A"));
446  graph->SetBoundaryNodeMap(boundaryNodes);
447  if (GetVerbLevel() & Statistics1) {
448  GO numLocalBoundaryNodes = 0;
449  GO numGlobalBoundaryNodes = 0;
450  for (LO i = 0; i < boundaryNodes.size(); ++i)
451  if (boundaryNodes[i])
452  numLocalBoundaryNodes++;
453  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
454  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
455  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
456  }
457  Set(currentLevel, "Graph", graph);
458  Set(currentLevel, "DofsPerNode", 1);
459 
460  } else if (A->GetFixedBlockSize() > 1 && threshold == STS::zero()) {
461  // Case 3: Multiple DOF/node problem without dropping
462  const RCP<const Map> rowMap = A->getRowMap();
463  const RCP<const Map> colMap = A->getColMap();
464 
465  graphType = "amalgamated";
466 
467  // build node row map (uniqueMap) and node column map (nonUniqueMap)
468  // the arrays rowTranslation and colTranslation contain the local node id
469  // given a local dof id. The data is calculated by the AmalgamationFactory and
470  // stored in the variable container "UnAmalgamationInfo"
471  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
472  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
473  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
474  Array<LO> colTranslation = *(amalInfo->getColTranslation());
475 
476  // get number of local nodes
477  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
478 
479  // Allocate space for the local graph
480  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
481  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
482 
483  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
484 
485  // Detect and record rows that correspond to Dirichlet boundary conditions
486  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
487  // TODO the array one bigger than the number of local rows, and the last entry can
488  // TODO hold the actual number of boundary nodes. Clever, huh?
489  ArrayRCP<const bool > pointBoundaryNodes;
490  pointBoundaryNodes = MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold);
491 
492  // extract striding information
493  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
494  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
495  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
496  if (A->IsView("stridedMaps") == true) {
497  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
498  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
499  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
500  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
501  blkId = strMap->getStridedBlockId();
502  if (blkId > -1)
503  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
504  }
505 
506  // loop over all local nodes
507  LO realnnz = 0;
508  rows[0] = 0;
509  Array<LO> indicesExtra;
510  for (LO row = 0; row < numRows; row++) {
511  ArrayView<const LO> indices;
512  indicesExtra.resize(0);
513 
514  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
515  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
516  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
517  // with local ids.
518  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
519  // node.
520  bool isBoundary = false;
521  isBoundary = true;
522  for (LO j = 0; j < blkPartSize; j++) {
523  if (!pointBoundaryNodes[row*blkPartSize+j]) {
524  isBoundary = false;
525  break;
526  }
527  }
528 
529  // Merge rows of A
530  // The array indicesExtra contains local column node ids for the current local node "row"
531  if (!isBoundary)
532  MergeRows(*A, row, indicesExtra, colTranslation);
533  else
534  indicesExtra.push_back(row);
535  indices = indicesExtra;
536  numTotal += indices.size();
537 
538  // add the local column node ids to the full columns array which
539  // contains the local column node ids for all local node rows
540  LO nnz = indices.size(), rownnz = 0;
541  for (LO colID = 0; colID < nnz; colID++) {
542  LO col = indices[colID];
543  columns[realnnz++] = col;
544  rownnz++;
545  }
546 
547  if (rownnz == 1) {
548  // If the only element remaining after filtering is diagonal, mark node as boundary
549  // FIXME: this should really be replaced by the following
550  // if (indices.size() == 1 && indices[0] == row)
551  // boundaryNodes[row] = true;
552  // We do not do it this way now because there is no framework for distinguishing isolated
553  // and boundary nodes in the aggregation algorithms
554  amalgBoundaryNodes[row] = true;
555  }
556  rows[row+1] = realnnz;
557  } //for (LO row = 0; row < numRows; row++)
558  columns.resize(realnnz);
559 
560  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
561  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
562 
563  if (GetVerbLevel() & Statistics1) {
564  GO numLocalBoundaryNodes = 0;
565  GO numGlobalBoundaryNodes = 0;
566 
567  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
568  if (amalgBoundaryNodes[i])
569  numLocalBoundaryNodes++;
570 
571  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
572  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
573  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
574  << " agglomerated Dirichlet nodes" << std::endl;
575  }
576 
577  Set(currentLevel, "Graph", graph);
578  Set(currentLevel, "DofsPerNode", blkSize); // full block size
579 
580  } else if (A->GetFixedBlockSize() > 1 && threshold != STS::zero()) {
581  // Case 4: Multiple DOF/node problem with dropping
582  const RCP<const Map> rowMap = A->getRowMap();
583  const RCP<const Map> colMap = A->getColMap();
584 
585  graphType = "amalgamated";
586 
587  // build node row map (uniqueMap) and node column map (nonUniqueMap)
588  // the arrays rowTranslation and colTranslation contain the local node id
589  // given a local dof id. The data is calculated by the AmalgamationFactory and
590  // stored in the variable container "UnAmalgamationInfo"
591  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
592  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
593  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
594  Array<LO> colTranslation = *(amalInfo->getColTranslation());
595 
596  // get number of local nodes
597  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
598 
599  // Allocate space for the local graph
600  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
601  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
602 
603  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
604 
605  // Detect and record rows that correspond to Dirichlet boundary conditions
606  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
607  // TODO the array one bigger than the number of local rows, and the last entry can
608  // TODO hold the actual number of boundary nodes. Clever, huh?
609  ArrayRCP<const bool > pointBoundaryNodes;
610  pointBoundaryNodes = MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold);
611 
612  // extract striding information
613  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
614  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
615  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
616  if (A->IsView("stridedMaps") == true) {
617  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
618  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
619  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
620  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
621  blkId = strMap->getStridedBlockId();
622  if (blkId > -1)
623  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
624  }
625 
626  // extract diagonal data for dropping strategy
628  const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
629 
630  // loop over all local nodes
631  LO realnnz = 0;
632  rows[0] = 0;
633  Array<LO> indicesExtra;
634  for (LO row = 0; row < numRows; row++) {
635  ArrayView<const LO> indices;
636  indicesExtra.resize(0);
637 
638  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
639  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
640  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
641  // with local ids.
642  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
643  // node.
644  bool isBoundary = false;
645  isBoundary = true;
646  for (LO j = 0; j < blkPartSize; j++) {
647  if (!pointBoundaryNodes[row*blkPartSize+j]) {
648  isBoundary = false;
649  break;
650  }
651  }
652 
653  // Merge rows of A
654  // The array indicesExtra contains local column node ids for the current local node "row"
655  if (!isBoundary)
656  MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
657  else
658  indicesExtra.push_back(row);
659  indices = indicesExtra;
660  numTotal += indices.size();
661 
662  // add the local column node ids to the full columns array which
663  // contains the local column node ids for all local node rows
664  LO nnz = indices.size(), rownnz = 0;
665  for (LO colID = 0; colID < nnz; colID++) {
666  LO col = indices[colID];
667  columns[realnnz++] = col;
668  rownnz++;
669  }
670 
671  if (rownnz == 1) {
672  // If the only element remaining after filtering is diagonal, mark node as boundary
673  // FIXME: this should really be replaced by the following
674  // if (indices.size() == 1 && indices[0] == row)
675  // boundaryNodes[row] = true;
676  // We do not do it this way now because there is no framework for distinguishing isolated
677  // and boundary nodes in the aggregation algorithms
678  amalgBoundaryNodes[row] = true;
679  }
680  rows[row+1] = realnnz;
681  } //for (LO row = 0; row < numRows; row++)
682  columns.resize(realnnz);
683 
684  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
685  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
686 
687  if (GetVerbLevel() & Statistics1) {
688  GO numLocalBoundaryNodes = 0;
689  GO numGlobalBoundaryNodes = 0;
690 
691  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
692  if (amalgBoundaryNodes[i])
693  numLocalBoundaryNodes++;
694 
695  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
696  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
697  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
698  << " agglomerated Dirichlet nodes" << std::endl;
699  }
700 
701  Set(currentLevel, "Graph", graph);
702  Set(currentLevel, "DofsPerNode", blkSize); // full block size
703  }
704 
705  } else if (algo == "distance laplacian") {
706  LO blkSize = A->GetFixedBlockSize();
707  GO indexBase = A->getRowMap()->getIndexBase();
708 
709  // [*0*] : FIXME
710  // ap: somehow, if I move this line to [*1*], Belos throws an error
711  // I'm not sure what's going on. Do we always have to Get data, if we did
712  // DeclareInput for it?
713  RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
714 
715  // Detect and record rows that correspond to Dirichlet boundary conditions
716  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
717  // TODO the array one bigger than the number of local rows, and the last entry can
718  // TODO hold the actual number of boundary nodes. Clever, huh?
719  ArrayRCP<const bool > pointBoundaryNodes;
720  pointBoundaryNodes = MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold);
721 
722  if ( (blkSize == 1) && (threshold == STS::zero()) ) {
723  // Trivial case: scalar problem, no dropping. Can return original graph
724  RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
725  graph->SetBoundaryNodeMap(pointBoundaryNodes);
726  graphType="unamalgamated";
727  numTotal = A->getNodeNumEntries();
728 
729  if (GetVerbLevel() & Statistics1) {
730  GO numLocalBoundaryNodes = 0;
731  GO numGlobalBoundaryNodes = 0;
732  for (LO i = 0; i < pointBoundaryNodes.size(); ++i)
733  if (pointBoundaryNodes[i])
734  numLocalBoundaryNodes++;
735  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
736  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
737  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
738  }
739 
740  Set(currentLevel, "DofsPerNode", blkSize);
741  Set(currentLevel, "Graph", graph);
742 
743  } else {
744  // ap: We make quite a few assumptions here; general case may be a lot different,
745  // but much much harder to implement. We assume that:
746  // 1) all maps are standard maps, not strided maps
747  // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
748  // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
749  //
750  // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
751  // but as I totally don't understand that code, here is my solution
752 
753  // [*1*]: see [*0*]
754 
755  // Check that the number of local coordinates is consistent with the #rows in A
756  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getNodeNumElements()/blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
757  "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getNodeNumElements() << ") by modulo block size ("<< blkSize <<").");
758 
759  const RCP<const Map> colMap = A->getColMap();
760  RCP<const Map> uniqueMap, nonUniqueMap;
761  Array<LO> colTranslation;
762  if (blkSize == 1) {
763  uniqueMap = A->getRowMap();
764  nonUniqueMap = A->getColMap();
765  graphType="unamalgamated";
766 
767  } else {
768  uniqueMap = Coords->getMap();
769  TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
770  "Different index bases for matrix and coordinates");
771 
772  AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
773 
774  graphType = "amalgamated";
775  }
776  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
777 
778  RCP<RealValuedMultiVector> ghostedCoords;
779  RCP<Vector> ghostedLaplDiag;
780  Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
781  if (threshold != STS::zero()) {
782  // Get ghost coordinates
783  RCP<const Import> importer;
784  {
785  SubFactoryMonitor m1(*this, "Import construction", currentLevel);
786  if (blkSize == 1 && A->getCrsGraph()->getImporter() != Teuchos::null) {
787  GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
788  importer = A->getCrsGraph()->getImporter();
789  } else {
790  GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
791  importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
792  }
793  } //subtimer
794  ghostedCoords = Xpetra::MultiVectorFactory<real_type,LO,GO,NO>::Build(nonUniqueMap, Coords->getNumVectors());
795  {
796  SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
797  ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
798  } //subtimer
799 
800  // Construct Distance Laplacian diagonal
801  RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
802  ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
803  Array<LO> indicesExtra;
805  if (threshold != STS::zero()) {
806  const size_t numVectors = ghostedCoords->getNumVectors();
807  coordData.reserve(numVectors);
808  for (size_t j = 0; j < numVectors; j++) {
809  Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
810  coordData.push_back(tmpData);
811  }
812  }
813  {
814  SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
815  for (LO row = 0; row < numRows; row++) {
816  ArrayView<const LO> indices;
817 
818  if (blkSize == 1) {
819  ArrayView<const SC> vals;
820  A->getLocalRowView(row, indices, vals);
821 
822  } else {
823  // Merge rows of A
824  indicesExtra.resize(0);
825  MergeRows(*A, row, indicesExtra, colTranslation);
826  indices = indicesExtra;
827  }
828 
829  LO nnz = indices.size();
830  for (LO colID = 0; colID < nnz; colID++) {
831  const LO col = indices[colID];
832 
833  if (row != col) {
834  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
835  }
836  }
837  }
838  } //subtimer
839  {
840  SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
841  ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
842  ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
843  ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
844  } //subtimer
845 
846  } else {
847  GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
848  }
849 
850  // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
851 
852  // allocate space for the local graph
853  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
854  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
855 
856  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
857 
858  LO realnnz = 0;
859  rows[0] = 0;
860  Array<LO> indicesExtra;
861  {
862  SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
864  if (threshold != STS::zero()) {
865  const size_t numVectors = ghostedCoords->getNumVectors();
866  coordData.reserve(numVectors);
867  for (size_t j = 0; j < numVectors; j++) {
868  Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
869  coordData.push_back(tmpData);
870  }
871  }
872 
873  for (LO row = 0; row < numRows; row++) {
874  ArrayView<const LO> indices;
875  indicesExtra.resize(0);
876  bool isBoundary = false;
877 
878  if (blkSize == 1) {
879  ArrayView<const SC> vals;
880  A->getLocalRowView(row, indices, vals);
881  isBoundary = pointBoundaryNodes[row];
882  } else {
883  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
884  for (LO j = 0; j < blkSize; j++) {
885  if (!pointBoundaryNodes[row*blkSize+j]) {
886  isBoundary = false;
887  break;
888  }
889  }
890 
891  // Merge rows of A
892  if (!isBoundary)
893  MergeRows(*A, row, indicesExtra, colTranslation);
894  else
895  indicesExtra.push_back(row);
896  indices = indicesExtra;
897  }
898  numTotal += indices.size();
899 
900  LO nnz = indices.size(), rownnz = 0;
901 
902  if (threshold != STS::zero()) {
903  // default
904  if (distanceLaplacianAlgo == defaultAlgo) {
905  /* Standard Distance Laplacian */
906  for (LO colID = 0; colID < nnz; colID++) {
907 
908  LO col = indices[colID];
909 
910  if (row == col) {
911  columns[realnnz++] = col;
912  rownnz++;
913  continue;
914  }
915 
916  // We do not want the distance Laplacian aggregating boundary nodes
917  if(isBoundary) continue;
918 
919 
920  SC laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
921  real_type aiiajj = STS::magnitude(realThreshold*realThreshold * ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
922  real_type aij = STS::magnitude(laplVal*laplVal);
923 
924  if (aij > aiiajj) {
925  columns[realnnz++] = col;
926  rownnz++;
927  } else {
928  numDropped++;
929  }
930  }
931  } else {
932  /* Cut Algorithm */
933  using DropTol = Details::DropTol<real_type,LO>;
934  std::vector<DropTol> drop_vec;
935  drop_vec.reserve(nnz);
936  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
937  const real_type one = Teuchos::ScalarTraits<real_type>::one();
938 
939  // find magnitudes
940  for (LO colID = 0; colID < nnz; colID++) {
941 
942  LO col = indices[colID];
943 
944  if (row == col) {
945  drop_vec.emplace_back( zero, one, colID, false);
946  continue;
947  }
948  // We do not want the distance Laplacian aggregating boundary nodes
949  if(isBoundary) continue;
950 
951  SC laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
952  real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
953  real_type aij = STS::magnitude(laplVal*laplVal);
954 
955  drop_vec.emplace_back(aij, aiiajj, colID, false);
956  }
957 
958  const size_t n = drop_vec.size();
959 
960  if (distanceLaplacianAlgo == unscaled_cut) {
961 
962  std::sort( drop_vec.begin(), drop_vec.end()
963  , [](DropTol const& a, DropTol const& b) {
964  return a.val > b.val;
965  }
966  );
967 
968  bool drop = false;
969  for (size_t i=1; i<n; ++i) {
970  if (!drop) {
971  auto const& x = drop_vec[i-1];
972  auto const& y = drop_vec[i];
973  auto a = x.val;
974  auto b = y.val;
975  if (a > realThreshold*b) {
976  drop = true;
977 #ifdef HAVE_MUELU_DEBUG
978  if (distanceLaplacianCutVerbose) {
979  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
980  }
981 #endif
982  }
983  }
984  drop_vec[i].drop = drop;
985  }
986  }
987  else if (distanceLaplacianAlgo == scaled_cut) {
988 
989  std::sort( drop_vec.begin(), drop_vec.end()
990  , [](DropTol const& a, DropTol const& b) {
991  return a.val/a.diag > b.val/b.diag;
992  }
993  );
994 
995  bool drop = false;
996  for (size_t i=1; i<n; ++i) {
997  if (!drop) {
998  auto const& x = drop_vec[i-1];
999  auto const& y = drop_vec[i];
1000  auto a = x.val/x.diag;
1001  auto b = y.val/y.diag;
1002  if (a > realThreshold*b) {
1003  drop = true;
1004 #ifdef HAVE_MUELU_DEBUG
1005  if (distanceLaplacianCutVerbose) {
1006  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1007  }
1008 #endif
1009  }
1010  }
1011  drop_vec[i].drop = drop;
1012  }
1013  }
1014 
1015  std::sort( drop_vec.begin(), drop_vec.end()
1016  , [](DropTol const& a, DropTol const& b) {
1017  return a.col < b.col;
1018  }
1019  );
1020 
1021  for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
1022  LO col = indices[drop_vec[idxID].col];
1023 
1024 
1025  // don't drop diagonal
1026  if (row == col) {
1027  columns[realnnz++] = col;
1028  rownnz++;
1029  continue;
1030  }
1031 
1032  if (!drop_vec[idxID].drop) {
1033  columns[realnnz++] = col;
1034  rownnz++;
1035  } else {
1036  numDropped++;
1037  }
1038  }
1039  }
1040  } else {
1041  // Skip laplace calculation and threshold comparison for zero threshold
1042  for (LO colID = 0; colID < nnz; colID++) {
1043  LO col = indices[colID];
1044  columns[realnnz++] = col;
1045  rownnz++;
1046  }
1047  }
1048 
1049  if ( rownnz == 1) {
1050  // If the only element remaining after filtering is diagonal, mark node as boundary
1051  // FIXME: this should really be replaced by the following
1052  // if (indices.size() == 1 && indices[0] == row)
1053  // boundaryNodes[row] = true;
1054  // We do not do it this way now because there is no framework for distinguishing isolated
1055  // and boundary nodes in the aggregation algorithms
1056  amalgBoundaryNodes[row] = true;
1057  }
1058  rows[row+1] = realnnz;
1059  } //for (LO row = 0; row < numRows; row++)
1060 
1061  } //subtimer
1062  columns.resize(realnnz);
1063 
1064  RCP<GraphBase> graph;
1065  {
1066  SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1067  graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1068  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1069  } //subtimer
1070 
1071  if (GetVerbLevel() & Statistics1) {
1072  GO numLocalBoundaryNodes = 0;
1073  GO numGlobalBoundaryNodes = 0;
1074 
1075  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1076  if (amalgBoundaryNodes[i])
1077  numLocalBoundaryNodes++;
1078 
1079  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1080  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1081  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1082  << " using threshold " << dirichletThreshold << std::endl;
1083  }
1084 
1085  Set(currentLevel, "Graph", graph);
1086  Set(currentLevel, "DofsPerNode", blkSize);
1087  }
1088  }
1089 
1090  if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) {
1091  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1092  GO numGlobalTotal, numGlobalDropped;
1093  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1094  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1095  GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1096  if (numGlobalTotal != 0)
1097  GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1098  GetOStream(Statistics1) << std::endl;
1099  }
1100 
1101  } else {
1102  //what Tobias has implemented
1103 
1104  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1105  //GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1106  GetOStream(Runtime0) << "algorithm = \"" << "failsafe" << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1107  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1108 
1109  RCP<const Map> rowMap = A->getRowMap();
1110  RCP<const Map> colMap = A->getColMap();
1111 
1112  LO blockdim = 1; // block dim for fixed size blocks
1113  GO indexBase = rowMap->getIndexBase(); // index base of maps
1114  GO offset = 0;
1115 
1116  // 1) check for blocking/striding information
1117  if(A->IsView("stridedMaps") &&
1118  Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1119  Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1120  RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1121  TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1122  blockdim = strMap->getFixedBlockSize();
1123  offset = strMap->getOffset();
1124  oldView = A->SwitchToView(oldView);
1125  GetOStream(Statistics1) << "CoalesceDropFactory::Build():" << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1126  } else GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1127 
1128  // 2) get row map for amalgamated matrix (graph of A)
1129  // with same distribution over all procs as row map of A
1130  RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1131  GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getNodeNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1132 
1133  // 3) create graph of amalgamated matrix
1134  RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getNodeMaxNumRowEntries()*blockdim);
1135 
1136  LO numRows = A->getRowMap()->getNodeNumElements();
1137  LO numNodes = nodeMap->getNodeNumElements();
1138  const ArrayRCP<bool> amalgBoundaryNodes(numNodes, false);
1139  const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1140  bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1141 
1142  // 4) do amalgamation. generate graph of amalgamated matrix
1143  // Note, this code is much more inefficient than the leightwight implementation
1144  // Most of the work has already been done in the AmalgamationFactory
1145  for(LO row=0; row<numRows; row++) {
1146  // get global DOF id
1147  GO grid = rowMap->getGlobalElement(row);
1148 
1149  // reinitialize boolean helper variable
1150  bIsDiagonalEntry = false;
1151 
1152  // translate grid to nodeid
1153  GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1154 
1155  size_t nnz = A->getNumEntriesInLocalRow(row);
1158  A->getLocalRowView(row, indices, vals);
1159 
1160  RCP<std::vector<GO> > cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1161  LO realnnz = 0;
1162  for(LO col=0; col<Teuchos::as<LO>(nnz); col++) {
1163  GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1164 
1165  if(vals[col]!=STS::zero()) {
1166  GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1167  cnodeIds->push_back(cnodeId);
1168  realnnz++; // increment number of nnz in matrix row
1169  if (grid == gcid) bIsDiagonalEntry = true;
1170  }
1171  }
1172 
1173  if(realnnz == 1 && bIsDiagonalEntry == true) {
1174  LO lNodeId = nodeMap->getLocalElement(nodeId);
1175  numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1176  if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1177  amalgBoundaryNodes[lNodeId] = true;
1178  }
1179 
1180  Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp( cnodeIds );
1181 
1182  if(arr_cnodeIds.size() > 0 )
1183  crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1184  }
1185  // fill matrix graph
1186  crsGraph->fillComplete(nodeMap,nodeMap);
1187 
1188  // 5) create MueLu Graph object
1189  RCP<GraphBase> graph = rcp(new Graph(crsGraph, "amalgamated graph of A"));
1190 
1191  // Detect and record rows that correspond to Dirichlet boundary conditions
1192  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1193 
1194  if (GetVerbLevel() & Statistics1) {
1195  GO numLocalBoundaryNodes = 0;
1196  GO numGlobalBoundaryNodes = 0;
1197  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1198  if (amalgBoundaryNodes[i])
1199  numLocalBoundaryNodes++;
1200  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1201  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1202  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1203  }
1204 
1205  // 6) store results in Level
1206  //graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1207  Set(currentLevel, "DofsPerNode", blockdim);
1208  Set(currentLevel, "Graph", graph);
1209 
1210  } //if (doExperimentalWrap) ... else ...
1211 
1212  } //Build
1213 
1214  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1215  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1216  typedef typename ArrayView<const LO>::size_type size_type;
1217 
1218  // extract striding information
1219  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1220  if (A.IsView("stridedMaps") == true) {
1221  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1222  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1223  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1224  if (strMap->getStridedBlockId() > -1)
1225  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1226  }
1227 
1228  // count nonzero entries in all dof rows associated with node row
1229  size_t nnz = 0, pos = 0;
1230  for (LO j = 0; j < blkSize; j++)
1231  nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1232 
1233  if (nnz == 0) {
1234  cols.resize(0);
1235  return;
1236  }
1237 
1238  cols.resize(nnz);
1239 
1240  // loop over all local dof rows associated with local node "row"
1241  ArrayView<const LO> inds;
1242  ArrayView<const SC> vals;
1243  for (LO j = 0; j < blkSize; j++) {
1244  A.getLocalRowView(row*blkSize+j, inds, vals);
1245  size_type numIndices = inds.size();
1246 
1247  if (numIndices == 0) // skip empty dof rows
1248  continue;
1249 
1250  // cols: stores all local node ids for current local node id "row"
1251  cols[pos++] = translation[inds[0]];
1252  for (size_type k = 1; k < numIndices; k++) {
1253  LO nodeID = translation[inds[k]];
1254  // Here we try to speed up the process by reducing the size of an array
1255  // to sort. This works if the column nonzeros belonging to the same
1256  // node are stored consequently.
1257  if (nodeID != cols[pos-1])
1258  cols[pos++] = nodeID;
1259  }
1260  }
1261  cols.resize(pos);
1262  nnz = pos;
1263 
1264  // Sort and remove duplicates
1265  std::sort(cols.begin(), cols.end());
1266  pos = 0;
1267  for (size_t j = 1; j < nnz; j++)
1268  if (cols[j] != cols[pos])
1269  cols[++pos] = cols[j];
1270  cols.resize(pos+1);
1271  }
1272 
1273  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1274  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1275  typedef typename ArrayView<const LO>::size_type size_type;
1276  typedef Teuchos::ScalarTraits<SC> STS;
1277 
1278  // extract striding information
1279  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1280  if (A.IsView("stridedMaps") == true) {
1281  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1282  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1283  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1284  if (strMap->getStridedBlockId() > -1)
1285  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1286  }
1287 
1288  // count nonzero entries in all dof rows associated with node row
1289  size_t nnz = 0, pos = 0;
1290  for (LO j = 0; j < blkSize; j++)
1291  nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1292 
1293  if (nnz == 0) {
1294  cols.resize(0);
1295  return;
1296  }
1297 
1298  cols.resize(nnz);
1299 
1300  // loop over all local dof rows associated with local node "row"
1301  ArrayView<const LO> inds;
1302  ArrayView<const SC> vals;
1303  for (LO j = 0; j < blkSize; j++) {
1304  A.getLocalRowView(row*blkSize+j, inds, vals);
1305  size_type numIndices = inds.size();
1306 
1307  if (numIndices == 0) // skip empty dof rows
1308  continue;
1309 
1310  // cols: stores all local node ids for current local node id "row"
1311  LO prevNodeID = -1;
1312  for (size_type k = 0; k < numIndices; k++) {
1313  LO dofID = inds[k];
1314  LO nodeID = translation[inds[k]];
1315 
1316  // we avoid a square root by using squared values
1317  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold*ghostedDiagVals[dofID]*ghostedDiagVals[row*blkSize+j]); // eps^2 * |a_ii| * |a_jj|
1318  typename STS::magnitudeType aij = STS::magnitude(vals[k]*vals[k]);
1319 
1320  // check dropping criterion
1321  if (aij > aiiajj || (row*blkSize+j == dofID)) {
1322  // accept entry in graph
1323 
1324  // Here we try to speed up the process by reducing the size of an array
1325  // to sort. This works if the column nonzeros belonging to the same
1326  // node are stored consequently.
1327  if (nodeID != prevNodeID) {
1328  cols[pos++] = nodeID;
1329  prevNodeID = nodeID;
1330  }
1331  }
1332  }
1333  }
1334  cols.resize(pos);
1335  nnz = pos;
1336 
1337  // Sort and remove duplicates
1338  std::sort(cols.begin(), cols.end());
1339  pos = 0;
1340  for (size_t j = 1; j < nnz; j++)
1341  if (cols[j] != cols[pos])
1342  cols[++pos] = cols[j];
1343  cols.resize(pos+1);
1344 
1345  return;
1346  }
1347 } //namespace MueLu
1348 
1349 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
Important warning messages (one line)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Exception indicating invalid cast attempted.
void reserve(size_type n)
#define MueLu_sumAll(rcpComm, in, out)
void setValidator(RCP< const ParameterEntryValidator > const &validator)
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
MueLu representation of a compressed row storage graph.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::magnitude(0.), const bool count_twos_as_dirichlet=false)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
size_type size() const
DropTol & operator=(DropTol const &)=default
One-liner description of what is happening.
size_type size() const
Exception throws to report incompatible objects (like maps).
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
translate global (row/column) id to global amalgamation block id
virtual void SetBoundaryNodeMap(const ArrayRCP< const bool > &boundaryArray)=0
Scalar GetThreshold() const
Return threshold value.
Additional warnings.
void resize(const size_type n, const T &val=T())
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
#define SET_VALID_ENTRY(name)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
void resize(size_type new_size, const value_type &x=value_type())
void Build(Level &currentLevel) const
Build an object with this factory.
iterator end()
void DeclareInput(Level &currentLevel) const
Input.
Print class parameters.
void push_back(const value_type &x)
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Lightweight MueLu representation of a compressed row storage graph.
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
Exception throws to report errors in the internal logical of the program.
ParameterEntry & getEntry(const std::string &name)
iterator begin()
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const