MueLu_BrickAggregationFactory_def.hpp
// @HEADER
//
// ***********************************************************************
//
//        MueLu: A package for multigrid based preconditioning
//                  Copyright 2012 Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact
//                    Jonathan Hu       (jhu@sandia.gov)
//                    Andrey Prokopenko (aprokop@sandia.gov)
//                    Ray Tuminaro      (rstumin@sandia.gov)
//
// ***********************************************************************
//
// @HEADER
#ifndef MUELU_BRICKAGGREGATIONFACTORY_DEF_HPP_
#define MUELU_BRICKAGGREGATIONFACTORY_DEF_HPP_

#include "MueLu_BrickAggregationFactory_decl.hpp"

#ifdef HAVE_MPI
#include <Teuchos_CommHelpers.hpp>
#endif

#include <Xpetra_Import.hpp>
#include <Xpetra_ImportFactory.hpp>
#include <Xpetra_Map.hpp>
#include <Xpetra_MapFactory.hpp>
#include <Xpetra_Matrix.hpp>
#include <Xpetra_MultiVector.hpp>
#include <Xpetra_MultiVectorFactory.hpp>

#include "MueLu_Aggregates.hpp"
#include "MueLu_Level.hpp"
#include "MueLu_MasterList.hpp"
#include "MueLu_Monitor.hpp"
#include "MueLu_Utilities.hpp"

namespace MueLu {

  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
  RCP<const ParameterList> BrickAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const {
    RCP<ParameterList> validParamList = rcp(new ParameterList());

#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
    SET_VALID_ENTRY("aggregation: brick x size");
    SET_VALID_ENTRY("aggregation: brick y size");
    SET_VALID_ENTRY("aggregation: brick z size");
#undef SET_VALID_ENTRY

    validParamList->set< RCP<const FactoryBase> >("A",           Teuchos::null, "Generating factory for matrix");
    validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coordinates");

    return validParamList;
  }
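
  // Usage note (illustrative, not part of the original file): the brick sizes
  // above are typically driven from the user's MueLu parameter list, e.g.
  //
  //   Teuchos::ParameterList params;
  //   params.set("aggregation: type",         "brick");
  //   params.set("aggregation: brick x size", 2);
  //   params.set("aggregation: brick y size", 2);
  //   params.set("aggregation: brick z size", 2);
  //
  // Defaults come from the MasterList via the SET_VALID_ENTRY macro above.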

  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
  void BrickAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const {
    Input(currentLevel, "A");
    Input(currentLevel, "Coordinates");
  }

  // The current implementation cannot deal with bricks larger than 3x3(x3) in
  // parallel. The reason is that the aggregation infrastructure in place has
  // major drawbacks.
  //
  // The Aggregates class is constructed with the help of a provided map, either
  // taken from a graph or provided directly. This map is usually taken to be
  // the column map of a matrix. The reason is that if we have an
  // overlapped aggregation, we want the processor owning an aggregate to store
  // the aggregate id for all nodes in this aggregate. If we used the row map, there
  // would be no way for the processor to know whether there are some other nodes on
  // a different processor which belong to its aggregate. On the other hand,
  // using the column map allows both the vertex2AggId and procWinner arrays in the
  // Aggregates class to store some extra data, such as whether nodes belonging
  // to a different processor belong to this processor's aggregate.
  //
  // The drawback of this is that it stores only overlap-1 data. For aggressive
  // coarsening, such as a brick aggregation with a large single brick
  // dimension, it could happen that we need to know nodes two or more layers
  // deep inside another processor's subdomain.
  //
  // Another issue is that we may have some implicit connection between the
  // aggregate map and the maps of A used in the construction of a tentative
  // prolongator.
  //
  // Another issue is that it seems that some info is unused or not required.
  // Specifically, it seems that if a node belongs to an aggregate on a
  // different processor, we don't actually need to set vertex2AggId and
  // procWinner, despite the following comment in the Aggregates declaration:
  //     vertex2AggId[k] gives a local id
  //     corresponding to the aggregate to which
  //     local id k has been assigned. While k
  //     is the local id on my processor (MyPID),
  //     vertex2AggId[k] is the local id on the
  //     processor which actually owns the
  //     aggregate. This owning processor has id
  //     given by procWinner[k].
  // It is possible that this info is only used during arbitration in
  // CoupledAggregationFactory.
  //
  // The steps that we need to take to resolve this issue:
  //   - Break the link between maps in TentativePFactory, allowing any maps in Aggregates
  //   - Allow Aggregates to construct their own maps, if necessary, OR
  //   - Construct aggregates based on the row map
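  //
  // Illustration (added for clarity, not in the original notes): for a 1D mesh
  // split as  proc 0: nodes 0-3,  proc 1: nodes 4-7,  with bricks of size 2,
  // the aggregate {4,5} is owned by proc 1. Node 4 appears in proc 0's column
  // map (overlap 1), so under the convention quoted above proc 0 would store
  //     vertex2AggId[lid(4)] = local id of aggregate {4,5} on proc 1
  //     procWinner  [lid(4)] = 1
  // Note that, consistent with the remark above, this factory does not actually
  // fill these entries for remotely owned aggregates.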
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
  void BrickAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
    FactoryMonitor m(*this, "Build", currentLevel);

    typedef Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> MultiVector_d;

    const ParameterList& pL = GetParameterList();

    RCP<MultiVector_d> coords = Get<RCP<MultiVector_d> >(currentLevel, "Coordinates");
    RCP<Matrix>        A      = Get< RCP<Matrix> >      (currentLevel, "A");
    RCP<const Map>     rowMap = A->getRowMap();
    RCP<const Map>     colMap = A->getColMap();

    RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
    int numProcs = comm->getSize();
    int myRank   = comm->getRank();

    int numPoints = colMap->getNodeNumElements();

    bx_ = pL.get<int>("aggregation: brick x size");
    by_ = pL.get<int>("aggregation: brick y size");
    bz_ = pL.get<int>("aggregation: brick z size");

    if (numProcs > 1) {
      // TODO: deal with block size > 1 (see comments above)
      //TEUCHOS_TEST_FOR_EXCEPTION(bx_ > 3 || by_ > 3 || bz_ > 3, Exceptions::RuntimeError, "Currently cannot deal with brick size > 3");
    }

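    // Ghost the coordinates to the column map overlap: each processor needs the
    // coordinates of every node it sees in its column map, not just the ones it owns.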
    RCP<MultiVector_d> overlappedCoords = coords;
    RCP<const Import> importer = ImportFactory::Build(coords->getMap(), colMap);
    if (!importer.is_null()) {
      overlappedCoords = Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO>::Build(colMap, coords->getNumVectors());
      overlappedCoords->doImport(*coords, *importer, Xpetra::INSERT);
    }

    // Setup misc structures
    // Logically, we construct enough data to query topological information of a rectangular grid
    Setup(comm, overlappedCoords, colMap);

    GetOStream(Runtime0) << "Using brick size: " << bx_
                         << (nDim_ > 1 ? "x " + toString(by_) : "")
                         << (nDim_ > 2 ? "x " + toString(bz_) : "") << std::endl;

    // Construct aggregates
    RCP<Aggregates> aggregates = rcp(new Aggregates(colMap));
    aggregates->setObjectLabel("Brick");

    ArrayRCP<LO> vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0);
    ArrayRCP<LO> procWinner   = aggregates->GetProcWinner()  ->getDataNonConst(0);

    // In the first pass, we set a mapping from a vertex to an aggregate global id. We deal with a structured
    // rectangular mesh, therefore we know the structure of the aggregates. For each vertex we can tell exactly
    // which aggregate it belongs to.
    // If we determine that the aggregate does not belong to us (i.e. the root vertex does not belong to this
    // processor, or is outside and we lost the arbitration), we record the global aggregate id in order to
    // fetch the local info from the processor owning the aggregate. This is required because the Aggregates
    // class uses the local aggregate ids of the owning processor.
    std::set<GO> myAggGIDs, remoteAggGIDs;
    for (LO LID = 0; LID < numPoints; LID++) {
      GO aggGID = getAggGID(LID);

      if ((revMap_.find(getRoot(LID)) != revMap_.end()) && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) {
        // Root of the brick aggregate containing GID (<- LID) belongs to us
        vertex2AggId[LID] = aggGID;
        myAggGIDs.insert(aggGID);

        if (isRoot(LID))
          aggregates->SetIsRoot(LID);

      } else {
        remoteAggGIDs.insert(aggGID);
      }
    }
    size_t numAggregates = myAggGIDs    .size();
    size_t numRemote     = remoteAggGIDs.size();
    aggregates->SetNumAggregates(numAggregates);

    std::map<GO,LO>  AggG2L;  // Map: Agg GID -> Agg LID (possibly on a different processor)
    std::map<GO,int> AggG2R;  // Map: Agg GID -> processor rank owning aggregate

    Array<GO> myAggGIDsArray(numAggregates), remoteAggGIDsArray(numRemote);

    // Fill in the maps for aggregates that we own
    size_t ind = 0;
    for (typename std::set<GO>::const_iterator it = myAggGIDs.begin(); it != myAggGIDs.end(); it++) {
      AggG2L[*it] = ind;
      AggG2R[*it] = myRank;

      myAggGIDsArray[ind++] = *it;
    }

    // The map is a convenient way to fetch remote local indices from global indices.
    RCP<Map> aggMap = MapFactory::Build(rowMap->lib(), Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
                                        myAggGIDsArray, 0, comm);

    ind = 0;
    for (typename std::set<GO>::const_iterator it = remoteAggGIDs.begin(); it != remoteAggGIDs.end(); it++)
      remoteAggGIDsArray[ind++] = *it;

    // Fetch the required aggregate local ids and ranks
    Array<int> remoteProcIDs(numRemote);
    Array<LO>  remoteLIDs   (numRemote);
    aggMap->getRemoteIndexList(remoteAggGIDsArray, remoteProcIDs, remoteLIDs);

    // Fill in the maps for aggregates that we don't own but which have some of our vertices
    for (size_t i = 0; i < numRemote; i++) {
      AggG2L[remoteAggGIDsArray[i]] = remoteLIDs   [i];
      AggG2R[remoteAggGIDsArray[i]] = remoteProcIDs[i];
    }

    // Remap aggregate GIDs to LIDs and set up owning processors
    for (LO LID = 0; LID < numPoints; LID++) {
      if (revMap_.find(getRoot(LID)) != revMap_.end() && rowMap->isNodeGlobalElement(colMap->getGlobalElement(revMap_[getRoot(LID)]))) {
        GO aggGID = vertex2AggId[LID];

        vertex2AggId[LID] = AggG2L[aggGID];
        procWinner  [LID] = AggG2R[aggGID];
      }
    }

    GO numGlobalRemote;
    MueLu_sumAll(comm, as<GO>(numRemote), numGlobalRemote);
    aggregates->AggregatesCrossProcessors(numGlobalRemote);

    Set(currentLevel, "Aggregates", aggregates);

    GetOStream(Statistics1) << aggregates->description() << std::endl;
  }

  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
  void BrickAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
  Setup(const RCP<const Teuchos::Comm<int> >& comm, const RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> >& coords, const RCP<const Map>& /* map */) const {
    nDim_ = coords->getNumVectors();

    x_    = coords->getData(0);
    xMap_ = Construct1DMap(comm, x_);
    nx_   = xMap_->size();

    ny_ = 1;
    if (nDim_ > 1) {
      y_    = coords->getData(1);
      yMap_ = Construct1DMap(comm, y_);
      ny_   = yMap_->size();
    }

    nz_ = 1;
    if (nDim_ > 2) {
      z_    = coords->getData(2);
      zMap_ = Construct1DMap(comm, z_);
      nz_   = zMap_->size();
    }

    for (size_t ind = 0; ind < coords->getLocalLength(); ind++) {
      GO i = (*xMap_)[(coords->getData(0))[ind]], j = 0, k = 0;
      if (nDim_ > 1)
        j = (*yMap_)[(coords->getData(1))[ind]];
      if (nDim_ > 2)
        k = (*zMap_)[(coords->getData(2))[ind]];

      revMap_[k*ny_*nx_ + j*nx_ + i] = ind;
    }
  }
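
  // Worked example (added for clarity): on a 4x3 grid (nx_ = 4, ny_ = 3), the
  // node with grid indices (i,j) = (2,1) gets the lexicographic key
  //     j*nx_ + i = 1*4 + 2 = 6,
  // so revMap_[6] is that node's local index. In 3D the key is k*ny_*nx_ + j*nx_ + i.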

  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
  RCP<typename BrickAggregationFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::container>
  BrickAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
  Construct1DMap(const RCP<const Teuchos::Comm<int> >& comm,
                 const ArrayRCP<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType>& x) const
  {
    int n = x.size();

    // Step 1: Create a local vector with unique coordinate points
    RCP<container> gMap = rcp(new container);
    for (int i = 0; i < n; i++)
      (*gMap)[x[i]] = 0;

#ifdef HAVE_MPI
    // Step 2: exchange coordinates
    // NOTE: we assume the coordinates are double, or double compatible.
    // That means that for the complex case, we assume that all imaginary parts are zero.
    int numProcs = comm->getSize();
    if (numProcs > 1) {
      RCP<const Teuchos::MpiComm<int> > dupMpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm->duplicate());

      MPI_Comm rawComm = (*dupMpiComm->getRawMpiComm())();

      int sendCnt = gMap->size(), cnt = 0, recvSize;
      Array<int> recvCnt(numProcs), Displs(numProcs);
      Array<double> sendBuf, recvBuf;

      sendBuf.resize(sendCnt);
      for (typename container::const_iterator cit = gMap->begin(); cit != gMap->end(); cit++)
        sendBuf[cnt++] = Teuchos::as<double>(STS::real(cit->first));

      MPI_Allgather(&sendCnt, 1, MPI_INT, recvCnt.getRawPtr(), 1, MPI_INT, rawComm);
      Displs[0] = 0;
      for (int i = 0; i < numProcs-1; i++)
        Displs[i+1] = Displs[i] + recvCnt[i];
      recvSize = Displs[numProcs-1] + recvCnt[numProcs-1];
      recvBuf.resize(recvSize);
      MPI_Allgatherv(sendBuf.getRawPtr(), sendCnt, MPI_DOUBLE, recvBuf.getRawPtr(), recvCnt.getRawPtr(), Displs.getRawPtr(), MPI_DOUBLE, rawComm);

      for (int i = 0; i < recvSize; i++)
        (*gMap)[as<SC>(recvBuf[i])] = 0;
    }
#endif

    // Step 3: assign increasing ids to the (sorted) unique coordinates
    GO cnt = 0;
    for (typename container::iterator it = gMap->begin(); it != gMap->end(); it++)
      it->second = cnt++;

    return gMap;
  }
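
  // Example (added for clarity): if the globally collected x-coordinates are
  // {0.0, 0.5, 1.0}, the resulting container maps 0.0 -> 0, 0.5 -> 1, 1.0 -> 2,
  // i.e. each distinct coordinate value gets its index along that grid line.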

  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
  bool BrickAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::isRoot(LocalOrdinal LID) const {
    int i = (*xMap_)[x_[LID]], j = 0, k = 0;
    if (nDim_ > 1)
      j = (*yMap_)[y_[LID]];
    if (nDim_ > 2)
      k = (*zMap_)[z_[LID]];

    return (k*ny_*nx_ + j*nx_ + i) == getRoot(LID);
  }

  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
  GlobalOrdinal BrickAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::getRoot(LocalOrdinal LID) const {
    int i = ((*xMap_)[x_[LID]]/bx_)*bx_ + (bx_-1)/2, j = 0, k = 0;
    if (nDim_ > 1)
      j = ((*yMap_)[y_[LID]]/by_)*by_ + (by_-1)/2;
    if (nDim_ > 2)
      k = ((*zMap_)[z_[LID]]/bz_)*bz_ + (bz_-1)/2;

    // Check if the actual root is outside of the domain. If it is, project the root to the domain
    if (i > nx_) i = nx_;
    if (j > ny_) j = ny_;
    if (k > nz_) k = nz_;

    return k*ny_*nx_ + j*nx_ + i;
  }
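
  // Worked example (added for clarity): with bx_ = 3, a node in grid column
  // i = 4 belongs to the second brick (4/3 = 1), whose root column is
  //     (4/3)*3 + (3-1)/2 = 3 + 1 = 4,
  // i.e. the middle column of the brick covering columns 3-5.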

  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
  GlobalOrdinal BrickAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::getAggGID(LocalOrdinal LID) const {
    int naggx = nx_/bx_ + (nx_ % bx_ ? 1 : 0), naggy = 1;
    int i = (*xMap_)[x_[LID]]/bx_, j = 0;
    GlobalOrdinal k = 0;
    if (nDim_ > 1) {
      naggy = ny_/by_ + (ny_ % by_ ? 1 : 0);
      j = (*yMap_)[y_[LID]]/by_;
    }
    if (nDim_ == 3)
      k = (*zMap_)[z_[LID]]/bz_;

    return Teuchos::as<GlobalOrdinal>(k*naggy*naggx) + Teuchos::as<GlobalOrdinal>(j*naggx) + i;
  }
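
  // Worked example (added for clarity): on a 7x5 grid with 3x3 bricks,
  // naggx = 7/3 + 1 = 3 and naggy = 5/3 + 1 = 2 (partial bricks count too).
  // A node at (i,j) = (4,3) lies in brick (4/3, 3/3) = (1,1), so its
  // aggregate GID is j*naggx + i = 1*3 + 1 = 4.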

} //namespace MueLu

#endif /* MUELU_BRICKAGGREGATIONFACTORY_DEF_HPP_ */