Tpetra_ComputeGatherMap.hpp
// @HEADER
// *****************************************************************************
// Tpetra: Templated Linear Algebra Services Package
//
// Copyright 2008 NTESS and the Tpetra contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef __Tpetra_ComputeGatherMap_hpp
#define __Tpetra_ComputeGatherMap_hpp

#include "Tpetra_Map.hpp"
#include <numeric>

// Macro that marks a function as "possibly unused," in order to
// suppress build warnings.
#if ! defined(TRILINOS_UNUSED_FUNCTION)
#  if defined(__GNUC__) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
#    define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#  elif defined(__clang__)
#    if __has_attribute(unused)
#      define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#    else
#      define TRILINOS_UNUSED_FUNCTION
#    endif // Clang has 'unused' attribute
#  elif defined(__IBMCPP__)
     // IBM's C++ compiler for Blue Gene/Q (V12.1) implements 'used' but not 'unused'.
     //
     // http://pic.dhe.ibm.com/infocenter/compbg/v121v141/index.jsp
#    define TRILINOS_UNUSED_FUNCTION
#  else // some other compiler
#    define TRILINOS_UNUSED_FUNCTION
#  endif
#endif // ! defined(TRILINOS_UNUSED_FUNCTION)


namespace Tpetra {
  namespace Details {

    namespace {
#ifdef HAVE_MPI
      // We're communicating integers of type IntType.  Figure
      // out the right MPI_Datatype for IntType.  Usually it
      // is int or long, so these are good enough for now.
      template<class IntType> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype () {
        TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
          "Not implemented for IntType != int, long, or long long");
      }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<int> () { return MPI_INT; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long> () { return MPI_LONG; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long long> () { return MPI_LONG_LONG; }
#endif // HAVE_MPI

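      // Gather sendcnt entries of sendbuf from each process into
      // recvbuf on the root process, expecting recvcnt entries from
      // each process.  Thin wrapper that calls MPI_Gather directly
      // when the communicator is an MpiComm, and degenerates to a
      // local copy for a serial communicator.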
      template<class IntType>
      void
      gather (const IntType sendbuf[], const int sendcnt,
              IntType recvbuf[], const int recvcnt,
              const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap
        // MPI_Gather in Teuchos.
        RCP<const MpiComm<int> > mpiComm =
          rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err =
            MPI_Gather (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                        sendcnt,
                        sendtype,
                        reinterpret_cast<void*> (recvbuf),
                        recvcnt,
                        recvtype,
                        root,
                        rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gather failed");
          return;
        }
#endif // HAVE_MPI

        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnt > sendcnt, std::invalid_argument,
          "gather: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send. "
          "You aim to receive " << recvcnt << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnt is the
        // amount to receive, so it's the amount to copy.
        std::copy (sendbuf, sendbuf + recvcnt, recvbuf);
      }

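      // Variable-count analogue of gather(): gather sendcnt entries
      // of sendbuf from each process into recvbuf on the root
      // process, where process p's contribution is recvcnts[p]
      // entries, placed at offset displs[p] in recvbuf.  Calls
      // MPI_Gatherv directly when possible, and degenerates to a
      // local copy for a serial communicator.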
      template<class IntType>
      void
      gatherv (const IntType sendbuf[], const int sendcnt,
               IntType recvbuf[], const int recvcnts[], const int displs[],
               const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap
        // MPI_Gatherv in Teuchos.
        RCP<const MpiComm<int> > mpiComm =
          rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err =
            MPI_Gatherv (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                         sendcnt,
                         sendtype,
                         reinterpret_cast<void*> (recvbuf),
                         const_cast<int*> (recvcnts),
                         const_cast<int*> (displs),
                         recvtype,
                         root,
                         rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gatherv failed");
          return;
        }
#endif // HAVE_MPI

        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnts[0] > sendcnt, std::invalid_argument,
          "gatherv: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send. "
          "You aim to receive " << recvcnts[0] << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnts[0] is the
        // amount to receive, so it's the amount to copy.  Start
        // writing to recvbuf at the offset displs[0].
        std::copy (sendbuf, sendbuf + recvcnts[0], recvbuf + displs[0]);
      }
    } // namespace (anonymous)


    // Given an arbitrary Map, compute a Map containing all the GIDs
    // in the same order as in (the one-to-one version of) map, but
    // all owned exclusively by Proc 0.
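    //
    // Example (sketch): gather a distributed Map's GIDs onto Process 0.
    // The typedef and the "rowMap" variable below are placeholders for
    // whatever Map type and Map object the caller already has.
    //
    //   typedef Tpetra::Map<> map_type;
    //   Teuchos::RCP<const map_type> rowMap = ...; // some distributed Map
    //   Teuchos::RCP<const map_type> gatherMap =
    //     Tpetra::Details::computeGatherMap<map_type> (rowMap, Teuchos::null);
    //
    // The resulting gatherMap has the same global indices, in the same
    // order as in the one-to-one version of rowMap, but Process 0 owns
    // all of them.  It may then serve as the target Map when
    // redistributing (e.g., via an Import or Export) a distributed
    // object to Process 0, for example for file output.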
    template<class MapType>
    Teuchos::RCP<const MapType>
    computeGatherMap (Teuchos::RCP<const MapType> map,
                      const Teuchos::RCP<Teuchos::FancyOStream>& err,
                      const bool dbg=false)
    {
      using Tpetra::global_size_t;
      using Teuchos::Array;
      using Teuchos::ArrayRCP;
      using Teuchos::ArrayView;
      using Teuchos::as;
      using Teuchos::Comm;
      using Teuchos::RCP;
      using Teuchos::rcp; // needed for the unqualified rcp() call below
      using std::endl;
      typedef typename MapType::local_ordinal_type LO;
      typedef typename MapType::global_ordinal_type GO;
      typedef typename MapType::node_type NT;

      RCP<const Comm<int> > comm = map->getComm ();
      const int numProcs = comm->getSize ();
      const int myRank = comm->getRank ();

      if (! err.is_null ()) {
        err->pushTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap:" << endl;
      }
      if (! err.is_null ()) {
        err->pushTab ();
      }

      RCP<const MapType> oneToOneMap;
      if (map->isContiguous ()) {
        oneToOneMap = map; // contiguous Maps are always 1-to-1
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling createOneToOne" << endl;
        }
        // It could be that Map is one-to-one, but the class doesn't
        // give us a way to test this, other than to create the
        // one-to-one Map.
        oneToOneMap = createOneToOne<LO, GO, NT> (map);
      }

      RCP<const MapType> gatherMap;
      if (numProcs == 1) {
        gatherMap = oneToOneMap;
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Gathering Map counts" << endl;
        }
        // Gather each process' count of Map elements to Proc 0,
        // into the recvCounts array.  This will tell Proc 0 how
        // many GIDs to expect from each process when calling
        // MPI_Gatherv.  Counts and offsets are all int, because
        // that's what MPI uses.  Teuchos::as will at least prevent
        // bad casts to int in a debug build.
        //
        // Yeah, it's not memory scalable.  Guess what: We're going
        // to bring ALL the data to Proc 0, not just the receive
        // counts.  The first MPI_Gather is only the beginning...
        // Seriously, if you want to make this memory scalable, the
        // right thing to do (after the Export to the one-to-one
        // Map) is to go round robin through the processes, having
        // each send a chunk of data (with its GIDs, to get the
        // order of rows right) at a time, and overlapping writing
        // to the file (resp. reading from it) with receiving (resp.
        // sending) the next chunk.
        const int myEltCount = as<int> (oneToOneMap->getLocalNumElements ());
        Array<int> recvCounts (numProcs);
        const int rootProc = 0;
        gather<int> (&myEltCount, 1, recvCounts.getRawPtr (), 1, rootProc, comm);
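        // After this call, recvCounts[p] on Proc 0 is the number of
        // Map entries that process p owns; on the other processes,
        // recvCounts' contents are not meaningful.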

        ArrayView<const GO> myGlobalElts = oneToOneMap->getLocalElementList ();
        const int numMyGlobalElts = as<int> (myGlobalElts.size ());
        // Only Proc 0 needs to receive and store all the GIDs (from
        // all processes).
        ArrayRCP<GO> allGlobalElts;
        if (myRank == 0) {
          allGlobalElts = Teuchos::arcp<GO> (oneToOneMap->getGlobalNumElements ());
          std::fill (allGlobalElts.begin (), allGlobalElts.end (), 0);
        }

        if (dbg) {
          *err << myRank << ": computeGatherMap: Computing MPI_Gatherv "
            "displacements" << endl;
        }
        // Displacements for gatherv() in this case (where we are
        // gathering into a contiguous array) are an exclusive
        // prefix sum of recvCounts: the first entry is zero, and
        // entry i is the sum of recvCounts[0 .. i-1].
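        // For example, recvCounts = {3, 2, 4} yields displs = {0, 3, 5}.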
        Array<int> displs (numProcs, 0);
        std::partial_sum (recvCounts.begin (), recvCounts.end () - 1,
                          displs.begin () + 1);
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling MPI_Gatherv" << endl;
        }
        gatherv<GO> (myGlobalElts.getRawPtr (), numMyGlobalElts,
                     allGlobalElts.getRawPtr (), recvCounts.getRawPtr (),
                     displs.getRawPtr (), rootProc, comm);

        if (dbg) {
          *err << myRank << ": computeGatherMap: Creating gather Map" << endl;
        }
        // Create a Map with all the GIDs, in the same order as in
        // the one-to-one Map, but owned by Proc 0.
        ArrayView<const GO> allElts (NULL, 0);
        if (myRank == 0) {
          allElts = allGlobalElts ();
        }
        const global_size_t INVALID =
          Teuchos::OrdinalTraits<global_size_t>::invalid ();
        gatherMap = rcp (new MapType (INVALID, allElts,
                                      oneToOneMap->getIndexBase (),
                                      comm));
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap: done" << endl;
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      return gatherMap;
    }

  } // namespace Details
} // namespace Tpetra

#endif // __Tpetra_ComputeGatherMap_hpp