Tpetra_ComputeGatherMap.hpp
// @HEADER
// *****************************************************************************
// Tpetra: Templated Linear Algebra Services Package
//
// Copyright 2008 NTESS and the Tpetra contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef __Tpetra_ComputeGatherMap_hpp
#define __Tpetra_ComputeGatherMap_hpp

#include "Tpetra_Map.hpp"
#include <numeric>

// Macro that marks a function as "possibly unused," in order to
// suppress build warnings.
#if !defined(TRILINOS_UNUSED_FUNCTION)
#if defined(__GNUC__) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
#define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#elif defined(__clang__)
#if __has_attribute(unused)
#define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#else
#define TRILINOS_UNUSED_FUNCTION
#endif // Clang has 'unused' attribute
#elif defined(__IBMCPP__)
// IBM's C++ compiler for Blue Gene/Q (V12.1) implements 'used' but not 'unused'.
//
// http://pic.dhe.ibm.com/infocenter/compbg/v121v141/index.jsp
#define TRILINOS_UNUSED_FUNCTION
#else // some other compiler
#define TRILINOS_UNUSED_FUNCTION
#endif
#endif // ! defined(TRILINOS_UNUSED_FUNCTION)

namespace Tpetra {
namespace Details {

namespace {
#ifdef HAVE_MPI
// We're communicating integers of type IntType.  Figure out
// the right MPI_Datatype for IntType.  Usually it is int,
// long, or long long, so these specializations are good
// enough for now.
template <class IntType>
TRILINOS_UNUSED_FUNCTION MPI_Datatype
getMpiDatatype() {
  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
                             "Not implemented for IntType != int, long, or long long");
}

template <>
TRILINOS_UNUSED_FUNCTION MPI_Datatype
getMpiDatatype<int>() { return MPI_INT; }

template <>
TRILINOS_UNUSED_FUNCTION MPI_Datatype
getMpiDatatype<long>() { return MPI_LONG; }

template <>
TRILINOS_UNUSED_FUNCTION MPI_Datatype
getMpiDatatype<long long>() { return MPI_LONG_LONG; }
#endif // HAVE_MPI
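
// Note (added for illustration): the datatype is selected at compile time.
// For example, when computeGatherMap below calls gatherv<GO>() and GO is
// long long, MPI_Gatherv ends up using MPI_LONG_LONG via
// getMpiDatatype<long long>().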

template <class IntType>
void gather(const IntType sendbuf[], const int sendcnt,
            IntType recvbuf[], const int recvcnt,
            const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm) {
  using Teuchos::RCP;
  using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
  using Teuchos::MpiComm;

  // Get the raw MPI communicator, so we don't have to wrap
  // MPI_Gather in Teuchos.
  RCP<const MpiComm<int> > mpiComm = rcp_dynamic_cast<const MpiComm<int> >(comm);
  if (!mpiComm.is_null()) {
    MPI_Datatype sendtype = getMpiDatatype<IntType>();
    MPI_Datatype recvtype = sendtype;
    MPI_Comm rawMpiComm = *(mpiComm->getRawMpiComm());
    const int err = MPI_Gather(reinterpret_cast<void*>(const_cast<IntType*>(sendbuf)),
                               sendcnt,
                               sendtype,
                               reinterpret_cast<void*>(recvbuf),
                               recvcnt,
                               recvtype,
                               root,
                               rawMpiComm);
    TEUCHOS_TEST_FOR_EXCEPTION(
        err != MPI_SUCCESS, std::runtime_error, "MPI_Gather failed");
    return;
  }
#endif // HAVE_MPI

  TEUCHOS_TEST_FOR_EXCEPTION(
      recvcnt > sendcnt, std::invalid_argument,
      "gather: If the input communicator contains only one process, "
      "then you cannot receive more entries than you send.  "
      "You are trying to receive " << recvcnt << " entries, "
      "but to send only " << sendcnt << ".");
  // Serial communicator case: just copy.  recvcnt is the
  // amount to receive, so it's the amount to copy.
  std::copy(sendbuf, sendbuf + recvcnt, recvbuf);
}
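
// Example (an illustrative sketch, not part of the original header): given a
// communicator comm, gather one int from each process onto Process 0, as
// computeGatherMap below does when collecting per-process element counts.
//
//   const int myValue = comm->getRank ();
//   Teuchos::Array<int> allValues (comm->getSize ());
//   gather<int> (&myValue, 1, allValues.getRawPtr (), 1, 0, comm);
//   // On Process 0, allValues[p] now holds the rank sent by process p.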

template <class IntType>
void gatherv(const IntType sendbuf[], const int sendcnt,
             IntType recvbuf[], const int recvcnts[], const int displs[],
             const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm) {
  using Teuchos::RCP;
  using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
  using Teuchos::MpiComm;

  // Get the raw MPI communicator, so we don't have to wrap
  // MPI_Gatherv in Teuchos.
  RCP<const MpiComm<int> > mpiComm =
      rcp_dynamic_cast<const MpiComm<int> >(comm);
  if (!mpiComm.is_null()) {
    MPI_Datatype sendtype = getMpiDatatype<IntType>();
    MPI_Datatype recvtype = sendtype;
    MPI_Comm rawMpiComm = *(mpiComm->getRawMpiComm());
    const int err = MPI_Gatherv(reinterpret_cast<void*>(const_cast<IntType*>(sendbuf)),
                                sendcnt,
                                sendtype,
                                reinterpret_cast<void*>(recvbuf),
                                const_cast<int*>(recvcnts),
                                const_cast<int*>(displs),
                                recvtype,
                                root,
                                rawMpiComm);
    TEUCHOS_TEST_FOR_EXCEPTION(
        err != MPI_SUCCESS, std::runtime_error, "MPI_Gatherv failed");
    return;
  }
#endif // HAVE_MPI
  TEUCHOS_TEST_FOR_EXCEPTION(
      recvcnts[0] > sendcnt, std::invalid_argument,
      "gatherv: If the input communicator contains only one process, "
      "then you cannot receive more entries than you send.  "
      "You are trying to receive " << recvcnts[0] << " entries, "
      "but to send only " << sendcnt << ".");
  // Serial communicator case: just copy.  recvcnts[0] is the
  // amount to receive, so it's the amount to copy.  Start
  // writing to recvbuf at the offset displs[0].
  std::copy(sendbuf, sendbuf + recvcnts[0], recvbuf + displs[0]);
}
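
// Example (an illustrative sketch with made-up numbers): if three processes
// send 2, 1, and 3 entries respectively, the root process passes
//   recvcnts = {2, 1, 3} and displs = {0, 2, 3},
// and recvbuf receives all six entries contiguously, in process-rank order;
// displs[p] is the offset in recvbuf where process p's entries begin.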
} // namespace

// Given an arbitrary Map, compute a Map containing all the GIDs
// in the same order as in (the one-to-one version of) map, but
// all owned exclusively by Proc 0.
template <class MapType>
Teuchos::RCP<const MapType>
computeGatherMap(Teuchos::RCP<const MapType> map,
                 const Teuchos::RCP<Teuchos::FancyOStream>& err,
                 const bool dbg = false) {
  using std::endl;
  using Teuchos::Array;
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::as;
  using Teuchos::Comm;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Tpetra::global_size_t;
  typedef typename MapType::local_ordinal_type LO;
  typedef typename MapType::global_ordinal_type GO;
  typedef typename MapType::node_type NT;

  RCP<const Comm<int> > comm = map->getComm();
  const int numProcs = comm->getSize();
  const int myRank = comm->getRank();

  if (!err.is_null()) {
    err->pushTab();
  }
  if (dbg) {
    *err << myRank << ": computeGatherMap:" << endl;
  }
  if (!err.is_null()) {
    err->pushTab();
  }

  RCP<const MapType> oneToOneMap;
  if (map->isContiguous()) {
    oneToOneMap = map;  // contiguous Maps are always 1-to-1
  } else {
    if (dbg) {
      *err << myRank << ": computeGatherMap: Calling createOneToOne" << endl;
    }
    // It could be that map is already one-to-one, but the Map
    // class doesn't give us a way to test this, other than to
    // create the one-to-one Map.
    oneToOneMap = createOneToOne<LO, GO, NT>(map);
  }

  RCP<const MapType> gatherMap;
  if (numProcs == 1) {
    gatherMap = oneToOneMap;
  } else {
    if (dbg) {
      *err << myRank << ": computeGatherMap: Gathering Map counts" << endl;
    }
    // Gather each process' count of Map elements to Proc 0,
    // into the recvCounts array.  This will tell Proc 0 how
    // many GIDs to expect from each process when calling
    // MPI_Gatherv.  Counts and offsets are all int, because
    // that's what MPI uses.  Teuchos::as will at least prevent
    // bad casts to int in a debug build.
    //
    // This is not memory scalable: the first MPI_Gather is only
    // the beginning, since we then bring ALL the data to Proc 0,
    // not just the receive counts.  To make this memory scalable,
    // the right thing to do (after the Export to the one-to-one
    // Map) would be to go round robin through the processes,
    // having each send one chunk of data (with its GIDs, to get
    // the order of rows right) at a time, and to overlap writing
    // to the file (resp. reading from it) with receiving (resp.
    // sending) the next chunk.
    const int myEltCount = as<int>(oneToOneMap->getLocalNumElements());
    Array<int> recvCounts(numProcs);
    const int rootProc = 0;
    gather<int>(&myEltCount, 1, recvCounts.getRawPtr(), 1, rootProc, comm);

    ArrayView<const GO> myGlobalElts = oneToOneMap->getLocalElementList();
    const int numMyGlobalElts = as<int>(myGlobalElts.size());
    // Only Proc 0 needs to receive and store all the GIDs (from
    // all processes).
    ArrayRCP<GO> allGlobalElts;
    if (myRank == 0) {
      allGlobalElts = Teuchos::arcp<GO>(oneToOneMap->getGlobalNumElements());
      std::fill(allGlobalElts.begin(), allGlobalElts.end(), 0);
    }

    if (dbg) {
      *err << myRank << ": computeGatherMap: Computing MPI_Gatherv "
        "displacements" << endl;
    }
    // Displacements for gatherv() in this case (where we are
    // gathering into a contiguous array) are an exclusive
    // partial sum (first entry is zero, second starts the
    // partial sum) of recvCounts.
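    // For example (made-up numbers): recvCounts = {3, 1, 2} yields
    // displs = {0, 3, 4}.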
    Array<int> displs(numProcs, 0);
    std::partial_sum(recvCounts.begin(), recvCounts.end() - 1,
                     displs.begin() + 1);
    if (dbg) {
      *err << myRank << ": computeGatherMap: Calling MPI_Gatherv" << endl;
    }
    gatherv<GO>(myGlobalElts.getRawPtr(), numMyGlobalElts,
                allGlobalElts.getRawPtr(), recvCounts.getRawPtr(),
                displs.getRawPtr(), rootProc, comm);

    if (dbg) {
      *err << myRank << ": computeGatherMap: Creating gather Map" << endl;
    }
    // Create a Map with all the GIDs, in the same order as in
    // the one-to-one Map, but owned by Proc 0.
    ArrayView<const GO> allElts(NULL, 0);
    if (myRank == 0) {
      allElts = allGlobalElts();
    }
    const global_size_t INVALID = Teuchos::OrdinalTraits<global_size_t>::invalid();
    gatherMap = rcp(new MapType(INVALID, allElts,
                                oneToOneMap->getIndexBase(),
                                comm));
  }
  if (!err.is_null()) {
    err->popTab();
  }
  if (dbg) {
    *err << myRank << ": computeGatherMap: done" << endl;
  }
  if (!err.is_null()) {
    err->popTab();
  }
  return gatherMap;
}
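
// Example of calling computeGatherMap (an illustrative sketch; the Map type,
// the global size, and the use of Tpetra::getDefaultComm() are assumptions
// made for illustration, and getDefaultComm() requires Tpetra_Core.hpp):
//
//   typedef Tpetra::Map<> map_type;
//   Teuchos::RCP<const Teuchos::Comm<int> > comm = Tpetra::getDefaultComm ();
//   // A contiguous Map with 100 global entries, distributed over all processes.
//   Teuchos::RCP<const map_type> map =
//     Teuchos::rcp (new map_type (100, 0, comm));
//   // Gather all 100 GIDs onto Process 0, preserving their order;
//   // pass Teuchos::null to skip debug output.
//   Teuchos::RCP<const map_type> gatherMap =
//     Tpetra::Details::computeGatherMap (map, Teuchos::null);
//   // Process 0's copy of gatherMap owns all 100 GIDs; every other
//   // process owns none.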

} // namespace Details
} // namespace Tpetra

#endif // __Tpetra_ComputeGatherMap_hpp