Tpetra_ComputeGatherMap.hpp
// @HEADER
// ***********************************************************************
//
//          Tpetra: Templated Linear Algebra Services Package
//                 Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
// @HEADER

#ifndef __Tpetra_ComputeGatherMap_hpp
#define __Tpetra_ComputeGatherMap_hpp

#include "Tpetra_Map.hpp"
#include <numeric>

// Macro that marks a function as "possibly unused," in order to
// suppress build warnings.
#if ! defined(TRILINOS_UNUSED_FUNCTION)
#  if defined(__GNUC__) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
#    define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#  elif defined(__clang__)
#    if __has_attribute(unused)
#      define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#    else
#      define TRILINOS_UNUSED_FUNCTION
#    endif // Clang has 'unused' attribute
#  elif defined(__IBMCPP__)
// IBM's C++ compiler for Blue Gene/Q (V12.1) implements 'used' but not 'unused'.
//
// http://pic.dhe.ibm.com/infocenter/compbg/v121v141/index.jsp
#    define TRILINOS_UNUSED_FUNCTION
#  else // some other compiler
#    define TRILINOS_UNUSED_FUNCTION
#  endif
#endif // ! defined(TRILINOS_UNUSED_FUNCTION)

namespace Tpetra {
  namespace Details {

    namespace {
#ifdef HAVE_MPI
      // We're communicating integers of type IntType.  Figure
      // out the right MPI_Datatype for IntType.  Usually it
      // is int or long, so these are good enough for now.
      template<class IntType> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype () {
        TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
          "Not implemented for IntType != int, long, or long long");
      }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<int> () { return MPI_INT; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long> () { return MPI_LONG; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long long> () { return MPI_LONG_LONG; }
#endif // HAVE_MPI

      template<class IntType>
      void
      gather (const IntType sendbuf[], const int sendcnt,
              IntType recvbuf[], const int recvcnt,
              const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap MPI_Gather in Teuchos.
        RCP<const MpiComm<int> > mpiComm = rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err = MPI_Gather (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                                      sendcnt,
                                      sendtype,
                                      reinterpret_cast<void*> (recvbuf),
                                      recvcnt,
                                      recvtype,
                                      root,
                                      rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gather failed");
          return;
        }
#endif // HAVE_MPI

        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnt > sendcnt, std::invalid_argument,
          "gather: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send.  "
          "You aim to receive " << recvcnt << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnt is the
        // amount to receive, so it's the amount to copy.
        std::copy (sendbuf, sendbuf + recvcnt, recvbuf);
      }
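
      // Illustration (not in the original source): with four processes
      // and recvcnt == 1, each process contributes the single entry in
      // its sendbuf, and after the call Proc root's recvbuf holds
      // {v0, v1, v2, v3}, where vp came from process p.  On non-root
      // processes, recvbuf is not written.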

      template<class IntType>
      void
      gatherv (const IntType sendbuf[], const int sendcnt,
               IntType recvbuf[], const int recvcnts[], const int displs[],
               const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap
        // MPI_Gatherv in Teuchos.
        RCP<const MpiComm<int> > mpiComm =
          rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err = MPI_Gatherv (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                                       sendcnt,
                                       sendtype,
                                       reinterpret_cast<void*> (recvbuf),
                                       const_cast<int*> (recvcnts),
                                       const_cast<int*> (displs),
                                       recvtype,
                                       root,
                                       rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gatherv failed");
          return;
        }
#endif // HAVE_MPI
        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnts[0] > sendcnt,
          std::invalid_argument,
          "gatherv: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send.  "
          "You aim to receive " << recvcnts[0] << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnts[0] is the
        // amount to receive, so it's the amount to copy.  Start
        // writing to recvbuf at the offset displs[0].
        std::copy (sendbuf, sendbuf + recvcnts[0], recvbuf + displs[0]);
      }
    } // namespace (anonymous)


    // Given an arbitrary Map, compute a Map containing all the GIDs
    // in the same order as in (the one-to-one version of) map, but
    // all owned exclusively by Proc 0.
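    //
    // Usage sketch (illustrative, not part of the original file); it
    // assumes some existing, possibly distributed Tpetra::Map named rowMap:
    //
    //   typedef Tpetra::Map<> map_type;
    //   Teuchos::RCP<const map_type> rowMap = ...; // a distributed Map
    //   Teuchos::RCP<const map_type> gatherMap =
    //     Tpetra::Details::computeGatherMap (rowMap, Teuchos::null);
    //
    //   // On Proc 0, gatherMap owns all GIDs of (the one-to-one version
    //   // of) rowMap, in order; on all other processes it owns none.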
    template<class MapType>
    Teuchos::RCP<const MapType>
    computeGatherMap (Teuchos::RCP<const MapType> map,
                      const Teuchos::RCP<Teuchos::FancyOStream>& err,
                      const bool dbg=false)
    {
      using Tpetra::global_size_t;
      using Teuchos::Array;
      using Teuchos::ArrayRCP;
      using Teuchos::ArrayView;
      using Teuchos::as;
      using Teuchos::Comm;
      using Teuchos::RCP;
      using Teuchos::rcp; // needed for the unqualified rcp() call below
      using std::endl;
      typedef typename MapType::local_ordinal_type LO;
      typedef typename MapType::global_ordinal_type GO;
      typedef typename MapType::node_type NT;

      RCP<const Comm<int> > comm = map->getComm ();
      const int numProcs = comm->getSize ();
      const int myRank = comm->getRank ();

      if (! err.is_null ()) {
        err->pushTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap:" << endl;
      }
      if (! err.is_null ()) {
        err->pushTab ();
      }

      RCP<const MapType> oneToOneMap;
      if (map->isContiguous ()) {
        oneToOneMap = map; // contiguous Maps are always 1-to-1
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling createOneToOne" << endl;
        }
        // It could be that Map is one-to-one, but the class doesn't
        // give us a way to test this, other than to create the
        // one-to-one Map.
        oneToOneMap = createOneToOne<LO, GO, NT> (map);
      }

      RCP<const MapType> gatherMap;
      if (numProcs == 1) {
        gatherMap = oneToOneMap;
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Gathering Map counts" << endl;
        }
        // Gather each process' count of Map elements to Proc 0,
        // into the recvCounts array.  This will tell Proc 0 how
        // many GIDs to expect from each process when calling
        // MPI_Gatherv.  Counts and offsets are all int, because
        // that's what MPI uses.  Teuchos::as will at least prevent
        // bad casts to int in a debug build.
        //
        // Yeah, it's not memory scalable.  Guess what: We're going
        // to bring ALL the data to Proc 0, not just the receive
        // counts.  The first MPI_Gather is only the beginning...
        // Seriously, if you want to make this memory scalable, the
        // right thing to do (after the Export to the one-to-one
        // Map) is to go round robin through the processes, having
        // each send a chunk of data (with its GIDs, to get the
        // order of rows right) at a time, and overlapping writing
        // to the file (resp. reading from it) with receiving (resp.
        // sending) the next chunk.
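        //
        // Worked example (not in the original source): with three
        // processes owning 4, 5, and 3 elements of the one-to-one Map,
        // the gather below leaves Proc 0's recvCounts equal to {4, 5, 3};
        // the other processes' recvCounts arrays are not written.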
        const int myEltCount = as<int> (oneToOneMap->getNodeNumElements ());
        Array<int> recvCounts (numProcs);
        const int rootProc = 0;
        gather<int> (&myEltCount, 1, recvCounts.getRawPtr (), 1, rootProc, comm);

        ArrayView<const GO> myGlobalElts = oneToOneMap->getNodeElementList ();
        const int numMyGlobalElts = as<int> (myGlobalElts.size ());
        // Only Proc 0 needs to receive and store all the GIDs (from
        // all processes).
        ArrayRCP<GO> allGlobalElts;
        if (myRank == 0) {
          allGlobalElts = Teuchos::arcp<GO> (oneToOneMap->getGlobalNumElements ());
          std::fill (allGlobalElts.begin (), allGlobalElts.end (), 0);
        }

        if (dbg) {
          *err << myRank << ": computeGatherMap: Computing MPI_Gatherv "
            "displacements" << endl;
        }
        // Displacements for gatherv() in this case (where we are
        // gathering into a contiguous array) are an exclusive
        // partial sum (first entry is zero, second starts the
        // partial sum) of recvCounts.
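        //
        // For example (not in the original source): if recvCounts is
        // {4, 5, 3}, the exclusive partial sum gives displs = {0, 4, 9},
        // so on Proc 0 the GIDs from Proc 1 land at offset 4 of
        // allGlobalElts and those from Proc 2 at offset 9.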
        Array<int> displs (numProcs, 0);
        std::partial_sum (recvCounts.begin (), recvCounts.end () - 1,
                          displs.begin () + 1);
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling MPI_Gatherv" << endl;
        }
        gatherv<GO> (myGlobalElts.getRawPtr (), numMyGlobalElts,
                     allGlobalElts.getRawPtr (), recvCounts.getRawPtr (),
                     displs.getRawPtr (), rootProc, comm);

        if (dbg) {
          *err << myRank << ": computeGatherMap: Creating gather Map" << endl;
        }
        // Create a Map with all the GIDs, in the same order as in
        // the one-to-one Map, but owned by Proc 0.
        ArrayView<const GO> allElts (NULL, 0);
        if (myRank == 0) {
          allElts = allGlobalElts ();
        }
        const global_size_t INVALID = Teuchos::OrdinalTraits<global_size_t>::invalid ();
        gatherMap = rcp (new MapType (INVALID, allElts,
                                      oneToOneMap->getIndexBase (),
                                      comm));
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap: done" << endl;
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      return gatherMap;
    }

  } // namespace Details
} // namespace Tpetra

#endif // __Tpetra_ComputeGatherMap_hpp