Teuchos - Trilinos Tools Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_DefaultComm.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Teuchos: Common Tools Package
4 //
5 // Copyright 2004 NTESS and the Teuchos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TEUCHOS_DEFAULT_COMM_HPP
11 #define TEUCHOS_DEFAULT_COMM_HPP
12 
13 #include "Teuchos_RCP.hpp"
14 #include "Teuchos_DefaultSerialComm.hpp"
15 #ifdef HAVE_MPI
17 #endif
18 
19 namespace Teuchos {
20 
21 #ifdef HAVE_MPI
// Forward declarations of the two MPI attribute "delete" callbacks.
// They are declared ahead of the DefaultComm class so that the class
// can name them in its friend declarations; their definitions follow
// the class further down in this file.  Both use the four-argument
// (MPI_Comm, int, void*, void*) signature and are handed to
// MPI_Comm_create_keyval as the delete function.
22 namespace Details {
23 
// Delete callback registered by the getComm implementation.
24 template<class OrdinalType>
25 int
26 mpiFreeDefaultComm (MPI_Comm, int, void*, void*);
27 
// Delete callback registered by the getDefaultSerialComm implementation.
28 template<class OrdinalType>
29 int
30 mpiFreeDefaultSerialComm (MPI_Comm, int, void*, void*);
31 
32 } // namespace Details
33 #endif // HAVE_MPI
34 
// Holder for two per-OrdinalType communicator singletons, kept as
// static raw pointers (comm_ and defaultSerialComm_) together with the
// static helpers that delete them.
//
// NOTE(review): the public section is empty in this listing (original
// lines 64-85 are not shown).  The reference text at the end of the
// page names two static accessors, getComm() and
// getDefaultSerialComm(comm); confirm against the real header.
61 template<typename OrdinalType>
62 class DefaultComm {
63 public:
77 
86 
87 private:
 // Default global communicator.  Allocated on first use by the getComm
 // implementation below and deleted by freeDefaultComm().
93  static const Comm<OrdinalType>* comm_;
94 
 // Default serial communicator.  Allocated on first use by the
 // getDefaultSerialComm implementation below and deleted by
 // freeDefaultSerialComm().
96  static const Comm<OrdinalType>* defaultSerialComm_;
97 
98 #ifdef HAVE_MPI
 // The MPI delete callbacks are friends so that they can reach the
 // private members of this class.
99  template<class OT>
101  friend int
102  Details::mpiFreeDefaultComm (MPI_Comm, int, void*, void*);
103 
105  template<class OT>
106  friend int
107  Details::mpiFreeDefaultSerialComm (MPI_Comm, int, void*, void*);
108 #endif // HAVE_MPI
109 
 // Delete comm_ (if set) and reset it to NULL, so a repeated call is a
 // no-op.  Takes no arguments and returns void, which is the shape
 // atexit() requires; the non-MPI build registers it that way.
111  static void freeDefaultComm () {
112  if (comm_ != NULL) {
113  delete comm_;
114  comm_ = NULL;
115  }
116  }
117 
 // Delete defaultSerialComm_ (if set) and reset it to NULL, so a
 // repeated call is a no-op.
119  static void freeDefaultSerialComm () {
120  if (defaultSerialComm_ != NULL) {
121  delete defaultSerialComm_;
122  defaultSerialComm_ = NULL;
123  }
124  }
125 };
126 
127 #ifdef HAVE_MPI
128 namespace Details {
129 
// MPI attribute delete callback for the default Comm.  The getComm
// implementation passes it to MPI_Comm_create_keyval.  All four
// arguments are ignored.  Returns MPI_SUCCESS normally; if the try
// block throws, the exception is swallowed, a message is printed to
// std::cerr, and a value different from MPI_SUCCESS is returned.
//
// NOTE(review): this listing omits original line 135, the only
// statement inside the try block.  Presumably it is the call that
// frees the default Comm -- confirm against the real header.
130 template<class OrdinalType>
131 int
132 mpiFreeDefaultComm (MPI_Comm, int, void*, void*)
133 {
134  try {
136  } catch (...) {
137  // Destructors must not throw exceptions, so we must accept the
138  // possible memory leak and move on.
139  std::cerr << "Teuchos::DefaultComm: Failed to free default Comm! We can't "
140  "throw an exception here because this is a singleton destructor that "
141  "should only be called at MPI_Finalize or (if not building with MPI) at "
142  "exit from main()." << std::endl;
143  // FIXME (mfh 16 Nov 2014) There might be some way to create a
144  // custom return code with MPI error reporting. For now, we just
145  // pick some error code not equal to MPI_SUCCESS. It could
146  // perhaps overlap with some existing error code.
 // Yields -1 when MPI_SUCCESS is 0 and 0 otherwise, so the result is
 // never equal to MPI_SUCCESS.
147  return (MPI_SUCCESS == 0) ? -1 : 0;
148  }
149  return MPI_SUCCESS;
150 }
151 
// MPI attribute delete callback for the default serial Comm.  The
// getDefaultSerialComm implementation passes it to
// MPI_Comm_create_keyval.  All four arguments are ignored.  Returns
// MPI_SUCCESS normally; if the try block throws, the exception is
// swallowed, a message is printed to std::cerr, and a value different
// from MPI_SUCCESS is returned.
//
// NOTE(review): this listing omits original line 157, the only
// statement inside the try block.  Presumably it is the call that
// frees the default serial Comm -- confirm against the real header.
152 template<class OrdinalType>
153 int
154 mpiFreeDefaultSerialComm (MPI_Comm, int, void*, void*)
155 {
156  try {
158  } catch (...) {
159  // Destructors must not throw exceptions, so we must accept the
160  // possible memory leak and move on.
161  std::cerr << "Teuchos::DefaultComm: Failed to free default serial Comm! "
162  "We can't throw an exception here because this is a singleton destructor "
163  "that should only be called at MPI_Finalize or (if not building with MPI)"
164  " at exit from main()." << std::endl;
165  // FIXME (mfh 16 Nov 2014) There might be some way to create a
166  // custom return code with MPI error reporting. For now, we just
167  // pick some error code not equal to MPI_SUCCESS. It could
168  // perhaps overlap with some existing error code.
 // Yields -1 when MPI_SUCCESS is 0 and 0 otherwise, so the result is
 // never equal to MPI_SUCCESS.
169  return (MPI_SUCCESS == 0) ? -1 : 0;
170  }
171  return MPI_SUCCESS;
172 }
173 
174 } // namespace Details
175 #endif // HAVE_MPI
176 
177 
// Out-of-class definition of DefaultComm<OrdinalType>::getComm (the
// name is taken from the error messages in the body).
//
// On the first call, allocates comm_: an MpiComm wrapping
// MPI_COMM_WORLD when HAVE_MPI is defined, otherwise a SerialComm.  It
// then arranges for comm_ to be freed later: with MPI, by attaching an
// attribute to MPI_COMM_SELF whose delete callback is
// Details::mpiFreeDefaultComm; without MPI, by registering
// freeDefaultComm with atexit.  If any registration step fails, comm_
// is deleted, reset to NULL, and std::runtime_error is thrown.
// Returns a nonowning RCP to comm_.
//
// NOTE(review): this listing omits original lines 179-180 (the return
// type and qualified function name) and line 288 (presumably the
// TEUCHOS_TEST_FOR_EXCEPTION opener for the check just before the
// return) -- confirm against the real header.
178 template<typename OrdinalType>
181 {
182  if (comm_ == NULL) {
183 #ifdef HAVE_MPI
184 # if MPI_VERSION >= 2
185 
186  comm_ = new MpiComm<OrdinalType> (MPI_COMM_WORLD);
187 
188  // We want comm_ to be deallocated when MPI_Finalize is called.
189  // The standard idiom for this (look in the MPI standard) is to
190  // register an attribute ((key,value) pair) with MPI_COMM_SELF,
191  // with a custom "destructor" to be called at MPI_Finalize.
192 
193  // 'key' is an output argument of MPI_Comm_create_keyval.
194  int key = MPI_KEYVAL_INVALID;
195  int err =
196  MPI_Comm_create_keyval (MPI_COMM_NULL_COPY_FN,
197  Details::mpiFreeDefaultComm<OrdinalType>,
198  &key,
199  NULL); // no extra state
200  if (err != MPI_SUCCESS) {
201  if (comm_ != NULL) { // clean up if MPI call fails
202  delete comm_;
203  comm_ = NULL;
204  }
205  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
206  "Teuchos::DefaultComm::getComm: MPI_Comm_create_keyval failed!");
207  }
 // The attribute value is the address of this local; the callback
 // declared above ignores its arguments, so the value is never read.
208  int val = key; // doesn't matter
209 
210  // Attach the attribute to MPI_COMM_SELF.
211  err = MPI_Comm_set_attr (MPI_COMM_SELF, key, &val);
212  if (err != MPI_SUCCESS) {
213  // MPI (versions up to and including 3.0) doesn't promise
214  // correct behavior after any function returns something other
215  // than MPI_SUCCESS. Thus, it's not required to try to free the
216  // new key via MPI_Comm_free_keyval. Furthermore, if something
217  // went wrong with MPI_Comm_set_attr, it's likely that the
218  // attribute mechanism is broken. Thus, it would be unwise to
219  // call MPI_Comm_free_keyval. However, we can still clean up
220  // other data.
221  if (comm_ != NULL) { // clean up if MPI call fails
222  delete comm_;
223  comm_ = NULL;
224  }
225  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
226  "Teuchos::DefaultComm::getComm: MPI_Comm_set_attr failed!");
227  }
228 
229  // It looks weird to "free" the key right away. However, this
230  // does not actually cause the "destructor" to be called. It only
231  // gets called at MPI_FINALIZE. See MPI 3.0 standard, Section
232  // 6.7.2, MPI_COMM_FREE_KEYVAL:
233  //
234  // "Note that it is not erroneous to free an attribute key that is
235  // in use, because the actual free does not transpire until after
236  // all references (in other communicators on the process) to the
237  // key have been freed. These references need to be explicitly
238  // freed by the program, either via calls to MPI_COMM_DELETE_ATTR
239  // that free one attribute instance, or by calls to MPI_COMM_FREE
240  // that free all attribute instances associated with the freed
241  // communicator."
242  //
243  // We rely here on the latter mechanism. MPI_FINALIZE calls
244  // MPI_COMM_FREE on MPI_COMM_SELF, so we do not need to call it
245  // explicitly.
246  //
247  // It's not clear what to do if the MPI_* calls above succeeded,
248  // but this call fails (i.e., returns != MPI_SUCCESS). We could
249  // throw; this would make sense to do, because MPI (versions up to
250  // and including 3.0) doesn't promise correct behavior after any
251  // MPI function returns something other than MPI_SUCCESS. We
252  // could also be optimistic and just ignore the return value,
253  // hoping that if the above calls succeeded, then the communicator
254  // will get freed at MPI_FINALIZE, even though the unfreed key may
255  // leak memory (see Bug 6338). I've chosen the latter.
256  (void) MPI_Comm_free_keyval (&key);
257 
258 # else // MPI_VERSION < 2
259 # error "Sorry, you need an MPI implementation that supports at least MPI 2.0 in order to build this code. MPI 2.0 came out in 1997. I wrote this comment in 2017. If you really _really_ want MPI 1.x support, please file a GitHub issue for this feature request at github.com/trilinos/trilinos/issues with an expression of its priority and we will get to it as soon as we can."
260 # endif // MPI_VERSION >= 2
261 
262 #else // NOT HAVE_MPI
263  comm_ = new SerialComm<OrdinalType> ();
264  // We want comm_ to be deallocated when main exits, so register
265  // its deallocation function as an atexit handler.
266  //
267  // The POSIX standard allows atexit to fail, in particular if it
268  // lacks space for registering more functions. "[T]he application
269  // should call sysconf() to obtain the value of {ATEXIT_MAX}, the
270  // [maximum] number of functions that can be registered. There is
271  // no way for an application to tell how many functions have
272  // already been registered with atexit()."
273  //
274  // We don't do this here. Instead, we just check atexit's return
275  // code. If it fails, we throw.
276  int err = atexit (freeDefaultComm);
277  if (err != 0) {
278  if (comm_ != NULL) { // clean up if atexit fails
279  delete comm_;
280  comm_ = NULL;
281  }
282  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
283  "Teuchos::DefaultComm::getComm: atexit failed!");
284  }
285 #endif // HAVE_MPI
286  }
287 
 // Sanity check: comm_ must be set by this point on every path.
289  (comm_ == NULL, std::logic_error, "Teuchos::DefaultComm::getComm: "
290  "comm_ == NULL before return. This should never happen. "
291  "Please report this bug to the Teuchos developers.");
292 
293  // Return a nonowning RCP, because we need to ensure that
294  // destruction happens at MPI_Finalize (or at exit of main(), if not
295  // building with MPI).
296  return rcp (comm_, false);
297 }
298 
299 template<typename OrdinalType>
303 {
304  if (! comm.is_null ()) {
305  return comm;
306  } else {
307  if (defaultSerialComm_ == NULL) {
308 #ifdef HAVE_MPI
309 # if MPI_VERSION >= 2
310  //defaultSerialComm_ = new MpiComm<OrdinalType> (MPI_COMM_SELF);
311  defaultSerialComm_ = new SerialComm<OrdinalType> ();
312 
313  // Register an MPI_Finalize hook to free defaultSerialComm_.
314  // (See getComm implementation above in this file for details.)
315 
316  int key = MPI_KEYVAL_INVALID;
317  int err =
318  MPI_Comm_create_keyval (MPI_COMM_NULL_COPY_FN,
319  Details::mpiFreeDefaultSerialComm<OrdinalType>,
320  &key,
321  NULL); // no extra state
322  if (err != MPI_SUCCESS) {
323  if (defaultSerialComm_ != NULL) { // clean up if MPI call fails
324  delete defaultSerialComm_;
325  defaultSerialComm_ = NULL;
326  }
328  true, std::runtime_error, "Teuchos::DefaultComm::getDefaultSerialComm"
329  ": MPI_Comm_create_keyval failed!");
330  }
331  int val = key; // doesn't matter
332 
333  // Attach the attribute to MPI_COMM_SELF.
334  err = MPI_Comm_set_attr (MPI_COMM_SELF, key, &val);
335  if (err != MPI_SUCCESS) {
336  // See comments in getComm implementation above to see why we
337  // don't call MPI_Comm_free_keyval here.
338  if (defaultSerialComm_ != NULL) { // clean up if MPI call fails
339  delete defaultSerialComm_;
340  defaultSerialComm_ = NULL;
341  }
343  true, std::runtime_error, "Teuchos::DefaultComm::getDefaultSerialComm"
344  ": MPI_Comm_set_attr failed!");
345  }
346 
347  // See comments in getComm implementation above to see why we
348  // _do_ call MPI_Comm_free_keyval here, and why we don't check
349  // the return code.
350  (void) MPI_Comm_free_keyval (&key);
351 
352 # else // MPI_VERSION < 2
353 # error "Sorry, you need an MPI implementation that supports at least MPI 2.0 in order to build this code. MPI 2.0 came out in 1997. I wrote this comment in 2017. If you really _really_ want MPI 1.x support, please file a GitHub issue for this feature request at github.com/trilinos/trilinos/issues with an expression of its priority and we will get to it as soon as we can."
354 # endif // MPI_VERSION >= 2
355 
356 #else // NOT HAVE_MPI
357  defaultSerialComm_ = new SerialComm<OrdinalType> ();
358  // We want defaultSerialComm_ to be deallocated when main exits,
359  // so register its deallocation function as an atexit handler.
360  //
361  // The POSIX standard allows atexit to fail, in particular if it
362  // lacks space for registering more functions. "[T]he
363  // application should call sysconf() to obtain the value of
364  // {ATEXIT_MAX}, the [maximum] number of functions that can be
365  // registered. There is no way for an application to tell how
366  // many functions have already been registered with atexit()."
367  //
368  // We don't do this here. Instead, we just check atexit's
369  // return code. If it fails, we throw.
370  int err = atexit (freeDefaultComm);
371  if (err != 0) {
372  if (defaultSerialComm_ != NULL) { // clean up if atexit fails
373  delete defaultSerialComm_;
374  defaultSerialComm_ = NULL;
375  }
376  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
377  "Teuchos::DefaultComm::getDefaultSerialComm: atexit failed!");
378  }
379 #endif // HAVE_MPI
380  }
381 
383  (defaultSerialComm_ == NULL, std::logic_error, "Teuchos::DefaultComm::"
384  "getDefaultSerialComm: defaultSerialComm_ == NULL before return. This sh"
385  "ould never happen. Please report this bug to the Teuchos developers.");
386 
387  // Return a nonowning RCP, because we need to ensure that
388  // destruction happens at MPI_Finalize (or at exit of main(), if not
389  // building with MPI).
390  return rcp (defaultSerialComm_, false);
391  }
392 }
393 
394 template<typename OrdinalType>
397 
398 template<typename OrdinalType>
401 
402 } // namespace Teuchos
403 
404 #endif // TEUCHOS_DEFAULT_COMM_HPP
static Teuchos::RCP< const Comm< OrdinalType > > getDefaultSerialComm(const Teuchos::RCP< const Comm< OrdinalType > > &comm)
Return a serial Comm if the input Comm is null.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Concrete serial communicator subclass.
static Teuchos::RCP< const Comm< OrdinalType > > getComm()
Return the default global communicator.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
Implementation of Teuchos wrappers for MPI.
Return a default global communicator appropriate for the build.
Abstract interface for distributed-memory communication.
Smart reference counting pointer class for automatic garbage collection.
Reference-counted pointer class and non-member templated function implementations.