Teuchos - Trilinos Tools Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_DefaultMpiComm.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef TEUCHOS_MPI_COMM_HPP
43 #define TEUCHOS_MPI_COMM_HPP
44 
49 
50 #include <Teuchos_ConfigDefs.hpp>
51 
52 // If MPI is not enabled, disable the contents of this file.
53 #ifdef HAVE_TEUCHOS_MPI
54 
55 #include "Teuchos_Comm.hpp"
56 #include "Teuchos_CommUtilities.hpp"
58 #include "Teuchos_OpaqueWrapper.hpp"
60 #include "Teuchos_SerializationTraitsHelpers.hpp"
61 #include "Teuchos_Workspace.hpp"
63 #include "Teuchos_as.hpp"
64 #include "Teuchos_Assert.hpp"
65 #include <mpi.h>
66 #include <iterator>
67 
68 // This must be defined globally for the whole program!
69 //#define TEUCHOS_MPI_COMM_DUMP
70 
71 #ifdef TEUCHOS_MPI_COMM_DUMP
72 # include "Teuchos_VerboseObject.hpp"
73 #endif
74 
75 namespace Teuchos {
76 
78 TEUCHOSCOMM_LIB_DLL_EXPORT std::string
79 mpiErrorCodeToString (const int err);
80 
81 namespace details {
95  TEUCHOSCOMM_LIB_DLL_EXPORT void safeCommFree (MPI_Comm* comm);
96 
101  TEUCHOSCOMM_LIB_DLL_EXPORT int setCommErrhandler (MPI_Comm comm, MPI_Errhandler handler);
102 
103 } // namespace details
104 
105 #ifdef TEUCHOS_MPI_COMM_DUMP
106 template<typename Ordinal, typename T>
107 void dumpBuffer(
108  const std::string &funcName, const std::string &buffName
109  ,const Ordinal bytes, const T buff[]
110  )
111 {
114  Teuchos::OSTab tab(out);
115  *out
116  << "\n" << funcName << "::" << buffName << ":\n";
117  tab.incrTab();
118  for( Ordinal i = 0; i < bytes; ++i ) {
119  *out << buffName << "[" << i << "] = '" << buff[i] << "'\n";
120  }
121  *out << "\n";
122 }
123 #endif // TEUCHOS_MPI_COMM_DUMP
124 
136 template<class OrdinalType>
137 class MpiCommStatus : public CommStatus<OrdinalType> {
138 public:
139  MpiCommStatus (MPI_Status status) : status_ (status) {}
140 
142  virtual ~MpiCommStatus() {}
143 
145  OrdinalType getSourceRank () { return status_.MPI_SOURCE; }
146 
148  OrdinalType getTag () { return status_.MPI_TAG; }
149 
151  OrdinalType getError () { return status_.MPI_ERROR; }
152 
153 private:
155  MpiCommStatus ();
156 
158  MPI_Status status_;
159 };
160 
164 template<class OrdinalType>
165 inline RCP<MpiCommStatus<OrdinalType> >
166 mpiCommStatus (MPI_Status rawMpiStatus)
167 {
168  return rcp (new MpiCommStatus<OrdinalType> (rawMpiStatus));
169 }
170 
186 template<class OrdinalType>
187 class MpiCommRequestBase : public CommRequest<OrdinalType> {
188 public:
190  MpiCommRequestBase () :
191  rawMpiRequest_ (MPI_REQUEST_NULL)
192  {}
193 
195  MpiCommRequestBase (MPI_Request rawMpiRequest) :
196  rawMpiRequest_ (rawMpiRequest)
197  {}
198 
206  MPI_Request releaseRawMpiRequest()
207  {
208  MPI_Request tmp_rawMpiRequest = rawMpiRequest_;
209  rawMpiRequest_ = MPI_REQUEST_NULL;
210  return tmp_rawMpiRequest;
211  }
212 
214  bool isNull() const {
215  return rawMpiRequest_ == MPI_REQUEST_NULL;
216  }
217 
218  bool isReady() {
219  MPI_Status rawMpiStatus;
220  int flag = 0;
221 
222  MPI_Test(&rawMpiRequest_, &flag, &rawMpiStatus);
223 
224  return (flag != 0);
225  }
226 
232  RCP<CommStatus<OrdinalType> > wait () {
233  MPI_Status rawMpiStatus;
234  // Whether this function satisfies the strong exception guarantee
235  // depends on whether MPI_Wait modifies its input request on error.
236  const int err = MPI_Wait (&rawMpiRequest_, &rawMpiStatus);
238  err != MPI_SUCCESS, std::runtime_error,
239  "Teuchos: MPI_Wait() failed with error \""
240  << mpiErrorCodeToString (err));
241  // MPI_Wait sets the MPI_Request to MPI_REQUEST_NULL on success.
242  return mpiCommStatus<OrdinalType> (rawMpiStatus);
243  }
244 
249  RCP<CommStatus<OrdinalType> > cancel () {
250  if (rawMpiRequest_ == MPI_REQUEST_NULL) {
251  return null;
252  }
253  else {
254  int err = MPI_Cancel (&rawMpiRequest_);
256  err != MPI_SUCCESS, std::runtime_error,
257  "Teuchos: MPI_Cancel failed with the following error: "
258  << mpiErrorCodeToString (err));
259 
260  // Wait on the request. If successful, MPI_Wait will set the
261  // MPI_Request to MPI_REQUEST_NULL. The returned status may
262  // still be useful; for example, one may call MPI_Test_cancelled
263  // to test an MPI_Status from a nonblocking send.
264  MPI_Status status;
265  err = MPI_Wait (&rawMpiRequest_, &status);
266  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
267  "Teuchos::MpiCommStatus::cancel: MPI_Wait failed with the following "
268  "error: " << mpiErrorCodeToString (err));
269  return mpiCommStatus<OrdinalType> (status);
270  }
271  }
272 
274  virtual ~MpiCommRequestBase () {
275  if (rawMpiRequest_ != MPI_REQUEST_NULL) {
276  // We're in a destructor, so don't throw errors. However, if
277  // MPI_Cancel fails, it's probably a bad idea to call MPI_Wait.
278  const int err = MPI_Cancel (&rawMpiRequest_);
279  if (err == MPI_SUCCESS) {
280  // The MPI_Cancel succeeded. Now wait on the request. Ignore
281  // any reported error, since we can't do anything about those
282  // in the destructor (other than kill the program). If
283  // successful, MPI_Wait will set the MPI_Request to
284  // MPI_REQUEST_NULL. We ignore the returned MPI_Status, since
285  // if the user let the request fall out of scope, she must not
286  // care about the status.
287  //
288  // mfh 21 Oct 2012: The MPI standard requires completing a
289  // canceled request by calling a function like MPI_Wait,
290  // MPI_Test, or MPI_Request_free. MPI_Wait on a canceled
291  // request behaves like a local operation (it does not
292  // communicate or block waiting for communication). One could
293  // also call MPI_Request_free instead of MPI_Wait, but
294  // MPI_Request_free is intended more for persistent requests
295  // (created with functions like MPI_Recv_init).
296  (void) MPI_Wait (&rawMpiRequest_, MPI_STATUS_IGNORE);
297  }
298  }
299  }
300 
301 private:
303  MPI_Request rawMpiRequest_;
304 };
305 
321 template<class OrdinalType>
322 class MpiCommRequest : public MpiCommRequestBase<OrdinalType> {
323 public:
325  MpiCommRequest () :
326  MpiCommRequestBase<OrdinalType> (MPI_REQUEST_NULL),
327  numBytes_ (0)
328  {}
329 
331  MpiCommRequest (MPI_Request rawMpiRequest,
332  const ArrayView<char>::size_type numBytesInMessage) :
333  MpiCommRequestBase<OrdinalType> (rawMpiRequest),
334  numBytes_ (numBytesInMessage)
335  {}
336 
342  ArrayView<char>::size_type numBytes () const {
343  return numBytes_;
344  }
345 
347  virtual ~MpiCommRequest () {}
348 
349 private:
351  ArrayView<char>::size_type numBytes_;
352 };
353 
362 template<class OrdinalType>
363 inline RCP<MpiCommRequest<OrdinalType> >
364 mpiCommRequest (MPI_Request rawMpiRequest,
365  const ArrayView<char>::size_type numBytes)
366 {
367  return rcp (new MpiCommRequest<OrdinalType> (rawMpiRequest, numBytes));
368 }
369 
385 template<typename Ordinal>
386 class MpiComm : public Comm<Ordinal> {
387 public:
389 
390 
411  explicit MpiComm (MPI_Comm rawMpiComm);
412 
427  MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm);
428 
446  MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm,
447  const int defaultTag);
448 
465  MpiComm (const MpiComm<Ordinal>& other);
466 
468  RCP<const OpaqueWrapper<MPI_Comm> > getRawMpiComm () const {
469  return rawMpiComm_;
470  }
471 
536  void setErrorHandler (const RCP<const OpaqueWrapper<MPI_Errhandler> >& errHandler);
537 
539 
541 
543  virtual int getRank() const;
544 
546  virtual int getSize() const;
547 
549  virtual void barrier() const;
550 
552  virtual void broadcast(
553  const int rootRank, const Ordinal bytes, char buffer[]
554  ) const;
555 
557  virtual void
558  gather (const Ordinal sendBytes, const char sendBuffer[],
559  const Ordinal recvBytes, char recvBuffer[],
560  const int root) const;
562  virtual void gatherAll(
563  const Ordinal sendBytes, const char sendBuffer[]
564  ,const Ordinal recvBytes, char recvBuffer[]
565  ) const;
567  virtual void reduceAll(
568  const ValueTypeReductionOp<Ordinal,char> &reductOp
569  ,const Ordinal bytes, const char sendBuffer[], char globalReducts[]
570  ) const;
572  virtual void scan(
573  const ValueTypeReductionOp<Ordinal,char> &reductOp
574  ,const Ordinal bytes, const char sendBuffer[], char scanReducts[]
575  ) const;
577  virtual void send(
578  const Ordinal bytes, const char sendBuffer[], const int destRank
579  ) const;
581  virtual void
582  send (const Ordinal bytes,
583  const char sendBuffer[],
584  const int destRank,
585  const int tag) const;
587  virtual void ssend(
588  const Ordinal bytes, const char sendBuffer[], const int destRank
589  ) const;
591  virtual void
592  ssend (const Ordinal bytes,
593  const char sendBuffer[],
594  const int destRank,
595  const int tag) const;
597  virtual int receive(
598  const int sourceRank, const Ordinal bytes, char recvBuffer[]
599  ) const;
601  virtual void readySend(
602  const ArrayView<const char> &sendBuffer,
603  const int destRank
604  ) const;
606  virtual void
607  readySend (const Ordinal bytes,
608  const char sendBuffer[],
609  const int destRank,
610  const int tag) const;
612  virtual RCP<CommRequest<Ordinal> > isend(
613  const ArrayView<const char> &sendBuffer,
614  const int destRank
615  ) const;
617  virtual RCP<CommRequest<Ordinal> >
618  isend (const ArrayView<const char> &sendBuffer,
619  const int destRank,
620  const int tag) const;
622  virtual RCP<CommRequest<Ordinal> > ireceive(
623  const ArrayView<char> &Buffer,
624  const int sourceRank
625  ) const;
627  virtual RCP<CommRequest<Ordinal> >
628  ireceive (const ArrayView<char> &Buffer,
629  const int sourceRank,
630  const int tag) const;
632  virtual void waitAll(
633  const ArrayView<RCP<CommRequest<Ordinal> > > &requests
634  ) const;
636  virtual void
637  waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
638  const ArrayView<RCP<CommStatus<Ordinal> > >& statuses) const;
640  virtual RCP<CommStatus<Ordinal> >
641  wait (const Ptr<RCP<CommRequest<Ordinal> > >& request) const;
643  virtual RCP< Comm<Ordinal> > duplicate() const;
645  virtual RCP< Comm<Ordinal> > split(const int color, const int key) const;
647  virtual RCP< Comm<Ordinal> > createSubcommunicator(
648  const ArrayView<const int>& ranks) const;
649 
651 
653 
655  std::string description() const;
656 
658 
659  // These should be private but the PGI compiler requires them be public
660 
661  static int const minTag_ = 26000; // These came from Teuchos::MpiComm???
662  static int const maxTag_ = 26099; // ""
663 
669  int getTag () const { return tag_; }
670 
671 private:
672 
676  void setupMembersFromComm();
677  static int tagCounter_;
678 
686  RCP<const OpaqueWrapper<MPI_Comm> > rawMpiComm_;
687 
689  int rank_;
690 
692  int size_;
693 
701  int tag_;
702 
704  RCP<const OpaqueWrapper<MPI_Errhandler> > customErrorHandler_;
705 
706  void assertRank(const int rank, const std::string &rankName) const;
707 
708  // Not defined and not to be called!
709  MpiComm();
710 
711 #ifdef TEUCHOS_MPI_COMM_DUMP
712 public:
713  static bool show_dump;
714 #endif // TEUCHOS_MPI_COMM_DUMP
715 
716 };
717 
718 
732 template<typename Ordinal>
733 RCP<MpiComm<Ordinal> >
734 createMpiComm(
735  const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm
736  );
737 
738 
752 template<typename Ordinal>
753 RCP<MpiComm<Ordinal> >
754 createMpiComm(
755  const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm,
756  const int defaultTag
757  );
758 
759 
787 template<typename Ordinal>
788 MPI_Comm
789 getRawMpiComm(const Comm<Ordinal> &comm);
790 
791 
792 // ////////////////////////
793 // Implementations
794 
795 
796 // Static members
797 
798 
799 template<typename Ordinal>
800 int MpiComm<Ordinal>::tagCounter_ = MpiComm<Ordinal>::minTag_;
801 
802 
803 // Constructors
804 
805 
806 template<typename Ordinal>
807 MpiComm<Ordinal>::
808 MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm)
809 {
811  rawMpiComm.get () == NULL, std::invalid_argument,
812  "Teuchos::MpiComm constructor: The input RCP is null.");
814  *rawMpiComm == MPI_COMM_NULL, std::invalid_argument,
815  "Teuchos::MpiComm constructor: The given MPI_Comm is MPI_COMM_NULL.");
816 
817  rawMpiComm_ = rawMpiComm;
818 
819  // mfh 09 Jul 2013: Please resist the temptation to modify the given
820  // MPI communicator's error handler here. See Bug 5943. Note that
821  // an MPI communicator's default error handler is
822  // MPI_ERRORS_ARE_FATAL, which immediately aborts on error (without
823  // returning an error code from the MPI function). Users who want
824  // MPI functions instead to return an error code if they encounter
825  // an error, should set the error handler to MPI_ERRORS_RETURN. DO
826  // NOT SET THE ERROR HANDLER HERE!!! Teuchos' MPI wrappers should
827  // always check the error code returned by an MPI function,
828  // regardless of the error handler. Users who want to set the error
829  // handler on an MpiComm may call its setErrorHandler method.
830 
831  setupMembersFromComm ();
832 }
833 
834 
835 template<typename Ordinal>
836 MpiComm<Ordinal>::
837 MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm,
838  const int defaultTag)
839 {
841  rawMpiComm.get () == NULL, std::invalid_argument,
842  "Teuchos::MpiComm constructor: The input RCP is null.");
844  *rawMpiComm == MPI_COMM_NULL, std::invalid_argument,
845  "Teuchos::MpiComm constructor: The given MPI_Comm is MPI_COMM_NULL.");
846 
847  rawMpiComm_ = rawMpiComm;
848  // Set size_ (the number of processes in the communicator).
849  int err = MPI_Comm_size (*rawMpiComm_, &size_);
850  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
851  "Teuchos::MpiComm constructor: MPI_Comm_size failed with "
852  "error \"" << mpiErrorCodeToString (err) << "\".");
853  // Set rank_ (the calling process' rank).
854  err = MPI_Comm_rank (*rawMpiComm_, &rank_);
855  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
856  "Teuchos::MpiComm constructor: MPI_Comm_rank failed with "
857  "error \"" << mpiErrorCodeToString (err) << "\".");
858  tag_ = defaultTag; // set the default message tag
859 }
860 
861 
862 template<typename Ordinal>
863 MpiComm<Ordinal>::MpiComm (MPI_Comm rawMpiComm)
864 {
865  TEUCHOS_TEST_FOR_EXCEPTION(rawMpiComm == MPI_COMM_NULL,
866  std::invalid_argument, "Teuchos::MpiComm constructor: The given MPI_Comm "
867  "is MPI_COMM_NULL.");
868  // We don't supply a "free" function here, since this version of the
869  // constructor makes the caller responsible for freeing rawMpiComm
870  // after use if necessary.
871  rawMpiComm_ = opaqueWrapper<MPI_Comm> (rawMpiComm);
872 
873  // mfh 09 Jul 2013: Please resist the temptation to modify the given
874  // MPI communicator's error handler here. See Bug 5943. Note that
875  // an MPI communicator's default error handler is
876  // MPI_ERRORS_ARE_FATAL, which immediately aborts on error (without
877  // returning an error code from the MPI function). Users who want
878  // MPI functions instead to return an error code if they encounter
879  // an error, should set the error handler to MPI_ERRORS_RETURN. DO
880  // NOT SET THE ERROR HANDLER HERE!!! Teuchos' MPI wrappers should
881  // always check the error code returned by an MPI function,
882  // regardless of the error handler. Users who want to set the error
883  // handler on an MpiComm may call its setErrorHandler method.
884 
885  setupMembersFromComm ();
886 }
887 
888 
889 template<typename Ordinal>
890 MpiComm<Ordinal>::MpiComm (const MpiComm<Ordinal>& other) :
891  rawMpiComm_ (opaqueWrapper<MPI_Comm> (MPI_COMM_NULL)) // <- This will be set below
892 {
893  // These are logic errors, since they violate MpiComm's invariants.
894  RCP<const OpaqueWrapper<MPI_Comm> > origCommPtr = other.getRawMpiComm ();
895  TEUCHOS_TEST_FOR_EXCEPTION(origCommPtr == null, std::logic_error,
896  "Teuchos::MpiComm copy constructor: "
897  "The input's getRawMpiComm() method returns null.");
898  MPI_Comm origComm = *origCommPtr;
899  TEUCHOS_TEST_FOR_EXCEPTION(origComm == MPI_COMM_NULL, std::logic_error,
900  "Teuchos::MpiComm copy constructor: "
901  "The input's raw MPI_Comm is MPI_COMM_NULL.");
902 
903  // mfh 19 Oct 2012: Don't change the behavior of MpiComm's copy
904  // constructor for now. Later, we'll switch to the version that
905  // calls MPI_Comm_dup. For now, we just copy other's handle over.
906  // Note that the new MpiComm's tag is still different than the input
907  // MpiComm's tag. See Bug 5740.
908  if (true) {
909  rawMpiComm_ = origCommPtr;
910  }
911  else { // false (not run)
912  MPI_Comm newComm;
913  const int err = MPI_Comm_dup (origComm, &newComm);
914  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
915  "Teuchos::MpiComm copy constructor: MPI_Comm_dup failed with "
916  "the following error: " << mpiErrorCodeToString (err));
917  // No side effects until after everything has succeeded.
918  rawMpiComm_ = opaqueWrapper (newComm, details::safeCommFree);
919  }
920 
921  setupMembersFromComm ();
922 }
923 
924 
925 template<typename Ordinal>
926 void MpiComm<Ordinal>::setupMembersFromComm ()
927 {
928  int err = MPI_Comm_size (*rawMpiComm_, &size_);
929  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
930  "Teuchos::MpiComm constructor: MPI_Comm_size failed with "
931  "error \"" << mpiErrorCodeToString (err) << "\".");
932  err = MPI_Comm_rank (*rawMpiComm_, &rank_);
933  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
934  "Teuchos::MpiComm constructor: MPI_Comm_rank failed with "
935  "error \"" << mpiErrorCodeToString (err) << "\".");
936 
937  // Set the default tag to make unique across all communicators
938  if (tagCounter_ > maxTag_) {
939  tagCounter_ = minTag_;
940  }
941  tag_ = tagCounter_++;
942  // Ensure that the same tag is used on all processes.
943  //
944  // FIXME (mfh 09 Jul 2013) This would not be necessary if MpiComm
945  // were just to call MPI_Comm_dup (as every library should) when
946  // given its communicator. Of course, MPI_Comm_dup may also be
947  // implemented as a collective, and may even be more expensive than
948  // a broadcast. If we do decide to use MPI_Comm_dup, we can get rid
949  // of the broadcast below, and also get rid of tag_, tagCounter_,
950  // minTag_, and maxTag_.
951  MPI_Bcast (&tag_, 1, MPI_INT, 0, *rawMpiComm_);
952 }
953 
954 
955 template<typename Ordinal>
956 void
957 MpiComm<Ordinal>::
958 setErrorHandler (const RCP<const OpaqueWrapper<MPI_Errhandler> >& errHandler)
959 {
960  if (! is_null (errHandler)) {
961  const int err = details::setCommErrhandler (*getRawMpiComm (), *errHandler);
962  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
963  "Teuchos::MpiComm: Setting the MPI_Comm's error handler failed with "
964  "error \"" << mpiErrorCodeToString (err) << "\".");
965  }
966  // Wait to set this until the end, in case setting the error handler
967  // doesn't succeed.
968  customErrorHandler_ = errHandler;
969 }
970 
971 //
972 // Overridden from Comm
973 //
974 
975 template<typename Ordinal>
976 int MpiComm<Ordinal>::getRank() const
977 {
978  return rank_;
979 }
980 
981 
982 template<typename Ordinal>
983 int MpiComm<Ordinal>::getSize() const
984 {
985  return size_;
986 }
987 
988 
989 template<typename Ordinal>
990 void MpiComm<Ordinal>::barrier() const
991 {
992  TEUCHOS_COMM_TIME_MONITOR(
993  "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::barrier()"
994  );
995  const int err = MPI_Barrier (*rawMpiComm_);
996  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
997  "Teuchos::MpiComm::barrier: MPI_Barrier failed with error \""
998  << mpiErrorCodeToString (err) << "\".");
999 }
1000 
1001 
1002 template<typename Ordinal>
1003 void MpiComm<Ordinal>::broadcast(
1004  const int rootRank, const Ordinal bytes, char buffer[]
1005  ) const
1006 {
1007  TEUCHOS_COMM_TIME_MONITOR(
1008  "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::broadcast(...)"
1009  );
1010  const int err = MPI_Bcast (buffer, bytes, MPI_CHAR, rootRank, *rawMpiComm_);
1011  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1012  "Teuchos::MpiComm::broadcast: MPI_Bcast failed with error \""
1013  << mpiErrorCodeToString (err) << "\".");
1014 }
1015 
1016 
1017 template<typename Ordinal>
1018 void MpiComm<Ordinal>::gatherAll(
1019  const Ordinal sendBytes, const char sendBuffer[],
1020  const Ordinal recvBytes, char recvBuffer[]
1021  ) const
1022 {
1023  TEUCHOS_COMM_TIME_MONITOR(
1024  "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::gatherAll(...)"
1025  );
1026  TEUCHOS_ASSERT_EQUALITY((sendBytes*size_), recvBytes );
1027  const int err =
1028  MPI_Allgather (const_cast<char *>(sendBuffer), sendBytes, MPI_CHAR,
1029  recvBuffer, sendBytes, MPI_CHAR, *rawMpiComm_);
1030  // NOTE: 'sendBytes' is being sent above for the MPI arg recvcount (which is
1031  // very confusing in the MPI documentation) for MPI_Allgether(...).
1032 
1033  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1034  "Teuchos::MpiComm::gatherAll: MPI_Allgather failed with error \""
1035  << mpiErrorCodeToString (err) << "\".");
1036 }
1037 
1038 
1039 template<typename Ordinal>
1040 void
1041 MpiComm<Ordinal>::gather (const Ordinal sendBytes,
1042  const char sendBuffer[],
1043  const Ordinal recvBytes,
1044  char recvBuffer[],
1045  const int root) const
1046 {
1047  (void) recvBytes; // silence compile warning for "unused parameter"
1048 
1049  TEUCHOS_COMM_TIME_MONITOR(
1050  "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::gather(...)"
1051  );
1052  const int err =
1053  MPI_Gather (const_cast<char *> (sendBuffer), sendBytes, MPI_CHAR,
1054  recvBuffer, sendBytes, MPI_CHAR, root, *rawMpiComm_);
1055  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1056  "Teuchos::MpiComm::gather: MPI_Gather failed with error \""
1057  << mpiErrorCodeToString (err) << "\".");
1058 }
1059 
1060 
1061 template<typename Ordinal>
1062 void
1063 MpiComm<Ordinal>::
1064 reduceAll (const ValueTypeReductionOp<Ordinal,char> &reductOp,
1065  const Ordinal bytes,
1066  const char sendBuffer[],
1067  char globalReducts[]) const
1068 {
1069  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::reduceAll(...)" );
1070  int err = MPI_SUCCESS;
1071 
1072  if (bytes == 0) return;
1073 
1074  Details::MpiReductionOp<Ordinal> opWrap (reductOp);
1075  MPI_Op op = Details::setMpiReductionOp (opWrap);
1076 
1077  // FIXME (mfh 23 Nov 2014) Ross decided to mash every type into
1078  // char. This can cause correctness issues if we're actually doing
1079  // a reduction over, say, double. Thus, he creates a custom
1080  // MPI_Datatype here that represents a contiguous block of char, so
1081  // that MPI doesn't split up the reduction type and thus do the sum
1082  // wrong. It's a hack but it works.
1083 
1084  MPI_Datatype char_block;
1085  err = MPI_Type_contiguous (bytes, MPI_CHAR, &char_block);
1087  err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1088  "MPI_Type_contiguous failed with error \"" << mpiErrorCodeToString (err)
1089  << "\".");
1090  err = MPI_Type_commit (&char_block);
1092  err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1093  "MPI_Type_commit failed with error \"" << mpiErrorCodeToString (err)
1094  << "\".");
1095 
1096  if (sendBuffer == globalReducts) {
1097  // NOTE (mfh 31 May 2017) This is only safe if the communicator is
1098  // NOT an intercomm. The usual case is that communicators are
1099  // intracomms.
1100  err = MPI_Allreduce (MPI_IN_PLACE, globalReducts, 1,
1101  char_block, op, *rawMpiComm_);
1102  }
1103  else {
1104  err = MPI_Allreduce (const_cast<char*> (sendBuffer), globalReducts, 1,
1105  char_block, op, *rawMpiComm_);
1106  }
1107  if (err != MPI_SUCCESS) {
1108  // Don't throw until we release the type resources we allocated
1109  // above. If freeing fails for some reason, let the memory leak
1110  // go; we already have more serious problems if MPI_Allreduce
1111  // doesn't work.
1112  (void) MPI_Type_free (&char_block);
1114  true, std::runtime_error, "Teuchos::reduceAll (MPI, custom op): "
1115  "MPI_Allreduce failed with error \"" << mpiErrorCodeToString (err)
1116  << "\".");
1117  }
1118  err = MPI_Type_free (&char_block);
1120  err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1121  "MPI_Type_free failed with error \"" << mpiErrorCodeToString (err)
1122  << "\".");
1123 }
1124 
1125 
1126 template<typename Ordinal>
1127 void MpiComm<Ordinal>::scan(
1128  const ValueTypeReductionOp<Ordinal,char> &reductOp
1129  ,const Ordinal bytes, const char sendBuffer[], char scanReducts[]
1130  ) const
1131 {
1132  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::scan(...)" );
1133 
1134  Details::MpiReductionOp<Ordinal> opWrap (reductOp);
1135  MPI_Op op = Details::setMpiReductionOp (opWrap);
1136  const int err =
1137  MPI_Scan (const_cast<char*> (sendBuffer), scanReducts, bytes, MPI_CHAR,
1138  op, *rawMpiComm_);
1139  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1140  "Teuchos::MpiComm::scan: MPI_Scan() failed with error \""
1141  << mpiErrorCodeToString (err) << "\".");
1142 }
1143 
1144 
1145 template<typename Ordinal>
1146 void
1147 MpiComm<Ordinal>::send (const Ordinal bytes,
1148  const char sendBuffer[],
1149  const int destRank) const
1150 {
1151  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::send(...)" );
1152 
1153 #ifdef TEUCHOS_MPI_COMM_DUMP
1154  if(show_dump) {
1155  dumpBuffer<Ordinal,char>(
1156  "Teuchos::MpiComm<Ordinal>::send(...)"
1157  ,"sendBuffer", bytes, sendBuffer
1158  );
1159  }
1160 #endif // TEUCHOS_MPI_COMM_DUMP
1161 
1162  const int err = MPI_Send (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1163  destRank, tag_, *rawMpiComm_);
1164  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1165  "Teuchos::MpiComm::send: MPI_Send() failed with error \""
1166  << mpiErrorCodeToString (err) << "\".");
1167 }
1168 
1169 
1170 template<typename Ordinal>
1171 void
1172 MpiComm<Ordinal>::send (const Ordinal bytes,
1173  const char sendBuffer[],
1174  const int destRank,
1175  const int tag) const
1176 {
1177  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::send(...)" );
1178  const int err = MPI_Send (const_cast<char*> (sendBuffer), bytes, MPI_CHAR,
1179  destRank, tag, *rawMpiComm_);
1180  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1181  "Teuchos::MpiComm::send: MPI_Send() failed with error \""
1182  << mpiErrorCodeToString (err) << "\".");
1183 }
1184 
1185 
1186 template<typename Ordinal>
1187 void
1188 MpiComm<Ordinal>::ssend (const Ordinal bytes,
1189  const char sendBuffer[],
1190  const int destRank) const
1191 {
1192  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ssend(...)" );
1193 
1194 #ifdef TEUCHOS_MPI_COMM_DUMP
1195  if(show_dump) {
1196  dumpBuffer<Ordinal,char>(
1197  "Teuchos::MpiComm<Ordinal>::send(...)"
1198  ,"sendBuffer", bytes, sendBuffer
1199  );
1200  }
1201 #endif // TEUCHOS_MPI_COMM_DUMP
1202 
1203  const int err = MPI_Ssend (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1204  destRank, tag_, *rawMpiComm_);
1205  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1206  "Teuchos::MpiComm::send: MPI_Ssend() failed with error \""
1207  << mpiErrorCodeToString (err) << "\".");
1208 }
1209 
1210 template<typename Ordinal>
1211 void
1212 MpiComm<Ordinal>::ssend (const Ordinal bytes,
1213  const char sendBuffer[],
1214  const int destRank,
1215  const int tag) const
1216 {
1217  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ssend(...)" );
1218  const int err =
1219  MPI_Ssend (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1220  destRank, tag, *rawMpiComm_);
1221  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1222  "Teuchos::MpiComm::send: MPI_Ssend() failed with error \""
1223  << mpiErrorCodeToString (err) << "\".");
1224 }
1225 
1226 template<typename Ordinal>
1227 void MpiComm<Ordinal>::readySend(
1228  const ArrayView<const char> &sendBuffer,
1229  const int destRank
1230  ) const
1231 {
1232  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::readySend" );
1233 
1234 #ifdef TEUCHOS_MPI_COMM_DUMP
1235  if(show_dump) {
1236  dumpBuffer<Ordinal,char>(
1237  "Teuchos::MpiComm<Ordinal>::readySend(...)"
1238  ,"sendBuffer", bytes, sendBuffer
1239  );
1240  }
1241 #endif // TEUCHOS_MPI_COMM_DUMP
1242 
1243  const int err =
1244  MPI_Rsend (const_cast<char*>(sendBuffer.getRawPtr()), static_cast<int>(sendBuffer.size()),
1245  MPI_CHAR, destRank, tag_, *rawMpiComm_);
1246  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1247  "Teuchos::MpiComm::readySend: MPI_Rsend() failed with error \""
1248  << mpiErrorCodeToString (err) << "\".");
1249 }
1250 
1251 
1252 template<typename Ordinal>
1253 void MpiComm<Ordinal>::
1254 readySend (const Ordinal bytes,
1255  const char sendBuffer[],
1256  const int destRank,
1257  const int tag) const
1258 {
1259  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::readySend" );
1260  const int err =
1261  MPI_Rsend (const_cast<char*> (sendBuffer), bytes,
1262  MPI_CHAR, destRank, tag, *rawMpiComm_);
1263  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1264  "Teuchos::MpiComm::readySend: MPI_Rsend() failed with error \""
1265  << mpiErrorCodeToString (err) << "\".");
1266 }
1267 
1268 
1269 template<typename Ordinal>
1270 int
1271 MpiComm<Ordinal>::receive (const int sourceRank,
1272  const Ordinal bytes,
1273  char recvBuffer[]) const
1274 {
1275  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::receive(...)" );
1276 
1277  // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1278  // will take an incoming message from any process, as long as the
1279  // tag matches.
1280  const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1281 
1282  MPI_Status status;
1283  const int err = MPI_Recv (recvBuffer, bytes, MPI_CHAR, theSrcRank, tag_,
1284  *rawMpiComm_, &status);
1285  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1286  "Teuchos::MpiComm::receive: MPI_Recv() failed with error \""
1287  << mpiErrorCodeToString (err) << "\".");
1288 
1289 #ifdef TEUCHOS_MPI_COMM_DUMP
1290  if (show_dump) {
1291  dumpBuffer<Ordinal,char> ("Teuchos::MpiComm<Ordinal>::receive(...)",
1292  "recvBuffer", bytes, recvBuffer);
1293  }
1294 #endif // TEUCHOS_MPI_COMM_DUMP
1295 
1296  // Returning the source rank is useful in the MPI_ANY_SOURCE case.
1297  return status.MPI_SOURCE;
1298 }
1299 
1300 
1301 template<typename Ordinal>
1302 RCP<CommRequest<Ordinal> >
1303 MpiComm<Ordinal>::isend (const ArrayView<const char> &sendBuffer,
1304  const int destRank) const
1305 {
1306  using Teuchos::as;
1307  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::isend(...)" );
1308 
1309  MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1310  const int err =
1311  MPI_Isend (const_cast<char*> (sendBuffer.getRawPtr ()),
1312  as<Ordinal> (sendBuffer.size ()), MPI_CHAR,
1313  destRank, tag_, *rawMpiComm_, &rawMpiRequest);
1314  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1315  "Teuchos::MpiComm::isend: MPI_Isend() failed with error \""
1316  << mpiErrorCodeToString (err) << "\".");
1317 
1318  return mpiCommRequest<Ordinal> (rawMpiRequest, sendBuffer.size ());
1319 }
1320 
1321 
1322 template<typename Ordinal>
1323 RCP<CommRequest<Ordinal> >
1324 MpiComm<Ordinal>::
1325 isend (const ArrayView<const char> &sendBuffer,
1326  const int destRank,
1327  const int tag) const
1328 {
1329  using Teuchos::as;
1330  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::isend(...)" );
1331 
1332  MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1333  const int err =
1334  MPI_Isend (const_cast<char*> (sendBuffer.getRawPtr ()),
1335  as<Ordinal> (sendBuffer.size ()), MPI_CHAR,
1336  destRank, tag, *rawMpiComm_, &rawMpiRequest);
1337  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1338  "Teuchos::MpiComm::isend: MPI_Isend() failed with error \""
1339  << mpiErrorCodeToString (err) << "\".");
1340 
1341  return mpiCommRequest<Ordinal> (rawMpiRequest, sendBuffer.size ());
1342 }
1343 
1344 
1345 template<typename Ordinal>
1346 RCP<CommRequest<Ordinal> >
1347 MpiComm<Ordinal>::ireceive (const ArrayView<char> &recvBuffer,
1348  const int sourceRank) const
1349 {
1350  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ireceive(...)" );
1351 
1352  // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1353  // will take an incoming message from any process, as long as the
1354  // tag matches.
1355  const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1356 
1357  MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1358  const int err =
1359  MPI_Irecv (const_cast<char*>(recvBuffer.getRawPtr()), recvBuffer.size(),
1360  MPI_CHAR, theSrcRank, tag_, *rawMpiComm_, &rawMpiRequest);
1361  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1362  "Teuchos::MpiComm::ireceive: MPI_Irecv() failed with error \""
1363  << mpiErrorCodeToString (err) << "\".");
1364 
1365  return mpiCommRequest<Ordinal> (rawMpiRequest, recvBuffer.size());
1366 }
1367 
1368 template<typename Ordinal>
1369 RCP<CommRequest<Ordinal> >
1370 MpiComm<Ordinal>::ireceive (const ArrayView<char> &recvBuffer,
1371  const int sourceRank,
1372  const int tag) const
1373 {
1374  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ireceive(...)" );
1375 
1376  // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1377  // will take an incoming message from any process, as long as the
1378  // tag matches.
1379  const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1380 
1381  MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1382  const int err =
1383  MPI_Irecv (const_cast<char*> (recvBuffer.getRawPtr ()), recvBuffer.size (),
1384  MPI_CHAR, theSrcRank, tag, *rawMpiComm_, &rawMpiRequest);
1385  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1386  "Teuchos::MpiComm::ireceive: MPI_Irecv() failed with error \""
1387  << mpiErrorCodeToString (err) << "\".");
1388 
1389  return mpiCommRequest<Ordinal> (rawMpiRequest, recvBuffer.size ());
1390 }
1391 
1392 namespace {
1393  // Called by the two-argument MpiComm::waitAll() variant.
1394  template<typename Ordinal>
1395  void
1396  waitAllImpl (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
1397  const ArrayView<MPI_Status>& rawMpiStatuses)
1398  {
1399  typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1400  const size_type count = requests.size();
1401  // waitAllImpl() is not meant to be called by users, so it's a bug
1402  // for the two views to have different lengths.
1403  TEUCHOS_TEST_FOR_EXCEPTION(rawMpiStatuses.size() != count,
1404  std::logic_error, "Teuchos::MpiComm's waitAllImpl: rawMpiStatus.size() = "
1405  << rawMpiStatuses.size() << " != requests.size() = " << requests.size()
1406  << ". Please report this bug to the Tpetra developers.");
1407  if (count == 0) {
1408  return; // No requests on which to wait
1409  }
1410 
1411  // MpiComm wraps MPI and can't expose any MPI structs or opaque
1412  // objects. Thus, we have to unpack requests into a separate array.
1413  // If that's too slow, then your code should just call into MPI
1414  // directly.
1415  //
1416  // Pull out the raw MPI requests from the wrapped requests.
1417  // MPI_Waitall should not fail if a request is MPI_REQUEST_NULL, but
1418  // we keep track just to inform the user.
1419  bool someNullRequests = false;
1420  Array<MPI_Request> rawMpiRequests (count, MPI_REQUEST_NULL);
1421  for (int i = 0; i < count; ++i) {
1422  RCP<CommRequest<Ordinal> > request = requests[i];
1423  if (! is_null (request)) {
1424  RCP<MpiCommRequestBase<Ordinal> > mpiRequest =
1425  rcp_dynamic_cast<MpiCommRequestBase<Ordinal> > (request);
1426  // releaseRawMpiRequest() sets the MpiCommRequest's raw
1427  // MPI_Request to MPI_REQUEST_NULL. This makes waitAll() not
1428  // satisfy the strong exception guarantee. That's OK because
1429  // MPI_Waitall() doesn't promise that it satisfies the strong
1430  // exception guarantee, and we would rather conservatively
1431  // invalidate the handles than leave dangling requests around
1432  // and risk users trying to wait on the same request twice.
1433  rawMpiRequests[i] = mpiRequest->releaseRawMpiRequest();
1434  }
1435  else { // Null requests map to MPI_REQUEST_NULL
1436  rawMpiRequests[i] = MPI_REQUEST_NULL;
1437  someNullRequests = true;
1438  }
1439  }
1440 
1441  // This is the part where we've finally peeled off the wrapper and
1442  // we can now interact with MPI directly.
1443  //
1444  // One option in the one-argument version of waitAll() is to ignore
1445  // the statuses completely. MPI lets you pass in the named constant
1446  // MPI_STATUSES_IGNORE for the MPI_Status array output argument in
1447  // MPI_Waitall(), which would tell MPI not to bother with the
1448  // statuses. However, we want the statuses because we can use them
1449  // for detailed error diagnostics in case something goes wrong.
1450  const int err = MPI_Waitall (count, rawMpiRequests.getRawPtr(),
1451  rawMpiStatuses.getRawPtr());
1452 
1453  // In MPI_Waitall(), an error indicates that one or more requests
1454  // failed. In that case, there could be requests that completed
1455  // (their MPI_Status' error field is MPI_SUCCESS), and other
1456  // requests that have not completed yet but have not necessarily
1457  // failed (MPI_PENDING). We make no attempt here to wait on the
1458  // pending requests. It doesn't make sense for us to do so, because
1459  // in general Teuchos::Comm doesn't attempt to provide robust
1460  // recovery from failed messages.
1461  if (err != MPI_SUCCESS) {
1462  if (err == MPI_ERR_IN_STATUS) {
1463  //
1464  // When MPI_Waitall returns MPI_ERR_IN_STATUS (a standard error
1465  // class), it's telling us to check the error codes in the
1466  // returned statuses. In that case, we do so and generate a
1467  // detailed exception message.
1468  //
1469  // Figure out which of the requests failed.
1470  Array<std::pair<size_type, int> > errorLocationsAndCodes;
1471  for (size_type k = 0; k < rawMpiStatuses.size(); ++k) {
1472  const int curErr = rawMpiStatuses[k].MPI_ERROR;
1473  if (curErr != MPI_SUCCESS) {
1474  errorLocationsAndCodes.push_back (std::make_pair (k, curErr));
1475  }
1476  }
1477  const size_type numErrs = errorLocationsAndCodes.size();
1478  if (numErrs > 0) {
1479  // There was at least one error. Assemble a detailed
1480  // exception message reporting which requests failed,
1481  // their error codes, and their source
1482  std::ostringstream os;
1483  os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1484  << mpiErrorCodeToString (err) << "\". Of the " << count
1485  << " total request" << (count != 1 ? "s" : "") << ", " << numErrs
1486  << " failed. Here are the indices of the failed requests, and the "
1487  "error codes extracted from their returned MPI_Status objects:"
1488  << std::endl;
1489  for (size_type k = 0; k < numErrs; ++k) {
1490  const size_type errInd = errorLocationsAndCodes[k].first;
1491  os << "Request " << errInd << ": MPI_ERROR = "
1492  << mpiErrorCodeToString (rawMpiStatuses[errInd].MPI_ERROR)
1493  << std::endl;
1494  }
1495  if (someNullRequests) {
1496  os << " On input to MPI_Waitall, there was at least one MPI_"
1497  "Request that was MPI_REQUEST_NULL. MPI_Waitall should not "
1498  "normally fail in that case, but we thought we should let you know "
1499  "regardless.";
1500  }
1501  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1502  }
1503  // If there were no actual errors in the returned statuses,
1504  // well, then I guess everything is OK. Just keep going.
1505  }
1506  else {
1507  std::ostringstream os;
1508  os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1509  << mpiErrorCodeToString (err) << "\".";
1510  if (someNullRequests) {
1511  os << " On input to MPI_Waitall, there was at least one MPI_Request "
1512  "that was MPI_REQUEST_NULL. MPI_Waitall should not normally fail in "
1513  "that case, but we thought we should let you know regardless.";
1514  }
1515  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1516  }
1517  }
1518 
1519  // Invalidate the input array of requests by setting all entries
1520  // to null.
1521  std::fill (requests.begin(), requests.end(), null);
1522  }
1523 
1524 
1525 
1526  // Called by the one-argument MpiComm::waitAll() variant.
1527  template<typename Ordinal>
1528  void
1529  waitAllImpl (const ArrayView<RCP<CommRequest<Ordinal> > >& requests)
1530  {
1531  typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1532  const size_type count = requests.size ();
1533  if (count == 0) {
1534  return; // No requests on which to wait
1535  }
1536 
1537  // MpiComm wraps MPI and can't expose any MPI structs or opaque
1538  // objects. Thus, we have to unpack requests into a separate
1539  // array. If that's too slow, then your code should just call
1540  // into MPI directly.
1541  //
1542  // Pull out the raw MPI requests from the wrapped requests.
1543  // MPI_Waitall should not fail if a request is MPI_REQUEST_NULL,
1544  // but we keep track just to inform the user.
1545  bool someNullRequests = false;
1546  Array<MPI_Request> rawMpiRequests (count, MPI_REQUEST_NULL);
1547  for (int i = 0; i < count; ++i) {
1548  RCP<CommRequest<Ordinal> > request = requests[i];
1549  if (! request.is_null ()) {
1550  RCP<MpiCommRequestBase<Ordinal> > mpiRequest =
1551  rcp_dynamic_cast<MpiCommRequestBase<Ordinal> > (request);
1552  // releaseRawMpiRequest() sets the MpiCommRequest's raw
1553  // MPI_Request to MPI_REQUEST_NULL. This makes waitAll() not
1554  // satisfy the strong exception guarantee. That's OK because
1555  // MPI_Waitall() doesn't promise that it satisfies the strong
1556  // exception guarantee, and we would rather conservatively
1557  // invalidate the handles than leave dangling requests around
1558  // and risk users trying to wait on the same request twice.
1559  rawMpiRequests[i] = mpiRequest->releaseRawMpiRequest ();
1560  }
1561  else { // Null requests map to MPI_REQUEST_NULL
1562  rawMpiRequests[i] = MPI_REQUEST_NULL;
1563  someNullRequests = true;
1564  }
1565  }
1566 
1567  // This is the part where we've finally peeled off the wrapper and
1568  // we can now interact with MPI directly.
1569  //
1570  // MPI lets us pass in the named constant MPI_STATUSES_IGNORE for
1571  // the MPI_Status array output argument in MPI_Waitall(), which
1572  // tells MPI not to bother writing out the statuses.
1573  const int err = MPI_Waitall (count, rawMpiRequests.getRawPtr(),
1574  MPI_STATUSES_IGNORE);
1575 
1576  // In MPI_Waitall(), an error indicates that one or more requests
1577  // failed. In that case, there could be requests that completed
1578  // (their MPI_Status' error field is MPI_SUCCESS), and other
1579  // requests that have not completed yet but have not necessarily
1580  // failed (MPI_PENDING). We make no attempt here to wait on the
1581  // pending requests. It doesn't make sense for us to do so,
1582  // because in general Teuchos::Comm doesn't attempt to provide
1583  // robust recovery from failed messages.
1584  if (err != MPI_SUCCESS) {
1585  std::ostringstream os;
1586  os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1587  << mpiErrorCodeToString (err) << "\".";
1588  if (someNullRequests) {
1589  os << std::endl << "On input to MPI_Waitall, there was at least one "
1590  "MPI_Request that was MPI_REQUEST_NULL. MPI_Waitall should not "
1591  "normally fail in that case, but we thought we should let you know "
1592  "regardless.";
1593  }
1594  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1595  }
1596 
1597  // Invalidate the input array of requests by setting all entries
1598  // to null. We delay this until the end, since some
1599  // implementations of CommRequest might hold the only reference to
1600  // the communication buffer, and we don't want that to go away
1601  // until we've waited on the communication operation.
1602  std::fill (requests.begin(), requests.end(), null);
1603  }
1604 
1605 } // namespace (anonymous)
1606 
1607 
1608 
1609 template<typename Ordinal>
1610 void
1611 MpiComm<Ordinal>::
1612 waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests) const
1613 {
1614  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::waitAll(requests)" );
1615  // Call the one-argument version of waitAllImpl, to avoid overhead
1616  // of handling statuses (which the user didn't want anyway).
1617  waitAllImpl<Ordinal> (requests);
1618 }
1619 
1620 
1621 template<typename Ordinal>
1622 void
1623 MpiComm<Ordinal>::
1624 waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
1625  const ArrayView<RCP<CommStatus<Ordinal> > >& statuses) const
1626 {
1627  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::waitAll(requests, statuses)" );
1628 
1629  typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1630  const size_type count = requests.size();
1631 
1632  TEUCHOS_TEST_FOR_EXCEPTION(count != statuses.size(),
1633  std::invalid_argument, "Teuchos::MpiComm::waitAll: requests.size() = "
1634  << count << " != statuses.size() = " << statuses.size() << ".");
1635 
1636  Array<MPI_Status> rawMpiStatuses (count);
1637  waitAllImpl<Ordinal> (requests, rawMpiStatuses());
1638 
1639  // Repackage the raw MPI_Status structs into the wrappers.
1640  for (size_type i = 0; i < count; ++i) {
1641  statuses[i] = mpiCommStatus<Ordinal> (rawMpiStatuses[i]);
1642  }
1643 }
1644 
1645 
1646 template<typename Ordinal>
1647 RCP<CommStatus<Ordinal> >
1648 MpiComm<Ordinal>::wait (const Ptr<RCP<CommRequest<Ordinal> > >& request) const
1649 {
1650  TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::wait(...)" );
1651 
1652  if (is_null (*request)) {
1653  return null; // Nothing to wait on ...
1654  }
1655  else {
1656  RCP<CommStatus<Ordinal> > status = (*request)->wait ();
1657  // mfh 22 Oct 2012: The unit tests expect waiting on the
1658  // CommRequest to invalidate it by setting it to null.
1659  *request = null;
1660  return status;
1661  }
1662 }
1663 
1664 template<typename Ordinal>
1665 RCP< Comm<Ordinal> >
1666 MpiComm<Ordinal>::duplicate() const
1667 {
1668  MPI_Comm origRawComm = *rawMpiComm_;
1669  MPI_Comm newRawComm = MPI_COMM_NULL;
1670  const int err = MPI_Comm_dup (origRawComm, &newRawComm);
1671  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, "Teuchos"
1672  "::MpiComm::duplicate: MPI_Comm_dup failed with the following error: "
1673  << mpiErrorCodeToString (err));
1674 
1675  // Wrap the raw communicator, and pass the (const) wrapped
1676  // communicator to MpiComm's constructor. We created the raw comm,
1677  // so we have to supply a function that frees it after use.
1678  RCP<OpaqueWrapper<MPI_Comm> > wrapped =
1679  opaqueWrapper<MPI_Comm> (newRawComm, details::safeCommFree);
1680  // Since newComm's raw MPI_Comm is the result of an MPI_Comm_dup,
1681  // its messages cannot collide with those of any other MpiComm.
1682  // This means we can assign its tag without an MPI_Bcast.
1683  RCP<MpiComm<Ordinal> > newComm =
1684  rcp (new MpiComm<Ordinal> (wrapped.getConst (), minTag_));
1685  return rcp_implicit_cast<Comm<Ordinal> > (newComm);
1686 }
1687 
1688 
1689 template<typename Ordinal>
1690 RCP< Comm<Ordinal> >
1691 MpiComm<Ordinal>::split(const int color, const int key) const
1692 {
1693  MPI_Comm newComm;
1694  const int splitReturn =
1695  MPI_Comm_split (*rawMpiComm_,
1696  color < 0 ? MPI_UNDEFINED : color,
1697  key,
1698  &newComm);
1700  splitReturn != MPI_SUCCESS,
1701  std::logic_error,
1702  "Teuchos::MpiComm::split: Failed to create communicator with color "
1703  << color << "and key " << key << ". MPI_Comm_split failed with error \""
1704  << mpiErrorCodeToString (splitReturn) << "\".");
1705  if (newComm == MPI_COMM_NULL) {
1706  return RCP< Comm<Ordinal> >();
1707  } else {
1708  RCP<const OpaqueWrapper<MPI_Comm> > wrapped =
1709  opaqueWrapper<MPI_Comm> (newComm, details::safeCommFree);
1710  // Since newComm's raw MPI_Comm is the result of an
1711  // MPI_Comm_split, its messages cannot collide with those of any
1712  // other MpiComm. This means we can assign its tag without an
1713  // MPI_Bcast.
1714  return rcp (new MpiComm<Ordinal> (wrapped, minTag_));
1715  }
1716 }
1717 
1718 
1719 template<typename Ordinal>
1720 RCP< Comm<Ordinal> >
1721 MpiComm<Ordinal>::createSubcommunicator(const ArrayView<const int> &ranks) const
1722 {
1723  int err = MPI_SUCCESS; // For error codes returned by MPI functions
1724 
1725  // Get the group that this communicator is in.
1726  MPI_Group thisGroup;
1727  err = MPI_Comm_group (*rawMpiComm_, &thisGroup);
1728  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1729  "Failed to obtain the current communicator's group. "
1730  "MPI_Comm_group failed with error \""
1731  << mpiErrorCodeToString (err) << "\".");
1732 
1733  // Create a new group with the specified members.
1734  MPI_Group newGroup;
1735  // It's rude to cast away const, but MPI functions demand it.
1736  //
1737  // NOTE (mfh 14 Aug 2012) Please don't ask for &ranks[0] unless you
1738  // know that ranks.size() > 0. That's why I'm using getRawPtr().
1739  err = MPI_Group_incl (thisGroup, ranks.size(),
1740  const_cast<int*> (ranks.getRawPtr ()), &newGroup);
1741  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1742  "Failed to create subgroup. MPI_Group_incl failed with error \""
1743  << mpiErrorCodeToString (err) << "\".");
1744 
1745  // Create a new communicator from the new group.
1746  MPI_Comm newComm;
1747  try {
1748  err = MPI_Comm_create (*rawMpiComm_, newGroup, &newComm);
1749  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1750  "Failed to create subcommunicator. MPI_Comm_create failed with error \""
1751  << mpiErrorCodeToString (err) << "\".");
1752  } catch (...) {
1753  // Attempt to free the new group before rethrowing. If
1754  // successful, this will prevent a memory leak due to the "lost"
1755  // group that was allocated successfully above. Since we're
1756  // throwing std::logic_error anyway, we can only promise
1757  // best-effort recovery; thus, we don't check the error code.
1758  (void) MPI_Group_free (&newGroup);
1759  (void) MPI_Group_free (&thisGroup);
1760  throw;
1761  }
1762 
1763  // We don't need the group any more, so free it.
1764  err = MPI_Group_free (&newGroup);
1765  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1766  "Failed to free subgroup. MPI_Group_free failed with error \""
1767  << mpiErrorCodeToString (err) << "\".");
1768  err = MPI_Group_free (&thisGroup);
1769  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1770  "Failed to free subgroup. MPI_Group_free failed with error \""
1771  << mpiErrorCodeToString (err) << "\".");
1772 
1773  if (newComm == MPI_COMM_NULL) {
1774  return RCP<Comm<Ordinal> > ();
1775  } else {
1776  using Teuchos::details::safeCommFree;
1777  typedef OpaqueWrapper<MPI_Comm> ow_type;
1778  RCP<const ow_type> wrapper =
1779  rcp_implicit_cast<const ow_type> (opaqueWrapper (newComm, safeCommFree));
1780  // Since newComm's raw MPI_Comm is the result of an
1781  // MPI_Comm_create, its messages cannot collide with those of any
1782  // other MpiComm. This means we can assign its tag without an
1783  // MPI_Bcast.
1784  return rcp (new MpiComm<Ordinal> (wrapper, minTag_));
1785  }
1786 }
1787 
1788 
1789 // Overridden from Describable
1790 
1791 
1792 template<typename Ordinal>
1793 std::string MpiComm<Ordinal>::description() const
1794 {
1795  std::ostringstream oss;
1796  oss
1797  << typeName(*this)
1798  << "{"
1799  << "size="<<size_
1800  << ",rank="<<rank_
1801  << ",rawMpiComm="<<static_cast<MPI_Comm>(*rawMpiComm_)
1802  <<"}";
1803  return oss.str();
1804 }
1805 
1806 
#ifdef TEUCHOS_MPI_COMM_DUMP
// Out-of-class definition of the flag that gates the dumpBuffer()
// calls in this file.  It starts out false, i.e. no dumping.
template<typename Ordinal>
bool MpiComm<Ordinal>::show_dump = false;
#endif // TEUCHOS_MPI_COMM_DUMP
1811 
1812 
1813 // private
1814 
1815 
1816 template<typename Ordinal>
1817 void MpiComm<Ordinal>::assertRank(const int rank, const std::string &rankName) const
1818 {
1820  ! ( 0 <= rank && rank < size_ ), std::logic_error
1821  ,"Error, "<<rankName<<" = " << rank << " is not < 0 or is not"
1822  " in the range [0,"<<size_-1<<"]!"
1823  );
1824 }
1825 
1826 
1827 } // namespace Teuchos
1828 
1829 
1830 template<typename Ordinal>
1832 Teuchos::createMpiComm(
1833  const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm
1834  )
1835 {
1836  if( rawMpiComm.get()!=NULL && *rawMpiComm != MPI_COMM_NULL )
1837  return rcp(new MpiComm<Ordinal>(rawMpiComm));
1838  return Teuchos::null;
1839 }
1840 
1841 
1842 template<typename Ordinal>
1844 Teuchos::createMpiComm(
1845  const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm,
1846  const int defaultTag
1847  )
1848 {
1849  if( rawMpiComm.get()!=NULL && *rawMpiComm != MPI_COMM_NULL )
1850  return rcp(new MpiComm<Ordinal>(rawMpiComm, defaultTag));
1851  return Teuchos::null;
1852 }
1853 
1854 
1855 template<typename Ordinal>
1856 MPI_Comm
1857 Teuchos::getRawMpiComm(const Comm<Ordinal> &comm)
1858 {
1859  return *(
1860  dyn_cast<const MpiComm<Ordinal> >(comm).getRawMpiComm()
1861  );
1862 }
1863 
1864 
1865 #endif // HAVE_TEUCHOS_MPI
1866 #endif // TEUCHOS_MPI_COMM_HPP
1867 
RCP< T > rcp(const boost::shared_ptr< T > &sptr)
Conversion function that takes in a boost::shared_ptr object and spits out a Teuchos::RCP object...
bool is_null(const std::shared_ptr< T > &p)
Returns true if p.get()==NULL.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
T_To & dyn_cast(T_From &from)
Dynamic casting utility function meant to replace dynamic_cast<T&> by throwing a better documented er...
Teuchos header file which uses auto-configuration information to include necessary C++ headers...
Tabbing class for helping to create formatted, indented output for a basic_FancyOStream object...
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
static RCP< FancyOStream > getDefaultOStream()
Get the default output stream object.
void send(const Packet sendBuffer[], const Ordinal count, const int destRank, const int tag, const Comm< Ordinal > &comm)
Variant of send() that takes a tag (and restores the correct order of arguments). ...
RCP< CommRequest< Ordinal > > ireceive(const ArrayRCP< Packet > &recvBuffer, const int sourceRank, const int tag, const Comm< Ordinal > &comm)
Variant of ireceive that takes a tag argument (and restores the correct order of arguments).
Defines basic traits for the ordinal field type.
TypeTo as(const TypeFrom &t)
Convert from one value type to another.
void ssend(const Packet sendBuffer[], const Ordinal count, const int destRank, const int tag, const Comm< Ordinal > &comm)
Variant of ssend() that takes a tag (and restores the correct order of arguments).
Smart reference counting pointer class for automatic garbage collection.
Implementation detail of Teuchos' MPI wrapper.
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
This macro checks that two numbers are equal and, if not, throws an exception with a good error ...
Defines basic traits returning the name of a type in a portable and readable way. ...
Definition of Teuchos::as, for conversions between types.
void readySend(const Packet sendBuffer[], const Ordinal count, const int destRank, const int tag, const Comm< Ordinal > &comm)
Variant of readySend() that accepts a message tag.
std::string typeName(const T &t)
Template function for returning the concrete type name of a passed-in object.