Thyra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Thyra_SpmdMultiVectorDefaultBase_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Thyra: Interfaces and Support for Abstract Numerical Algorithms
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Roscoe A. Bartlett (bartlettra@ornl.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef THYRA_SPMD_MULTI_VECTOR_DEFAULT_BASE_DEF_HPP
43 #define THYRA_SPMD_MULTI_VECTOR_DEFAULT_BASE_DEF_HPP
44 
45 // disable clang warnings
46 #if defined (__clang__) && !defined (__INTEL_COMPILER)
47 #pragma clang system_header
48 #endif
49 
50 
51 #include "Thyra_SpmdMultiVectorDefaultBase_decl.hpp"
52 #include "Thyra_MultiVectorDefaultBase.hpp"
53 #include "Thyra_MultiVectorAdapterBase.hpp"
54 #include "Thyra_SpmdVectorSpaceDefaultBase.hpp"
55 #include "Thyra_DetachedMultiVectorView.hpp"
56 #include "Thyra_apply_op_helper.hpp"
57 #include "Thyra_SpmdLocalDataAccess.hpp"
58 #include "RTOpPack_SPMD_apply_op.hpp"
59 #include "RTOp_parallel_helpers.h"
60 #include "Teuchos_Workspace.hpp"
61 #include "Teuchos_dyn_cast.hpp"
62 #include "Teuchos_Time.hpp"
63 #include "Teuchos_CommHelpers.hpp"
64 
65 
66 // Define to see some timing output!
67 //#define THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
68 
69 
70 namespace Thyra {
71 
72 
73 // Constructors / initializers / accessors
74 
75 
76 template<class Scalar>
78  :in_applyOp_(false),
79  globalDim_(0),
80  localOffset_(-1),
81  localSubDim_(0),
82  numCols_(0)
83 {}
84 
85 
86 // Overridden public functions from MultiVectorAdapterBase
87 
88 
89 template<class Scalar>
92 {
93  return Teuchos::rcp_dynamic_cast<const ScalarProdVectorSpaceBase<Scalar> >(
94  this->spmdSpace(), true
95  );
96 }
97 
98 
99 // Overridden public functions from MultiVectorAdapterBase
100 
101 
102 template<class Scalar>
105 {
106  using Teuchos::outArg;
107  ArrayRCP<Scalar> localValues;
108  Ordinal leadingDim = -1;
109  this->getNonconstLocalData(outArg(localValues), outArg(leadingDim));
111  localOffset_, // globalOffset
112  localSubDim_,
113  0, // colOffset
114  numCols_,
115  localValues,
116  leadingDim
117  );
118 }
119 
120 
121 template<class Scalar>
124 {
125  using Teuchos::outArg;
126  ArrayRCP<const Scalar> localValues;
127  Ordinal leadingDim = -1;
128  this->getLocalData(outArg(localValues), outArg(leadingDim));
130  localOffset_, // globalOffset
131  localSubDim_,
132  0, // colOffset
133  numCols_,
134  localValues,
135  leadingDim
136  );
137 }
138 
139 
140 // Protected functions overridden from MultiVectorBase
141 
142 
143 template<class Scalar>
145  const RTOpPack::RTOpT<Scalar> &pri_op,
146  const ArrayView<const Ptr<const MultiVectorBase<Scalar> > > &multi_vecs,
147  const ArrayView<const Ptr<MultiVectorBase<Scalar> > > &targ_multi_vecs,
148  const ArrayView<const Ptr<RTOpPack::ReductTarget> > &reduct_objs,
149  const Ordinal pri_global_offset_in
150  ) const
151 {
152 
153  using Teuchos::dyn_cast;
154  using Teuchos::Workspace;
155  using Teuchos::rcpFromPtr;
156 
158 
159  const Ordinal numCols = this->domain()->dim();
160  const SpmdVectorSpaceBase<Scalar> &spmdSpc = *this->spmdSpace();
161 
162 #ifdef TEUCHOS_DEBUG
164  in_applyOp_, std::invalid_argument,
165  "SpmdMultiVectorDefaultBase<>::mvMultiReductApplyOpImpl(...): Error, this method is"
166  " being entered recursively which is a clear sign that one of the methods"
167  " acquireDetachedView(...), releaseDetachedView(...) or commitDetachedView(...)"
168  " was not implemented properly!"
169  );
171  "SpmdMultiVectorDefaultBase<>::mvMultiReductApplyOpImpl(...)", *this->domain(),
172  *this->range(), pri_op, multi_vecs, targ_multi_vecs, reduct_objs,
173  pri_global_offset_in);
174 #endif
175 
176  // Flag that we are in applyOp()
177  in_applyOp_ = true;
178 
179  // First see if this is a locally replicated vector in which case
180  // we treat this as a local operation only.
181  const bool locallyReplicated = spmdSpc.isLocallyReplicated();
182 
183  const Range1D local_rng(localOffset_, localOffset_+localSubDim_-1);
184  const Range1D col_rng(0, numCols-1);
185 
186  // Create sub-vector views of all of the *participating* local data
187  Workspace<RTOpPack::ConstSubMultiVectorView<Scalar> >
188  sub_multi_vecs(wss,multi_vecs.size());
189  Workspace<RTOpPack::SubMultiVectorView<Scalar> >
190  targ_sub_multi_vecs(wss,targ_multi_vecs.size());
191  for(int k = 0; k < multi_vecs.size(); ++k ) {
192  sub_multi_vecs[k] = getLocalSubMultiVectorView<Scalar>(rcpFromPtr(multi_vecs[k]));
193  sub_multi_vecs[k].setGlobalOffset(localOffset_+pri_global_offset_in);
194  }
195  for(int k = 0; k < targ_multi_vecs.size(); ++k ) {
196  targ_sub_multi_vecs[k] =
197  getNonconstLocalSubMultiVectorView<Scalar>(rcpFromPtr(targ_multi_vecs[k]));
198  targ_sub_multi_vecs[k].setGlobalOffset(localOffset_+pri_global_offset_in);
199  }
200  Workspace<RTOpPack::ReductTarget*> reduct_objs_ptr(wss, reduct_objs.size());
201  for (int k = 0; k < reduct_objs.size(); ++k) {
202  reduct_objs_ptr[k] = &*reduct_objs[k];
203  }
204 
205  // Apply the RTOp operator object (all processors must participate)
206  RTOpPack::SPMD_apply_op(
207  locallyReplicated ? NULL : spmdSpc.getComm().get(), // comm
208  pri_op, // op
209  col_rng.size(), // num_cols
210  sub_multi_vecs.size(), // multi_vecs.size()
211  sub_multi_vecs.getRawPtr(), // sub_multi_vecs
212  targ_sub_multi_vecs.size(), // targ_multi_vecs.size()
213  targ_sub_multi_vecs.getRawPtr(), // targ_sub_multi_vecs
214  reduct_objs_ptr.getRawPtr() // reduct_objs
215  );
216 
217  // Free and commit the local data
218  for(int k = 0; k < multi_vecs.size(); ++k ) {
219  sub_multi_vecs[k] = RTOpPack::ConstSubMultiVectorView<Scalar>();
220  }
221  for(int k = 0; k < targ_multi_vecs.size(); ++k ) {
222  targ_sub_multi_vecs[k] = RTOpPack::SubMultiVectorView<Scalar>();
223  }
224 
225  // Flag that we are leaving applyOp()
226  in_applyOp_ = false;
227 
228 }
229 
230 
231 template<class Scalar>
233  const Range1D &rowRng_in,
234  const Range1D &colRng_in,
236  ) const
237 {
238  using Teuchos::outArg;
239  const Range1D rowRng = validateRowRange(rowRng_in);
240  const Range1D colRng = validateColRange(colRng_in);
241  if( rowRng.lbound() < localOffset_ || localOffset_+localSubDim_-1 < rowRng.ubound() ) {
242  // rng consists of off-processor elements so use the default implementation!
244  rowRng_in,colRng_in,sub_mv
245  );
246  return;
247  }
248  ArrayRCP<const Scalar> localValues;
249  Ordinal leadingDim = 0;
250  this->getLocalData(outArg(localValues), outArg(leadingDim));
251  sub_mv->initialize(
252  rowRng.lbound(), // globalOffset
253  rowRng.size(), // subDim
254  colRng.lbound(), // colOffset
255  colRng.size(), // numSubCols
256  localValues
257  +(rowRng.lbound()-localOffset_)
258  +colRng.lbound()*leadingDim, // values
259  leadingDim // leadingDim
260  );
261 }
262 
263 
264 template<class Scalar>
267  ) const
268 {
269  if(
270  sub_mv->globalOffset() < localOffset_
271  ||
272  localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim()
273  )
274  {
275  // Let the default implementation handle it!
277  return;
278  }
279  sub_mv->uninitialize();
280 }
281 
282 
283 template<class Scalar>
285  const Range1D &rowRng_in,
286  const Range1D &colRng_in,
288  )
289 {
290  using Teuchos::outArg;
291  const Range1D rowRng = validateRowRange(rowRng_in);
292  const Range1D colRng = validateColRange(colRng_in);
293  if(
294  rowRng.lbound() < localOffset_
295  ||
296  localOffset_+localSubDim_-1 < rowRng.ubound()
297  )
298  {
299  // rng consists of off-processor elements so use the default implementation!
301  rowRng_in, colRng_in, sub_mv
302  );
303  return;
304  }
305  ArrayRCP<Scalar> localValues;
306  Ordinal leadingDim = 0;
307  this->getNonconstLocalData(outArg(localValues), outArg(leadingDim));
308  sub_mv->initialize(
309  rowRng.lbound() // globalOffset
310  ,rowRng.size() // subDim
311  ,colRng.lbound() // colOffset
312  ,colRng.size() // numSubCols
313  ,localValues
314  +(rowRng.lbound()-localOffset_)
315  +colRng.lbound()*leadingDim // values
316  ,leadingDim // leadingDim
317  );
318 }
319 
320 
321 template<class Scalar>
324  )
325 {
326  if(
327  sub_mv->globalOffset() < localOffset_
328  ||
329  localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim()
330  )
331  {
332  // Let the default implementation handle it!
334  return;
335  }
336  sub_mv->uninitialize();
337 }
338 
339 
340 // Protected functions overridden from MultiVectorAdapterBase
341 
342 
343 template<class Scalar>
345  const EOpTransp M_trans,
346  const MultiVectorBase<Scalar> &X,
347  const Ptr<MultiVectorBase<Scalar> > &Y,
348  const Scalar alpha,
349  const Scalar beta
350  ) const
351 {
353  using Teuchos::Workspace;
354  using Teuchos::rcpFromPtr;
356 
357 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
358  Teuchos::Time timerTotal("dummy",true);
359  Teuchos::Time timer("dummy");
360 #endif
361 
362  //
363  // This function performs one of two operations.
364  //
365  // The first operation (M_trans == NOTRANS) is:
366  //
367  // Y = beta * Y + alpha * M * X
368  //
369  // where Y and M have compatible (distributed?) range vector
370  // spaces and X is a locally replicated serial multi-vector. This
371  // operation does not require any global communication.
372  //
373  // The second operation (M_trans == TRANS) is:
374  //
375  // Y = beta * Y + alpha * M' * X
376  //
377  // where M and X have compatible (distributed?) range vector spaces
378  // and Y is a locally replicated serial multi-vector. This operation
379  // requires a local reduction.
380  //
381 
382  //
383  // Get spaces and validate compatibility
384  //
385 
386  // Get the SpmdVectorSpace
387  const SpmdVectorSpaceBase<Scalar> &spmdSpc = *this->spmdSpace();
388 
389  // Get the Spmd communicator
390  const RCP<const Teuchos::Comm<Ordinal> > comm = spmdSpc.getComm();
391  const int procRank = (nonnull(comm) ? comm->getRank() : 0 );
392 
393 #ifdef TEUCHOS_DEBUG
395  &Y_range = *Y->range(),
396  &X_range = *X.range();
397 // std::cout << "SpmdMultiVectorDefaultBase<Scalar>::apply(...): comm = " << comm << std::endl;
399  ( globalDim_ > localSubDim_ ) && is_null(comm), std::logic_error
400  ,"SpmdMultiVectorDefaultBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
401  );
402  // ToDo: Write a good general validation function that I can call that will replace
403  // all of these TEUCHOS_TEST_FOR_EXCEPTION(...) uses
404 
407  ,"SpmdMultiVectorDefaultBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
408  );
410  real_trans(M_trans)==TRANS && !spmdSpc.isCompatible(X_range), Exceptions::IncompatibleVectorSpaces
411  ,"SpmdMultiVectorDefaultBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
412  );
413 #endif
414 
415  //
416  // Get explicit (local) views of Y, M and X
417  //
418 
419 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
420  timer.start();
421 #endif
422 
424  Y_local = getNonconstLocalSubMultiVectorView<Scalar>(rcpFromPtr(Y));
426  M_local = getLocalSubMultiVectorView<Scalar>(rcpFromRef(*this)),
427  X_local = getLocalSubMultiVectorView<Scalar>(rcpFromRef(X));
428 /*
429  DetachedMultiVectorView<Scalar>
430  Y_local(
431  *Y,
432  real_trans(M_trans)==NOTRANS ? Range1D(localOffset_,localOffset_+localSubDim_-1) : Range1D(),
433  Range1D()
434  );
435  ConstDetachedMultiVectorView<Scalar>
436  M_local(
437  *this,
438  Range1D(localOffset_,localOffset_+localSubDim_-1),
439  Range1D()
440  );
441  ConstDetachedMultiVectorView<Scalar>
442  X_local(
443  X
444  ,real_trans(M_trans)==NOTRANS ? Range1D() : Range1D(localOffset_,localOffset_+localSubDim_-1)
445  ,Range1D()
446  );
447 */
448 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
449  timer.stop();
450  std::cout << "\nSpmdMultiVectorDefaultBase<Scalar>::apply(...): Time for getting view = " << timer.totalElapsedTime() << " seconds\n";
451 #endif
452 #ifdef TEUCHOS_DEBUG
454  real_trans(M_trans)==NOTRANS && ( M_local.numSubCols() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
456  ,"SpmdMultiVectorDefaultBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
457  );
459  real_trans(M_trans)==TRANS && ( M_local.subDim() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
461  ,"SpmdMultiVectorDefaultBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
462  );
463 #endif
464 
465  //
466  // If nonlocal (i.e. M_trans==TRANS) then create temporary storage
467  // for:
468  //
469  // Y_local_tmp = alpha * M(local) * X(local) : on nonroot processes
470  //
471  // or
472  //
473  // Y_local_tmp = beta*Y_local + alpha * M(local) * X(local) : on root process (localOffset_==0)
474  //
475  // and set
476  //
477  // localBeta = ( localOffset_ == 0 ? beta : 0.0 )
478  //
479  // Above, we choose localBeta such that we will only perform
480  // Y_local = beta * Y_local + ... on one process (the root
481  // process where localOffset_==0x). Then, when we add up Y_local
482  // on all of the processors and we will get the correct result.
483  //
484  // If strictly local (i.e. M_trans == NOTRANS) then set:
485  //
486  // Y_local_tmp = Y_local
487  // localBeta = beta
488  //
489 
490 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
491  timer.start();
492 #endif
493 
494  // determine if both fields are locally replicated. If they are
495  // there is no need to do any reduction operations.
496  bool locallyReplicated = false;
497  {
498  bool locallyReplicated_this = spmdSpc.isLocallyReplicated();
499  bool locallyReplicated_x = locallyReplicated_this; // x must be compatible with "this"
500  bool locallyReplicated_y = false;
501 
502  Ptr<SpmdMultiVectorBase<Scalar> > spmd_Y = Teuchos::ptr_dynamic_cast<SpmdMultiVectorBase<Scalar> >(Y);
503  if (nonnull(spmd_Y))
504  locallyReplicated_y = spmd_Y->spmdSpace()->isLocallyReplicated();
505 
506  locallyReplicated = locallyReplicated_this && locallyReplicated_x && locallyReplicated_y;
507  }
508 
509  bool isNonLocalAdjoint =
510  (
511  real_trans(M_trans) == TRANS
512  &&
513  (globalDim_ > localSubDim_ || (nonnull(comm) && comm->getSize() > 1))
514  );
515 
516  if (locallyReplicated)
517  isNonLocalAdjoint = false;
518 
519  Workspace<Scalar> Y_local_tmp_store(wss, Y_local.subDim()*Y_local.numSubCols(), false);
521  Scalar localBeta;
522  if (isNonLocalAdjoint) {
523  // Nonlocal
524  Y_local_tmp.initialize(
525  0, Y_local.subDim(),
526  0, Y_local.numSubCols(),
527  Teuchos::arcpFromArrayView(Y_local_tmp_store()),
528  Y_local.subDim() // leadingDim == subDim (columns are adjacent)
529  );
530  if (procRank == 0) {
531  // Root process: Must copy Y_local into Y_local_tmp
532  for( int j = 0; j < Y_local.numSubCols(); ++j ) {
533  typedef typename ArrayRCP<const Scalar>::const_iterator Y_local_values_iter_t;
534  const Y_local_values_iter_t Y_local_j =
535  Y_local.values().begin() + Y_local.leadingDim()*j;
536  std::copy( Y_local_j, Y_local_j + Y_local.subDim(),
537  Y_local_tmp.values().begin() + Y_local_tmp.leadingDim()*j );
538  }
539  localBeta = beta;
540  }
541  else {
542  // Not the root process
543  localBeta = 0.0;
544  }
545  }
546  else {
547  // Local
548  Y_local_tmp = Y_local; // Shallow copy only!
549  localBeta = beta;
550  }
551 
552 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
553  timer.stop();
554  std::cout << "\nSpmdMultiVectorDefaultBase<Scalar>::apply(...): Time for setting up Y_local_tmp and localBeta = " << timer.totalElapsedTime() << " seconds\n";
555 #endif
556 
557  //
558  // Perform the local multiplication:
559  //
560  // Y(local) = localBeta * Y(local) + alpha * op(M(local)) * X(local)
561  //
562  // or in BLAS lingo:
563  //
564  // C = beta * C + alpha * op(A) * op(B)
565  //
566 
567 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
568  timer.start();
569 #endif
570  Teuchos::ETransp t_transp;
571  if(ST::isComplex) {
572  switch(M_trans) {
573  case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
574  case TRANS: t_transp = Teuchos::TRANS; break;
575  case CONJTRANS: t_transp = Teuchos::CONJ_TRANS; break;
576  default: TEUCHOS_TEST_FOR_EXCEPT(true);
577  }
578  }
579  else {
580  switch(real_trans(M_trans)) {
581  case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
582  case TRANS: t_transp = Teuchos::TRANS; break;
583  default: TEUCHOS_TEST_FOR_EXCEPT(true);
584  }
585  }
586  if (M_local.numSubCols() > 0) {
587  // AGS: Added std::max on ld? below, following what is done in
588  // Epetra_MultiVector Multiply use of GEMM. Allows for 0 length.
589  blas_.GEMM(
590  t_transp // TRANSA
591  ,Teuchos::NO_TRANS // TRANSB
592  ,Y_local.subDim() // M
593  ,Y_local.numSubCols() // N
594  ,real_trans(M_trans)==NOTRANS ? M_local.numSubCols() : M_local.subDim() // K
595  ,alpha // ALPHA
596  ,const_cast<Scalar*>(M_local.values().getRawPtr()) // A
597  ,std::max((int) M_local.leadingDim(),1) // LDA
598  ,const_cast<Scalar*>(X_local.values().getRawPtr()) // B
599  ,std::max((int) X_local.leadingDim(),1) // LDB
600  ,localBeta // BETA
601  ,Y_local_tmp.values().getRawPtr() // C
602  ,std::max((int) Y_local_tmp.leadingDim(),1) // LDC
603  );
604  }
605  else {
606  std::fill( Y_local_tmp.values().begin(), Y_local_tmp.values().end(),
607  ST::zero() );
608  }
609 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
610  timer.stop();
611  std::cout
612  << "\nSpmdMultiVectorDefaultBase<Scalar>::apply(...): Time for GEMM = "
613  << timer.totalElapsedTime() << " seconds\n";
614 #endif
615 
616  if (nonnull(comm)) {
617 
618  //
619  // Perform the global reduction of Y_local_tmp back into Y_local
620  //
621 
622  if (isNonLocalAdjoint) {
623  // Contiguous buffer for final reduction
624  Workspace<Scalar> Y_local_final_buff(wss,Y_local.subDim()*Y_local.numSubCols(),false);
625  // Perform the reduction
626  Teuchos::reduceAll<Ordinal,Scalar>(
627  *comm, Teuchos::REDUCE_SUM, Y_local_final_buff.size(), Y_local_tmp.values().getRawPtr(),
628  Y_local_final_buff.getRawPtr()
629  );
630  // Load Y_local_final_buff back into Y_local
631  typedef typename ArrayView<const Scalar>::const_iterator Y_local_final_buff_iter_t;
632  const ArrayView<const Scalar> Y_local_final_buff_av = Y_local_final_buff;
633  Y_local_final_buff_iter_t Y_local_final_buff_ptr = Y_local_final_buff_av.begin();
634  for( int j = 0; j < Y_local.numSubCols(); ++j ) {
635  typedef typename ArrayRCP<Scalar>::iterator Y_local_values_iter_t;
636  Y_local_values_iter_t Y_local_ptr =
637  Y_local.values().begin() + Y_local.leadingDim()*j;
638  for( int i = 0; i < Y_local.subDim(); ++i ) {
639  (*Y_local_ptr++) = (*Y_local_final_buff_ptr++);
640  }
641  }
642  }
643  }
644  else {
645 
646  // When you get here the view Y_local will be committed back to Y
647  // in the destructor to Y_local
648 
649  }
650 
651 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
652  timer.stop();
653  std::cout
654  << "\nSpmdMultiVectorDefaultBase<Scalar>::apply(...): Total time = "
655  << timerTotal.totalElapsedTime() << " seconds\n";
656 #endif
657 
658 }
659 
660 
661 // Protected functions for subclasses to call
662 
663 
664 template<class Scalar>
666 {
667  if(globalDim_ == 0) {
668  const SpmdVectorSpaceBase<Scalar> *l_spmdSpace = this->spmdSpace().get();
669  if(l_spmdSpace) {
670  globalDim_ = l_spmdSpace->dim();
671  localOffset_ = l_spmdSpace->localOffset();
672  localSubDim_ = l_spmdSpace->localSubDim();
673  numCols_ = this->domain()->dim();
674  }
675  else {
676  globalDim_ = 0;
677  localOffset_ = -1;
678  localSubDim_ = 0;
679  numCols_ = 0;
680  }
681  }
682 }
683 
684 
685 template<class Scalar>
687 {
688  const Range1D rowRng = Teuchos::full_range(rowRng_in,0,globalDim_-1);
689 #ifdef TEUCHOS_DEBUG
691  !( 0 <= rowRng.lbound() && rowRng.ubound() < globalDim_ ), std::invalid_argument
692  ,"SpmdMultiVectorDefaultBase<Scalar>::validateRowRange(rowRng): Error, the range rowRng = ["
693  <<rowRng.lbound()<<","<<rowRng.ubound()<<"] is not "
694  "in the range [0,"<<(globalDim_-1)<<"]!"
695  );
696 #endif
697  return rowRng;
698 }
699 
700 
701 template<class Scalar>
703 {
704  const Range1D colRng = Teuchos::full_range(colRng_in,0,numCols_-1);
705 #ifdef TEUCHOS_DEBUG
707  !(0 <= colRng.lbound() && colRng.ubound() < numCols_), std::invalid_argument
708  ,"SpmdMultiVectorDefaultBase<Scalar>::validateColRange(colRng): Error, the range colRng = ["
709  <<colRng.lbound()<<","<<colRng.ubound()<<"] is not "
710  "in the range [0,"<<(numCols_-1)<<"]!"
711  );
712 #endif
713  return colRng;
714 }
715 
716 
717 } // end namespace Thyra
718 
719 
720 #endif // THYRA_SPMD_MULTI_VECTOR_DEFAULT_BASE_DEF_HPP
bool is_null(const boost::shared_ptr< T > &p)
virtual RCP< const VectorSpaceBase< Scalar > > range() const =0
Return a smart pointer for the range space for this operator.
EOpTransp
Enumeration for determining how a linear operator is applied. `*.
virtual void releaseDetachedMultiVectorViewImpl(RTOpPack::ConstSubMultiVectorView< Scalar > *sub_mv) const
Thrown if vector spaces are incompatible.
void apply_op_validate_input(const std::string &func_name, const VectorSpaceBase< Scalar > &space, const RTOpPack::RTOpT< Scalar > &op, const ArrayView< const Ptr< const VectorBase< Scalar > > > &vecs, const ArrayView< const Ptr< VectorBase< Scalar > > > &targ_vecs, const Ptr< RTOpPack::ReductTarget > &reduct_obj, const Ordinal global_offset)
Validate the inputs to VectorBase::applyOp().
RTOpPack::SubMultiVectorView< Scalar > getNonconstLocalSubMultiVectorImpl()
void euclideanApply(const EOpTransp M_trans, const MultiVectorBase< Scalar > &X, const Ptr< MultiVectorBase< Scalar > > &Y, const Scalar alpha, const Scalar beta) const
Uses GEMM() and Teuchos::reduceAll() to implement.
iterator begin() const
Base interface class for SPMD multi-vectors.
void mvMultiReductApplyOpImpl(const RTOpPack::RTOpT< Scalar > &primary_op, const ArrayView< const Ptr< const MultiVectorBase< Scalar > > > &multi_vecs, const ArrayView< const Ptr< MultiVectorBase< Scalar > > > &targ_multi_vecs, const ArrayView< const Ptr< RTOpPack::ReductTarget > > &reduct_objs, const Ordinal primary_global_offset) const
virtual void acquireNonconstDetachedMultiVectorViewImpl(const Range1D &rowRng, const Range1D &colRng, RTOpPack::SubMultiVectorView< Scalar > *sub_mv)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Use the non-transposed operator.
const_pointer const_iterator
T_To & dyn_cast(T_From &from)
EOpTransp real_trans(EOpTransp transp)
Return NOTRANS or TRANS for real scalar valued operators and this also is used for determining struct...
Use the transposed operator with complex-conjugate clements (same as TRANS for real scalar types)...
const ArrayRCP< Scalar > values() const
T * get() const
Abstract interface for objects that represent a space for vectors.
Ordinal ubound() const
RTOpPack::ConstSubMultiVectorView< Scalar > getLocalSubMultiVectorImpl() const
Use the transposed operator.
RCP< const ScalarProdVectorSpaceBase< Scalar > > rangeScalarProdVecSpc() const
Returns spmdSpace.
void start(bool reset=false)
Teuchos::Ordinal Ordinal
Type for the dimension of a vector space. `*.
Interface for a collection of column vectors called a multi-vector.
double stop()
void initialize(Ordinal globalOffset_in, Ordinal subDim_in, Ordinal colOffset_in, Ordinal numSubCols_in, const ArrayRCP< Scalar > &values_in, Ordinal leadingDim_in)
virtual void commitNonconstDetachedMultiVectorViewImpl(RTOpPack::SubMultiVectorView< Scalar > *sub_mv)
void acquireDetachedMultiVectorViewImpl(const Range1D &rowRng, const Range1D &colRng, RTOpPack::ConstSubMultiVectorView< Scalar > *sub_mv) const
void commitNonconstDetachedMultiVectorViewImpl(RTOpPack::SubMultiVectorView< Scalar > *sub_mv)
Ordinal lbound() const
bool nonnull(const boost::shared_ptr< T > &p)
virtual Teuchos::RCP< const Teuchos::Comm< Ordinal > > getComm() const =0
Returns the SPMD communicator.
virtual Ordinal localOffset() const =0
Returns the offset for the local sub-vector stored on this process.
void releaseDetachedMultiVectorViewImpl(RTOpPack::ConstSubMultiVectorView< Scalar > *sub_mv) const
Ordinal size() const
double totalElapsedTime(bool readCurrentTime=false) const
void initialize(Ordinal globalOffset_in, Ordinal subDim_in, Ordinal colOffset_in, Ordinal numSubCols_in, const ArrayRCP< const Scalar > &values_in, Ordinal leadingDim_in)
Range1D validateColRange(const Range1D &rowCol) const
Validate and resize the column range.
virtual void acquireDetachedMultiVectorViewImpl(const Range1D &rowRng, const Range1D &colRng, RTOpPack::ConstSubMultiVectorView< Scalar > *sub_mv) const
void acquireNonconstDetachedMultiVectorViewImpl(const Range1D &rowRng, const Range1D &colRng, RTOpPack::SubMultiVectorView< Scalar > *sub_mv)
virtual bool isCompatible(const VectorSpaceBase< Scalar > &vecSpc) const =0
Compare the compatibility of two vector spaces.
Base abstract VectorSpaceBase class for all SPMD-based vector spaces.
virtual Ordinal dim() const =0
Return the dimension of the vector space.
virtual void updateSpmdSpace()
Subclasses should call whenever the structure of the VectorSpaceBase changes.
#define TEUCHOS_TEST_FOR_EXCEPT(throw_exception_test)
iterator begin() const
TEUCHOSCORE_LIB_DLL_EXPORT Teuchos::RCP< WorkspaceStore > get_default_workspace_store()
virtual bool isLocallyReplicated() const =0
Returns true if vector space is locally replicated space.
Range1D validateRowRange(const Range1D &rowRng) const
Validate and resize the row range.
virtual Ordinal localSubDim() const =0
Returns the number of local elements stored on this process.