Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_MultiVectorFiller.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 #ifndef __Tpetra_MultiVectorFiller_hpp
42 #define __Tpetra_MultiVectorFiller_hpp
43 #include "TpetraCore_config.h"
44 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
45 #include "Tpetra_MultiVector.hpp"
46 #include "Tpetra_Vector.hpp"
47 #include "Teuchos_CommHelpers.hpp"
48 #include <iterator>
49 #include <set>
50 
51 
52 namespace Tpetra {
53 namespace Details {
54  // \fn sortAndMergeIn
55  // \brief Sort and merge newEntries into allEntries, and make unique.
56  //
57  // \param allEntries [in/out]: Array in which current entries have
58  // already been stored, and in which the new entries are to be
59  // stored. Passed by reference in case we need to resize it.
60  //
61  // \param currentEntries [in/out]: Current entries, which we assume
62  // have already been sorted and made unique. Aliases the
63  // beginning of allEntries.
64  //
65  // \param newEntries [in/out] New entries, which have not yet been
66  // sorted or made unique. This does <i>not</i> alias allEntries.
67  //
68  // Sort and make entries of newEntries unique. Resize allEntries if
69  // necessary to fit the unique entries of newEntries. Merge
70  // newEntries into allEntries and make the results unique. (This is
71  // cheaper than sorting the whole array.)
72  //
73  // \warning DO NOT CALL THIS METHOD! This method is DEPRECATED
74  // and will DISAPPEAR VERY SOON.
75  //
76  // \return A view of all the entries (current and new) in allEntries.
77  template<class T>
78  Teuchos::ArrayView<T>
79  sortAndMergeIn (Teuchos::Array<T>& allEntries,
80  Teuchos::ArrayView<T> currentEntries,
81  Teuchos::ArrayView<T> newEntries)
82  {
83  using Teuchos::ArrayView;
84  using Teuchos::as;
85  typedef typename ArrayView<T>::iterator iter_type;
86  typedef typename ArrayView<T>::size_type size_type;
87 
88  std::sort (newEntries.begin(), newEntries.end());
89  iter_type it = std::unique (newEntries.begin(), newEntries.end());
90  const size_type numNew = as<size_type> (it - newEntries.begin());
91  // View of the sorted, made-unique new entries to merge in.
92  ArrayView<T> newEntriesView = newEntries.view (0, numNew);
93 
94  const size_type numCur = currentEntries.size();
95  if (allEntries.size() < numCur + numNew) {
96  allEntries.resize (numCur + numNew);
97  }
98  ArrayView<T> allView = allEntries.view (0, numCur + numNew);
99  ArrayView<T> newView = allEntries.view (numCur, numNew); // target of copy
100 
101  std::copy (newEntries.begin(), newEntries.end(), newView.begin());
102  std::inplace_merge (allView.begin(), newView.begin(), allView.end());
103  iter_type it2 = std::unique (allView.begin(), allView.end());
104  const size_type numTotal = as<size_type> (it2 - allView.begin());
105 
106  return allEntries.view (0, numTotal);
107  }
108 
114  template<class MV>
115  class TPETRA_DEPRECATED MultiVectorFillerData {
116  public:
117  typedef typename MV::scalar_type scalar_type;
118  typedef typename MV::local_ordinal_type local_ordinal_type;
119  typedef typename MV::global_ordinal_type global_ordinal_type;
120  typedef typename MV::node_type node_type;
121 
122  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
123 
135  MultiVectorFillerData (const Teuchos::RCP<const map_type>& map) :
136  map_ (map),
137  numCols_ (0)
138  {}
139 
156  MultiVectorFillerData (const Teuchos::RCP<const map_type>& map,
157  const size_t numColumns) :
158  map_ (map),
159  numCols_ (numColumns),
160  sourceIndices_ (numColumns),
161  sourceValues_ (numColumns)
162  {}
163 
165  void
166  setNumColumns (const size_t newNumColumns)
167  {
168  const size_t oldNumColumns = getNumColumns();
169  if (newNumColumns >= oldNumColumns) {
170  for (size_t j = oldNumColumns; j < newNumColumns; ++j) {
171  sourceIndices_.push_back (Teuchos::Array<global_ordinal_type> ());
172  sourceValues_.push_back (Teuchos::Array<scalar_type> ());
173  }
174  }
175  else {
176  // This may not necessarily deallocate any data, but that's OK.
177  sourceIndices_.resize (newNumColumns);
178  sourceValues_.resize (newNumColumns);
179  }
180  numCols_ = oldNumColumns;
181  }
182 
183  void
184  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
185  const size_t column,
186  const Teuchos::ArrayView<const scalar_type>& values)
187  {
188  if (column >= getNumColumns()) {
189  for (size_t j = column; j < getNumColumns(); ++j) {
190  sourceIndices_.push_back (Teuchos::Array<global_ordinal_type> ());
191  sourceValues_.push_back (Teuchos::Array<scalar_type> ());
192  }
193  }
194  std::copy (rows.begin(), rows.end(), std::back_inserter (sourceIndices_[column]));
195  std::copy (values.begin(), values.end(), std::back_inserter (sourceValues_[column]));
196  }
197 
205  void
206  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
207  const Teuchos::ArrayView<const scalar_type>& values)
208  {
209  typedef global_ordinal_type GO;
210  typedef scalar_type ST;
211  typedef typename Teuchos::ArrayView<const GO>::const_iterator GoIter;
212  typedef typename Teuchos::ArrayView<const ST>::const_iterator StIter;
213 
214  const size_t numColumns = getNumColumns();
215  GoIter rowIter = rows.begin();
216  StIter valIter = values.begin();
217  for (size_t j = 0; j < numColumns; ++j) {
218  GoIter rowIterNext = rowIter + numColumns;
219  StIter valIterNext = valIter + numColumns;
220  std::copy (rowIter, rowIterNext, std::back_inserter (sourceIndices_[j]));
221  std::copy (valIter, valIterNext, std::back_inserter (sourceValues_[j]));
222  rowIter = rowIterNext;
223  valIter = valIterNext;
224  }
225  }
226 
251  template<class BinaryFunction>
252  void
253  locallyAssemble (MV& X, BinaryFunction& f)
254  {
255  using Teuchos::Array;
256  using Teuchos::ArrayRCP;
257  using Teuchos::ArrayView;
258  using Teuchos::RCP;
259  typedef local_ordinal_type LO;
260  typedef global_ordinal_type GO;
261  typedef scalar_type ST;
262  typedef node_type NT;
263 
264  RCP<const ::Tpetra::Map<LO, GO, NT> > map = X.getMap();
265  Array<LO> localIndices;
266  const size_t numColumns = getNumColumns();
267  for (size_t j = 0; j < numColumns; ++j) {
268  const typename Array<const GO>::size_type numIndices = sourceIndices_[j].size();
269  // Precompute all the local indices before saving to the
270  // vector. Hopefully this gives us a little bit of locality
271  // in the global->local conversion, at the expense of a little
272  // more storage.
273  if (localIndices.size() < numIndices) {
274  localIndices.resize (numIndices);
275  }
276  ArrayView<LO> localIndicesView = localIndices.view (0, numIndices);
277  ArrayView<const GO> globalIndicesView = sourceIndices_[j].view (0, numIndices);
278  for (typename ArrayView<const GO>::size_type i = 0; i < numIndices; ++i) {
279  localIndices[i] = map->getLocalElement (globalIndicesView[i]);
280  }
281 
282  ArrayRCP<ST> X_j = X.getDataNonConst (j);
283  ArrayView<const ST> localValues = sourceValues_[j].view (0, numIndices);
284  for (typename ArrayView<const GO>::size_type i = 0; i < numIndices; ++i) {
285  const LO localInd = localIndices[i];
286  X_j[localInd] = f (X_j[localInd], localValues[i]);
287  }
288  }
289  }
290 
292  void
293  locallyAssemble (MV& X)
294  {
295  std::plus<scalar_type> f;
296  locallyAssemble<std::plus<scalar_type> > (X, f);
297  }
298 
300  void clear() {
301  Teuchos::Array<Teuchos::Array<global_ordinal_type> > newSourceIndices;
302  Teuchos::Array<Teuchos::Array<scalar_type> > newSourceValues;
303  // The standard STL idiom for clearing the contents of a vector completely.
304  std::swap (sourceIndices_, newSourceIndices);
305  std::swap (sourceValues_, newSourceValues);
306  }
307 
309  Teuchos::Array<global_ordinal_type>
310  getSourceIndices () const
311  {
312  using Teuchos::Array;
313  using Teuchos::ArrayView;
314  using Teuchos::as;
315  typedef global_ordinal_type GO;
316  typedef typename Array<GO>::size_type size_type;
317 
318  Array<GO> allInds (0); // will resize below
319  const size_t numCols = getNumColumns();
320 
321  if (numCols == 1) {
322  // Special case for 1 column avoids copying indices twice.
323  // Pick the size of the array exactly the first time so there
324  // are at most two allocations (the constructor may choose to
325  // allocate).
326  const size_type numNew = sourceIndices_[0].size();
327  allInds.resize (allInds.size() + numNew);
328  std::copy (sourceIndices_[0].begin(), sourceIndices_[0].end(),
329  allInds.begin());
330  std::sort (allInds.begin(), allInds.end());
331  typename Array<GO>::iterator it =
332  std::unique (allInds.begin(), allInds.end());
333  const size_type numFinal = as<size_type> (it - allInds.begin());
334  allInds.resize (numFinal);
335  }
336  else {
337  // Carefully collect all the row indices one column at a time.
338  // This ensures that the total allocation size in this routine
339  // is independent of the number of columns. Also, only sort
340  // the current column's indices. Use merges to ensure sorted
341  // order in the collected final result.
342  ArrayView<GO> curIndsView = allInds.view (0, 0); // will grow
343  Array<GO> newInds;
344  for (size_t j = 0; j < numCols; ++j) {
345  const size_type numNew = sourceIndices_[j].size();
346  if (numNew > newInds.size()) {
347  newInds.resize (numNew);
348  }
349  ArrayView<GO> newIndsView = newInds.view (0, numNew);
350  std::copy (sourceIndices_[j].begin(), sourceIndices_[j].end(),
351  newIndsView.begin());
352  curIndsView = sortAndMergeIn<GO> (allInds, curIndsView, newIndsView);
353  }
354  }
355  return allInds;
356  }
357 
358  private:
359  Teuchos::RCP<const map_type> map_;
360  size_t numCols_;
361  Teuchos::Array<Teuchos::Array<global_ordinal_type> > sourceIndices_;
362  Teuchos::Array<Teuchos::Array<scalar_type> > sourceValues_;
363 
364  size_t getNumColumns() const { return numCols_; }
365  };
366 
373  template<class MV>
374  class TPETRA_DEPRECATED MultiVectorFillerData2 : public Teuchos::Describable {
375  public:
376  typedef typename MV::scalar_type scalar_type;
377  typedef typename MV::local_ordinal_type local_ordinal_type;
378  typedef typename MV::global_ordinal_type global_ordinal_type;
379  typedef typename MV::node_type node_type;
380 
381  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
382 
393  MultiVectorFillerData2 (const Teuchos::RCP<const map_type>& map,
394  const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_DEFAULT,
395  const Teuchos::RCP<Teuchos::FancyOStream>& out=Teuchos::null) :
396  map_ (map),
397  numCols_ (0),
398  verbLevel_ (verbLevel),
399  out_ (out)
400  {}
401 
419  MultiVectorFillerData2 (const Teuchos::RCP<const map_type>& map,
420  const size_t numColumns,
421  const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_DEFAULT,
422  const Teuchos::RCP<Teuchos::FancyOStream>& out=Teuchos::null) :
423  map_ (map),
424  numCols_ (numColumns),
425  localVec_ (new MV (map, numColumns)),
426 #if 0
427  nonlocalIndices_ (numColumns),
428  nonlocalValues_ (numColumns),
429 #endif // 0
430  verbLevel_ (verbLevel),
431  out_ (out)
432  {}
433 
434  std::string
435  description() const
436  {
437  std::ostringstream oss;
438  oss << "Tpetra::MultiVectorFillerData2<"
439  << Teuchos::TypeNameTraits<MV>::name () << ">";
440  return oss.str();
441  }
442 
443  void
444  describe (Teuchos::FancyOStream& out,
445  const Teuchos::EVerbosityLevel verbLevel =
446  Teuchos::Describable::verbLevel_default) const
447  {
448  using std::endl;
449  using Teuchos::Array;
450  using Teuchos::ArrayRCP;
451  using Teuchos::ArrayView;
452  using Teuchos::RCP;
453  using Teuchos::VERB_DEFAULT;
454  using Teuchos::VERB_NONE;
455  using Teuchos::VERB_LOW;
456  using Teuchos::VERB_MEDIUM;
457  using Teuchos::VERB_HIGH;
458  using Teuchos::VERB_EXTREME;
459 
460  // Set default verbosity if applicable.
461  const Teuchos::EVerbosityLevel vl =
462  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
463 
464  RCP<const Teuchos::Comm<int> > comm = map_->getComm();
465  const int myImageID = comm->getRank();
466  const int numImages = comm->getSize();
467 
468  if (vl != VERB_NONE) {
469  // Don't set the tab level unless we're printing something.
470  Teuchos::OSTab tab (out);
471 
472  if (myImageID == 0) { // >= VERB_LOW prints description()
473  out << this->description() << endl;
474  }
475  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
476  if (myImageID == imageCtr) {
477  if (vl != VERB_LOW) {
478  // At verbosity > VERB_LOW, each process prints something.
479  out << "Process " << myImageID << ":" << endl;
480 
481  Teuchos::OSTab procTab (out);
482  // >= VERB_MEDIUM: print the local vector length.
483  out << "local length=" << localVec_->getLocalLength();
484  if (vl != VERB_MEDIUM) {
485  // >= VERB_HIGH: print isConstantStride() and getStride()
486  if (localVec_->isConstantStride()) {
487  out << ", constant stride=" << localVec_->getStride() << endl;
488  }
489  else {
490  out << ", not constant stride" << endl;
491  }
492  if (vl == VERB_EXTREME) {
493  // VERB_EXTREME: print all the values in the multivector.
494  out << "Local values:" << endl;
495  ArrayRCP<ArrayRCP<const scalar_type> > X = localVec_->get2dView();
496  for (size_t i = 0; i < localVec_->getLocalLength(); ++i) {
497  for (size_t j = 0; j < localVec_->getNumVectors(); ++j) {
498  out << X[j][i];
499  if (j < localVec_->getNumVectors() - 1) {
500  out << " ";
501  }
502  } // for each column
503  out << endl;
504  } // for each row
505 
506 #if 0
507  out << "Nonlocal indices and values:" << endl;
508  for (size_t j = 0; j < (size_t)nonlocalIndices_.size(); ++j) {
509  ArrayView<const global_ordinal_type> inds = nonlocalIndices_[j]();
510  ArrayView<const scalar_type> vals = nonlocalValues_[j]();
511  for (typename ArrayView<const global_ordinal_type>::size_type k = 0;
512  k < inds.size(); ++k) {
513  out << "X(" << inds[k] << "," << j << ") = " << vals[k] << endl;
514  }
515  }
516 #endif // 0
517  } // if vl == VERB_EXTREME
518  } // if (vl != VERB_MEDIUM)
519  else { // vl == VERB_LOW
520  out << endl;
521  }
522  } // if vl != VERB_LOW
523  } // if it is my process' turn to print
524  } // for each process in the communicator
525  } // if vl != VERB_NONE
526  }
527 
529  Teuchos::Array<global_ordinal_type>
530  getSourceIndices (const Teuchos::Comm<int>& comm,
531  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
532  const bool debug = false) const
533  {
534  using Teuchos::Array;
535  using Teuchos::ArrayView;
536  using Teuchos::RCP;
537  using Teuchos::rcp;
538  using Tpetra::global_size_t;
539  typedef global_ordinal_type GO;
540  const char prefix[] = "Tpetra::MultiVectorFiller::getSourceIndices: ";
541 
542  if (debug && ! out.is_null ()) {
543  std::ostringstream os;
544  os << "Proc " << comm.getRank () << ": getSourceIndices" << std::endl;
545  *out << os.str ();
546  }
547 
548  // Get the nonlocal row indices, sorted and made unique.
549  // It's fair to assume that these are not contiguous.
550  Array<GO> nonlocalIndices = getSortedUniqueNonlocalIndices (comm, out, debug);
551 
552  // Get the local row indices, not necessarily sorted or unique.
553  ArrayView<const GO> localIndices = getLocalIndices ();
554 
555  // Copy the local indices into the full indices array, and sort
556  // them there. We'll merge in the nonlocal indices below. This
557  // can be more efficient than just sorting all the indices, if
558  // there are a lot of nonlocal indices.
559  Array<GO> indices (localIndices.size () + nonlocalIndices.size ());
560  ArrayView<GO> localIndView = indices.view (0, localIndices.size ());
561  TEUCHOS_TEST_FOR_EXCEPTION
562  (localIndView.size () > indices.size (), std::logic_error,
563  prefix << "localIndView.size() = " << localIndView.size ()
564  << " > indices.size() = " << indices.size () << ".");
565  TEUCHOS_TEST_FOR_EXCEPTION
566  (localIndView.size () != localIndices.size (), std::logic_error,
567  prefix << "localIndView.size() = " << localIndView.size ()
568  << " != localIndices.size() = " << localIndices.size () << ".");
569 
570  std::copy (localIndices.begin (), localIndices.end (), localIndView.begin ());
571  std::sort (localIndView.begin (), localIndView.end ());
572 
573  if (debug && ! out.is_null ()) {
574  std::ostringstream os;
575  os << "Proc " << comm.getRank () << ": Right after copying and sorting" << std::endl;
576  *out << os.str ();
577  }
578 
579  // Merge the local and nonlocal indices.
580  if (nonlocalIndices.size () > 0) {
581  ArrayView<GO> nonlclIndView =
582  indices.view (localIndices.size (), nonlocalIndices.size ());
583 
584  // We need a view of the output array, because
585  // std::inplace_merge needs all its iterator inputs to have
586  // the same type. Debug mode builds are pickier than release
587  // mode builds, because the iterators in a debug mode build
588  // are of a different type that does run-time checking (they
589  // aren't just raw pointers).
590  ArrayView<GO> indView = indices.view (0, indices.size ());
591  if (debug && ! out.is_null ()) {
592  std::ostringstream os;
593  os << "Right before std::copy" << std::endl;
594  *out << os.str ();
595  }
596  std::copy (nonlocalIndices.begin (),
597  nonlocalIndices.end (),
598  nonlclIndView.begin ());
599  if (debug && ! out.is_null ()) {
600  std::ostringstream os;
601  os << "Proc " << comm.getRank () << ": Right before std::inplace_merge"
602  << std::endl;
603  *out << os.str ();
604  }
605  std::inplace_merge (indView.begin (),
606  indView.begin () + localIndices.size (),
607  indView.end ());
608  }
609 
610  if (debug && ! out.is_null ()) {
611  std::ostringstream os;
612  os << "Proc " << comm.getRank () << ": Done with getSourceIndices"
613  << std::endl;
614  *out << os.str ();
615  }
616  return indices;
617  }
618 
628  void
629  setNumColumns (const size_t newNumColumns)
630  {
631  using Teuchos::Array;
632  using Teuchos::Range1D;
633  using Teuchos::RCP;
634 
635  const size_t oldNumColumns = numCols_;
636  if (newNumColumns == oldNumColumns) {
637  return; // No side effects if no change.
638  }
639 
640  RCP<MV> newLclVec;
641  if (newNumColumns > oldNumColumns) {
642  newLclVec = rcp (new MV (map_, newNumColumns));
643  // Assign the contents of the old local multivector to the
644  // first oldNumColumns columns of the new local multivector,
645  // then get rid of the old local multivector.
646  RCP<MV> newLclVecView =
647  newLclVec->subViewNonConst (Range1D (0, oldNumColumns-1));
648  *newLclVecView = *localVec_;
649  }
650  else {
651  if (newNumColumns == 0) {
652  // Tpetra::MultiVector doesn't let you construct a
653  // multivector with zero columns.
654  newLclVec = Teuchos::null;
655  }
656  else {
657  newLclVec = localVec_->subViewNonConst (Range1D (0, newNumColumns-1));
658  }
659  }
660 
661  // Leave most side effects until the end, for exception safety.
662  localVec_ = newLclVec;
663  numCols_ = newNumColumns;
664  }
665 
671  void
672  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
673  const size_t columnIndex,
674  const Teuchos::ArrayView<const scalar_type>& values)
675  {
676  using Teuchos::ArrayView;
677  typedef local_ordinal_type LO;
678  typedef global_ordinal_type GO;
679  typedef scalar_type ST;
680  typedef decltype (rows.size ()) size_type;
681  const char prefix[] = "Tpetra::MultiVectorFiller::sumIntoGlobalValues: ";
682 
683  if (map_.is_null ()) {
684  return; // the calling process is not participating
685  }
686  const size_type numEnt = rows.size ();
687  TEUCHOS_TEST_FOR_EXCEPTION
688  (numEnt != values.size (), std::invalid_argument, prefix
689  << "rows.size() = " << numEnt << " != values.size() = "
690  << values.size () << ".");
691 
692  // FIXME (31 Aug 2015) Don't do this.
693  if (columnIndex >= getNumColumns()) {
694  // Automatically expand the number of columns. This
695  // implicitly ensures that localVec_ is not null.
696  setNumColumns (columnIndex + 1);
697  }
698 
699  // It would be a lot more efficient for users to get the Kokkos
700  // view outside of their fill loop, and fill into it directly.
701  auto X_j = localVec_->getVector (columnIndex);
702  auto X_j_2d = X_j->template getLocalView<typename MV::dual_view_type::t_host::memory_space> ();
703  auto X_j_1d = Kokkos::subview (X_j_2d, Kokkos::ALL (), 0);
704 
705  const map_type& theMap = *map_;
706  for (size_type k = 0; k < numEnt; ++k) {
707  const ST val = values[k];
708  const GO gblRowInd = rows[k];
709  const LO lclRowInd = theMap.getLocalElement (gblRowInd);
710  if (lclRowInd == Teuchos::OrdinalTraits<LO>::invalid ()) {
711  // The row doesn't belong to the calling process, so stuff
712  // its data in nonlocals_.
713 
714  auto& innerMap = nonlocals_[columnIndex];
715  auto innerIter = innerMap.find (gblRowInd);
716  if (innerIter == innerMap.end ()) {
717  innerMap.insert (std::make_pair (gblRowInd, values[k]));
718  } else {
719  innerIter->second += val;
720  }
721  }
722  else {
723  // FIXME (mfh 27 Feb 2012) Allow different combine modes.
724  // The current combine mode just adds to the current value
725  // at that location.
726  X_j_1d[lclRowInd] += val;
727  }
728  }
729  }
730 
740  void
741  sumIntoGlobalValues (const Teuchos::ArrayView<const global_ordinal_type>& rows,
742  const Teuchos::ArrayView<const scalar_type>& values)
743  {
744  using Teuchos::ArrayView;
745  typedef typename ArrayView<const global_ordinal_type>::size_type size_type;
746 
747  const size_t numCols = getNumColumns();
748  for (size_t j = 0; j < numCols; ++j) {
749  const size_type offset = numCols*j;
750  const size_type len = numCols;
751  this->sumIntoGlobalValues (rows.view (offset, len), j, values.view (offset, len));
752  }
753  }
754 
779  template<class BinaryFunction>
780  void
781  locallyAssemble (MV& X, BinaryFunction& f)
782  {
783  typedef scalar_type ST;
784  typedef local_ordinal_type LO;
785  typedef global_ordinal_type GO;
786  typedef typename MV::dual_view_type::t_host::memory_space host_memory_space;
787 
788  if (X.getMap ().is_null ()) {
789  return; // this process is not participating
790  }
791  const map_type& srcMap = * (X.getMap ());
792 
793 
794 
795  for (size_t j = 0; j < X.getNumVectors (); ++j) {
796  auto X_j = X.getVector (j);
797  auto X_j_2d = X_j->template getLocalView<host_memory_space> ();
798  auto X_j_1d = Kokkos::subview (X_j_2d, Kokkos::ALL (), 0);
799 
800  // First, assemble in the local components from localVec_.
801  if (! localVec_.is_null () && ! localVec_->getMap ().is_null ()
802  && j < localVec_->getNumVectors ()) {
803  auto lcl_j = localVec_->getVector (j);
804  auto lcl_j_2d = lcl_j->template getLocalView<host_memory_space> ();
805  auto lcl_j_1d = Kokkos::subview (lcl_j_2d, Kokkos::ALL (), 0);
806 
807  // Iterate over all rows of localVec_. i_lcl is a local
808  // index with respect to localVec_'s Map, which may differ
809  // from X's Map.
810  const map_type& lclMap = * (localVec_->getMap ());
811  const LO lclNumRows = static_cast<LO> (lcl_j->getLocalLength ());
812  for (LO i_lcl = 0; i_lcl < lclNumRows; ++i_lcl) {
813  const GO i_gbl = lclMap.getGlobalElement (i_lcl);
814  const LO i_X = srcMap.getLocalElement (i_gbl);
815  X_j_1d(i_X) = f (X_j_1d(i_X), lcl_j_1d(i_lcl));
816  }
817  }
818 
819  // Second, assemble in the nonlocal components.
820  auto outerIter = nonlocals_.find (j);
821  if (outerIter != nonlocals_.end ()) {
822  auto beg = outerIter->second.begin ();
823  auto end = outerIter->second.end ();
824  for (auto innerIter = beg; innerIter != end; ++innerIter) {
825  const GO gblRowInd = innerIter->first;
826  const LO lclRowInd = srcMap.getLocalElement (gblRowInd);
827 
828  if (lclRowInd != Teuchos::OrdinalTraits<LO>::invalid ()) {
829  const ST val = innerIter->second;
830  X_j_1d(lclRowInd) = f (X_j_1d(lclRowInd), val);
831  }
832  }
833  }
834  }
835  }
836 
842  void
843  locallyAssemble (MV& X)
844  {
845  std::plus<scalar_type> f;
846  locallyAssemble<std::plus<scalar_type> > (X, f);
847  }
848 
854  void clear ()
855  {
856  std::map<size_t, std::map<global_ordinal_type, scalar_type> > newNonlcls;
857  // The standard STL idiom for clearing the contents of a
858  // container completely. Setting the size to zero may not
859  // necessarily deallocate data.
860  std::swap (nonlocals_, newNonlcls);
861 
862  // Don't actually deallocate the multivector of local entries.
863  // Just fill it with zero. This is because the caller hasn't
864  // reset the number of columns.
865  if (! localVec_.is_null ()) {
866  localVec_->putScalar (Teuchos::ScalarTraits<scalar_type>::zero());
867  }
868  }
869 
870  private:
872  Teuchos::RCP<const map_type> map_;
873 
875  size_t numCols_;
876 
878  Teuchos::RCP<MV> localVec_;
879 
881  std::map<size_t, std::map<global_ordinal_type, scalar_type> > nonlocals_;
882 
884  Teuchos::EVerbosityLevel verbLevel_;
885 
887  Teuchos::RCP<Teuchos::FancyOStream> out_;
888 
890  size_t getNumColumns() const { return numCols_; }
891 
893  Teuchos::ArrayView<const global_ordinal_type>
894  getLocalIndices() const
895  {
896  return map_->getNodeElementList ();
897  }
898 
900  Teuchos::Array<global_ordinal_type>
901  getSortedUniqueNonlocalIndices (const Teuchos::Comm<int>& comm,
902  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
903  const bool debug = false) const
904  {
905  using Teuchos::Array;
906  using Teuchos::ArrayView;
907  using Teuchos::as;
908  typedef global_ordinal_type GO;
909 
910  if (debug && ! out.is_null ()) {
911  std::ostringstream os;
912  os << "Proc " << comm.getRank () << ": getSortedUniqueNonlocalIndices"
913  << std::endl;
914  *out << os.str ();
915  }
916 
917  // FIXME (mfh 30 Aug 2015) The std::set algorithm is harder to
918  // thread-parallelize, but it should be easier to make the new
919  // test pass with this.
920  std::set<GO> indsOut;
921  const size_t numCols = getNumColumns ();
922  for (size_t j = 0; j < numCols; ++j) {
923  auto outerIter = nonlocals_.find (j);
924  if (outerIter != nonlocals_.end ()) {
925  auto beg = outerIter->second.begin ();
926  auto end = outerIter->second.end ();
927  for (auto innerIter = beg; innerIter != end; ++innerIter) {
928  // *innerIter: (global row index, value) pair.
929  indsOut.insert (innerIter->first);
930  }
931  }
932  }
933 
934  if (debug && ! out.is_null ()) {
935  std::ostringstream os;
936  os << "Proc " << comm.getRank () << ": Made nonlocals set" << std::endl;
937  *out << os.str ();
938  }
939 
940  Array<GO> allNonlocals (indsOut.begin (), indsOut.end ());
941  if (debug && ! out.is_null ()) {
942  std::ostringstream os;
943  os << "Proc " << comm.getRank () << ": Done with "
944  "getSortedUniqueNonlocalIndices" << std::endl;
945  *out << os.str ();
946  }
947  return allNonlocals;
948  }
949  };
950 } // namespace Details
951 } // namespace Tpetra
952 
953 namespace Tpetra {
954 
979  template<class MV>
980  class TPETRA_DEPRECATED MultiVectorFiller {
981  public:
982  typedef typename MV::scalar_type scalar_type;
983  typedef typename MV::local_ordinal_type local_ordinal_type;
984  typedef typename MV::global_ordinal_type global_ordinal_type;
985  typedef typename MV::node_type node_type;
986  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
987 
1017  MultiVectorFiller (const Teuchos::RCP<const map_type>& map,
1018  const size_t numCols);
1019 
1039  void
1040  globalAssemble (MV& X_out, const bool forceReuseMap = false);
1041 
1043  void clear () {
1044  data_.clear ();
1045  }
1046 
1047  void
1048  describe (Teuchos::FancyOStream& out,
1049  const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
1050  {
1051  data_.describe (out, verbLevel);
1052  }
1053 
1064  void
1065  sumIntoGlobalValues (Teuchos::ArrayView<const global_ordinal_type> rows,
1066  size_t column,
1067  Teuchos::ArrayView<const scalar_type> values)
1068  {
1069  data_.sumIntoGlobalValues (rows, column, values);
1070  }
1071 
1087  void
1088  sumIntoGlobalValues (Teuchos::ArrayView<const global_ordinal_type> rows,
1089  Teuchos::ArrayView<const scalar_type> values)
1090  {
1091  data_.sumIntoGlobalValues (rows, values);
1092  }
1093 
1094  private:
1096  Teuchos::RCP<const map_type> ctorMap_;
1097 
1102  Teuchos::RCP<const map_type> sourceMap_;
1103 
1109  Teuchos::RCP<const map_type> targetMap_;
1110 
1123  Teuchos::RCP<MV> sourceVec_;
1124 
1129  Tpetra::Details::MultiVectorFillerData2<MV> data_;
1130 
1132  Teuchos::RCP<export_type> exporter_;
1133 
1139  void locallyAssemble (MV& X_in) {
1140  data_.locallyAssemble (X_in);
1141  }
1142 
1144  Teuchos::Array<global_ordinal_type>
1145  getSourceIndices (const Teuchos::Comm<int>& comm,
1146  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
1147  const bool debug = false) const
1148  {
1149  return data_.getSourceIndices (comm, out, debug);
1150  }
1151 
1160  Teuchos::RCP<const map_type>
1161  computeSourceMap (const global_ordinal_type indexBase,
1162  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
1163  const Teuchos::RCP<Teuchos::FancyOStream>& out = Teuchos::null,
1164  const bool debug = false);
1165  };
1166 
1167  template<class MV>
1168  MultiVectorFiller<MV>::MultiVectorFiller (const Teuchos::RCP<const typename MultiVectorFiller<MV>::map_type>& map, const size_t numCols)
1169  : ctorMap_ (map),
1170  sourceMap_ (Teuchos::null),
1171  targetMap_ (Teuchos::null),
1172  data_ (map, numCols),
1173  exporter_ (Teuchos::null)
1174  {}
1175 
1176  template<class MV>
1177  Teuchos::RCP<const typename MultiVectorFiller<MV>::map_type>
1178  MultiVectorFiller<MV>::
1179  computeSourceMap (const global_ordinal_type indexBase,
1180  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
1181  const Teuchos::RCP<Teuchos::FancyOStream>& out,
1182  const bool debug)
1183  {
1184  using Teuchos::Array;
1185  using Teuchos::ArrayView;
1186  using Teuchos::rcp;
1187  typedef global_ordinal_type GO;
1188 
1189  if (debug && ! out.is_null ()) {
1190  out->pushTab ();
1191  std::ostringstream os;
1192  const int myRank = comm->getRank ();
1193  os << "Proc " << myRank << ": computeSourceMap" << std::endl;
1194  *out << os.str ();
1195  }
1196 
1197  Array<GO> indices = getSourceIndices (*comm, out, debug);
1198 
1199  if (debug && ! out.is_null ()) {
1200  std::ostringstream os;
1201  const int myRank = comm->getRank ();
1202  os << "Proc " << myRank << ": computeSourceMap: About to create Map"
1203  << std::endl;
1204  *out << os.str ();
1205  out->popTab ();
1206  }
1207 
1208  // Passing "invalid" for the numGlobalElements argument of the Map
1209  // constructor tells the Map to compute the global number of
1210  // elements itself.
1211  typedef Tpetra::global_size_t GST;
1212  const GST INV = Teuchos::OrdinalTraits<GST>::invalid ();
1213  return rcp (new map_type (INV, indices, indexBase, comm));
1214  }
1215 
1216  template<class MV>
1217  void
1218  MultiVectorFiller<MV>::
1219  globalAssemble (MV& X_out, const bool forceReuseMap)
1220  {
1221  using Teuchos::ArrayView;
1222  using Teuchos::Array;
1223  using Teuchos::Range1D;
1224  using Teuchos::RCP;
1225  using Teuchos::rcp;
1226  RCP<Teuchos::FancyOStream> outPtr;
1227  const bool debug = false;
1228 
1229  if (debug) {
1230  outPtr = Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cerr));
1231  outPtr->pushTab ();
1232  }
1233 
1234  const size_t numVecs = X_out.getNumVectors ();
1235  if (numVecs == 0) {
1236  // Nothing to do! Of course, this does not check for whether
1237  // X_out has the right number of rows. That's OK, though.
1238  // Compare to the definition of the BLAS' _AXPY for an input
1239  // vector containing NaNs, but multiplied by alpha=0.
1240  return;
1241  }
1242  //
1243  // Get the target Map of the Export. If X_out's Map is the same
1244  // as the target Map from last time, then we may be able to
1245  // recycle the Export from last time, if the source Map is also
1246  // the same.
1247  //
1248  RCP<const map_type> targetMap;
1249  bool assumeSameTargetMap = false;
1250  if (targetMap_.is_null()) {
1251  targetMap_ = X_out.getMap();
1252  targetMap = targetMap_;
1253  assumeSameTargetMap = false;
1254  }
1255  else {
1256  if (forceReuseMap) {
1257  targetMap = targetMap_;
1258  assumeSameTargetMap = true;
1259  }
1260  else {
1261  // If X_out's Map is the same as targetMap_, we may be able to
1262  // reuse the Export. Constructing the Export may be more
1263  // expensive than calling isSameAs() (which requires just a
1264  // few reductions and reading through the lists of owned
1265  // global indices), so it's worth checking.
1266  if (targetMap_->isSameAs (*(X_out.getMap()))) {
1267  assumeSameTargetMap = true;
1268  targetMap = targetMap_;
1269  }
1270  }
1271  }
1272 
1273  if (debug && ! outPtr.is_null ()) {
1274  std::ostringstream os;
1275  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1276  const int myRank = comm.getRank ();
1277  os << "Proc " << myRank << ": Right before getting source Map" << std::endl;
1278  *outPtr << os.str ();
1279  }
1280 
1281  //
1282  // Get the source Map of the Export. If the source Map of the
1283  // Export is the same as last time, then we may be able to recycle
1284  // the Export from last time, if the target Map is also the same.
1285  //
1286  RCP<const map_type> sourceMap;
1287  bool computedSourceMap = false;
1288  {
1289  if (forceReuseMap && ! sourceMap_.is_null()) {
1290  sourceMap = sourceMap_;
1291  }
1292  else {
1293  sourceMap = computeSourceMap (ctorMap_->getIndexBase (),
1294  ctorMap_->getComm (),
1295  outPtr, debug);
1296  computedSourceMap = true;
1297  }
1298  }
1299  if (computedSourceMap) {
1300  sourceMap_ = sourceMap;
1301  }
1302 
1303  if (debug && ! outPtr.is_null ()) {
1304  std::ostringstream os;
1305  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1306  const int myRank = comm.getRank ();
1307  os << "Proc " << myRank << ": Right before computing Export" << std::endl;
1308  *outPtr << os.str ();
1309  }
1310 
1311  //
1312  // Now that we have the source and target Maps of the Export, we
1313  // can check whether we can recycle the Export from last time.
1314  //
1315  const bool mustComputeExport =
1316  (exporter_.is_null() || (assumeSameTargetMap && ! computedSourceMap));
1317  if (mustComputeExport) {
1318  exporter_ = rcp (new export_type (sourceMap_, targetMap_));
1319  }
1320 
1321  if (debug && ! outPtr.is_null ()) {
1322  std::ostringstream os;
1323  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1324  const int myRank = comm.getRank ();
1325  os << "Proc " << myRank << ": Right after computing Export" << std::endl;
1326  *outPtr << os.str ();
1327  }
1328 
1329  // Source multivector for the Export.
1330  RCP<MV> X_in;
1331  const bool mustReallocateVec = sourceVec_.is_null() ||
1332  sourceVec_->getNumVectors() < numVecs || ! assumeSameTargetMap;
1333 
1334  if (mustReallocateVec) {
1335  // Reallocating nonlocalVec_ ensures that it has the right Map.
1336  sourceVec_ = rcp (new MV (sourceMap_, numVecs));
1337  X_in = sourceVec_;
1338  } else {
1339  if (sourceVec_->getNumVectors() == numVecs) {
1340  X_in = sourceVec_;
1341  } else { // sourceVec_ has more vectors than needed.
1342  X_in = sourceVec_->subViewNonConst (Range1D (0, numVecs-1));
1343  }
1344  }
1345 
1346  if (debug && ! outPtr.is_null ()) {
1347  std::ostringstream os;
1348  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1349  const int myRank = comm.getRank ();
1350  os << "Proc " << myRank << ": Right before locallyAssemble" << std::endl;
1351  *outPtr << os.str ();
1352  }
1353 
1354  // "Locally assemble" the data into X_in by summing together
1355  // entries with the same indices.
1356  locallyAssemble (*X_in);
1357 
1358  if (debug && ! outPtr.is_null ()) {
1359  std::ostringstream os;
1360  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1361  const int myRank = comm.getRank ();
1362  os << "Proc " << myRank << ": Right after locallyAssemble" << std::endl;
1363  *outPtr << os.str ();
1364  }
1365 
1366  // Do the Export.
1367  const Tpetra::CombineMode combineMode = Tpetra::ADD;
1368  X_out.doExport (*X_in, *exporter_, combineMode);
1369 
1370  if (debug && ! outPtr.is_null ()) {
1371  std::ostringstream os;
1372  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1373  const int myRank = comm.getRank ();
1374  os << "Proc " << myRank << ": Right after Export" << std::endl;
1375  *outPtr << os.str ();
1376  }
1377 
1378  // Clean out the locally owned data for next time.
1379  X_in->putScalar (Teuchos::ScalarTraits<scalar_type>::zero ());
1380 
1381  // FIXME (mfh 27 Aug 2015) Clean out the remote data for next
1382  // time. See Bug 6398.
1383 
1384  if (debug && ! outPtr.is_null ()) {
1385  std::ostringstream os;
1386  const Teuchos::Comm<int>& comm = * (X_out.getMap ()->getComm ());
1387  const int myRank = comm.getRank ();
1388  os << "Proc " << myRank << ": Done with globalAssemble" << std::endl;
1389  *outPtr << os.str ();
1390  outPtr->pushTab ();
1391  }
1392  }
1393 
1394  namespace Test {
1395 
1401  template<class MV>
1402  class TPETRA_DEPRECATED MultiVectorFillerTester {
1403  public:
1404  typedef typename MV::scalar_type scalar_type;
1405  typedef typename MV::local_ordinal_type local_ordinal_type;
1406  typedef typename MV::global_ordinal_type global_ordinal_type;
1407  typedef typename MV::node_type node_type;
1408  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
1409 
1418  static void
1419  testSameMap (const Teuchos::RCP<const map_type>& targetMap,
1420  const global_ordinal_type eltSize, // Must be odd
1421  const size_t numCols,
1422  const Teuchos::RCP<Teuchos::FancyOStream>& outStream=Teuchos::null,
1423  const Teuchos::EVerbosityLevel verbosityLevel=Teuchos::VERB_DEFAULT)
1424  {
1425  using Teuchos::Array;
1426  using Teuchos::ArrayRCP;
1427  using Teuchos::ArrayView;
1428  using Teuchos::as;
1429  using Teuchos::Comm;
1430  using Teuchos::FancyOStream;
1431  using Teuchos::getFancyOStream;
1432  using Teuchos::oblackholestream;
1433  using Teuchos::ptr;
1434  using Teuchos::RCP;
1435  using Teuchos::rcp;
1436  using Teuchos::rcpFromRef;
1437  using Teuchos::REDUCE_SUM;
1438  using Teuchos::reduceAll;
1439  using std::cerr;
1440  using std::endl;
1441 
1442  typedef local_ordinal_type LO;
1443  typedef global_ordinal_type GO;
1444  typedef scalar_type ST;
1445  typedef Teuchos::ScalarTraits<ST> STS;
1446 
1447  TEUCHOS_TEST_FOR_EXCEPTION(eltSize % 2 == 0, std::invalid_argument,
1448  "Element size (eltSize) argument must be odd.");
1449  TEUCHOS_TEST_FOR_EXCEPTION(numCols == 0, std::invalid_argument,
1450  "Number of columns (numCols) argument must be nonzero.");
1451  // Default behavior is to print nothing out.
1452  RCP<FancyOStream> out = outStream.is_null() ?
1453  getFancyOStream (rcp (new oblackholestream)) : outStream;
1454  const Teuchos::EVerbosityLevel verbLevel =
1455  (verbosityLevel == Teuchos::VERB_DEFAULT) ?
1456  Teuchos::VERB_NONE : verbosityLevel;
1457 
1458  //RCP<MV> X = rcp (new MV (targetMap, numCols));
1459 
1460  const GO indexBase = targetMap->getIndexBase();
1461  Array<GO> rows (eltSize);
1462  Array<ST> values (eltSize);
1463  std::fill (values.begin(), values.end(), STS::one());
1464 
1465  // Make this a pointer so we can free its contents, in case
1466  // those contents depend on the input to globalAssemble().
1467  RCP<MultiVectorFiller<MV> > filler =
1468  rcp (new MultiVectorFiller<MV> (targetMap, numCols));
1469 
1470  TEUCHOS_TEST_FOR_EXCEPTION(! targetMap->isContiguous(),
1471  std::invalid_argument, "MultiVectorFiller test currently only works "
1472  "for contiguous Maps.");
1473 
1474  const GO minGlobalIndex = targetMap->getMinGlobalIndex();
1475  const GO maxGlobalIndex = targetMap->getMaxGlobalIndex();
1476  const GO minAllGlobalIndex = targetMap->getMinAllGlobalIndex();
1477  const GO maxAllGlobalIndex = targetMap->getMaxAllGlobalIndex();
1478  for (size_t j = 0; j < numCols; ++j) {
1479  for (GO i = minGlobalIndex; i <= maxGlobalIndex; ++i) {
1480  // Overlap over processes, without running out of bounds.
1481  const GO start = std::max (i - eltSize/2, minAllGlobalIndex);
1482  const GO end = std::min (i + eltSize/2, maxAllGlobalIndex);
1483  const GO len = end - start + 1;
1484 
1485  TEUCHOS_TEST_FOR_EXCEPTION(len > eltSize, std::logic_error,
1486  "At start,end = " << start << "," << end << ", len = " << len
1487  << " > eltSize = " << eltSize << ".");
1488 
1489  for (GO k = 0; k < len; ++k) {
1490  rows[k] = start + k;
1491  }
1492  if (verbLevel == Teuchos::VERB_EXTREME) {
1493  *out << "Inserting: "
1494  << Teuchos::toString (rows.view(0,len)) << ", "
1495  << Teuchos::toString (values.view(0, len)) << std::endl;
1496  }
1497  filler->sumIntoGlobalValues (rows.view(0, len), j, values.view(0, len));
1498  }
1499  }
1500 
1501  if (verbLevel == Teuchos::VERB_EXTREME) {
1502  *out << "Filler:" << std::endl;
1503  filler->describe (*out, verbLevel);
1504  *out << std::endl;
1505  }
1506 
1507  MV X_out (targetMap, numCols);
1508  filler->globalAssemble (X_out);
1509  filler = Teuchos::null;
1510 
1511  if (verbLevel == Teuchos::VERB_EXTREME) {
1512  *out << "X_out:" << std::endl;
1513  X_out.describe (*out, verbLevel);
1514  *out << std::endl;
1515  }
1516 
1517  // Create multivector for comparison.
1518  MV X_expected (targetMap, numCols);
1519  const scalar_type two = STS::one() + STS::one();
1520  for (size_t j = 0; j < numCols; ++j) {
1521  ArrayRCP<ST> X_j = X_expected.getDataNonConst (j);
1522 
1523  // Each entry of the column should have the value eltSize,
1524  // except for the first and last few entries in the whole
1525  // column (globally, not locally).
1526  for (GO i = minGlobalIndex; i <= maxGlobalIndex; ++i) {
1527  const LO localIndex = targetMap->getLocalElement (i);
1528  TEUCHOS_TEST_FOR_EXCEPTION(i == Teuchos::OrdinalTraits<LO>::invalid(),
1529  std::logic_error, "Global index " << i << " is not in the "
1530  "multivector's Map.");
1531 
1532  if (i <= minAllGlobalIndex + eltSize/2) {
1533  X_j[localIndex] = two + as<ST>(i) - as<ST>(indexBase);
1534  }
1535  else if (i >= maxAllGlobalIndex - eltSize/2) {
1536  X_j[localIndex] = two + as<ST>(maxAllGlobalIndex) - as<ST>(i);
1537  }
1538  else {
1539  X_j[localIndex] = as<ST>(eltSize);
1540  }
1541  } // for each global index which my process owns
1542  } // for each column of the multivector
1543 
1544  if (verbLevel == Teuchos::VERB_EXTREME) {
1545  *out << "X_expected:" << std::endl;
1546  X_expected.describe (*out, verbLevel);
1547  *out << std::endl;
1548  }
1549 
1550  Array<GO> errorLocations;
1551  for (size_t j = 0; j < numCols; ++j) {
1552  ArrayRCP<const ST> X_out_j = X_out.getData (j);
1553  ArrayRCP<const ST> X_expected_j = X_expected.getData (j);
1554 
1555  // Each entry of the column should have the value eltSize,
1556  // except for the first and last few entries in the whole
1557  // column (globally, not locally).
1558  for (GO i = minGlobalIndex; i <= maxGlobalIndex; ++i) {
1559  const LO localIndex = targetMap->getLocalElement (i);
1560  TEUCHOS_TEST_FOR_EXCEPTION(i == Teuchos::OrdinalTraits<LO>::invalid(),
1561  std::logic_error, "Global index " << i << " is not in the "
1562  "multivector's Map.");
1563 
1564  // The floating-point additions should be exact in this
1565  // case, except for very large values of eltSize.
1566  if (X_out_j[localIndex] != X_expected_j[localIndex]) {
1567  errorLocations.push_back (i);
1568  }
1569  } // for each global index which my process owns
1570 
1571  const typename Array<GO>::size_type localNumErrors = errorLocations.size();
1572  typename Array<GO>::size_type globalNumErrors = 0;
1573  RCP<const Comm<int> > comm = targetMap->getComm();
1574  reduceAll (*comm, REDUCE_SUM, localNumErrors, ptr (&globalNumErrors));
1575 
1576  if (globalNumErrors != 0) {
1577  std::ostringstream os;
1578  os << "Proc " << comm->getRank() << ": " << localNumErrors
1579  << " incorrect value" << (localNumErrors != 1 ? "s" : "")
1580  << ". Error locations: [ ";
1581  std::copy (errorLocations.begin(), errorLocations.end(),
1582  std::ostream_iterator<GO> (os, " "));
1583  os << "].";
1584  // Iterate through all processes in the communicator,
1585  // printing out each process' local errors.
1586  for (int p = 0; p < comm->getSize(); ++p) {
1587  if (p == comm->getRank()) {
1588  cerr << os.str() << endl;
1589  }
1590  // Barriers to let output finish.
1591  comm->barrier();
1592  comm->barrier();
1593  comm->barrier();
1594  }
1595  TEUCHOS_TEST_FOR_EXCEPTION(globalNumErrors != 0, std::logic_error,
1596  "Over all procs: " << globalNumErrors << " total error"
1597  << (globalNumErrors != 1 ? "s" : "") << ".");
1598  } // if there were any errors in column j
1599  } // for each column j
1600  }
1601  };
1602 
1604  template<class ScalarType,
1605  class LocalOrdinalType,
1606  class GlobalOrdinalType,
1607  class NodeType>
1608  void
1609  testMultiVectorFiller (const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
1610  const size_t unknownsPerNode,
1611  const GlobalOrdinalType unknownsPerElt,
1612  const size_t numCols,
1613  const Teuchos::RCP<Teuchos::FancyOStream>& outStream,
1614  const Teuchos::EVerbosityLevel verbLevel)
1615  {
1616  using Teuchos::FancyOStream;
1617  using Teuchos::getFancyOStream;
1618  using Teuchos::oblackholestream;
1619  using Teuchos::ParameterList;
1620  using Teuchos::RCP;
1621  using Teuchos::rcp;
1622  using std::cerr;
1623  using std::endl;
1624 
1625  typedef ScalarType ST;
1626  typedef LocalOrdinalType LO;
1627  typedef GlobalOrdinalType GO;
1628  typedef NodeType NT;
1629  typedef ::Tpetra::Map<LO, GO, NT> map_type;
1631  typedef Tpetra::global_size_t GST;
1632 
1633  RCP<FancyOStream> out = outStream.is_null() ?
1634  getFancyOStream (rcp (new oblackholestream)) : outStream;
1635  const GST INV = Teuchos::OrdinalTraits<GST>::invalid ();
1636  const GO indexBase = 0;
1637  RCP<const map_type> targetMap =
1638  rcp (new map_type (INV, unknownsPerNode, indexBase, comm));
1639 
1640  std::ostringstream os;
1641  try {
1642  MultiVectorFillerTester<MV>::testSameMap (targetMap, unknownsPerElt, numCols, out, verbLevel);
1643  } catch (std::exception& e) {
1644  os << e.what();
1645  }
1646 
1647  for (int p = 0; p < comm->getSize(); ++p) {
1648  if (p == comm->getRank()) {
1649  *out << "On process " << comm->getRank() << ": " << os.str() << endl;
1650  }
1651  comm->barrier();
1652  comm->barrier();
1653  comm->barrier();
1654  }
1655  }
1656 
1662  void
1663  testSortAndMergeIn ();
1664 
1665  } // namespace Test
1666 } // namespace Tpetra
1667 #else
1668 #error "Compile Error: The header Tpetra_MultiVectorFiller.hpp is DEPRECATED and you have compiled with Tpetra_ENABLE_DEPRECATED_CODE off"
1669 
1670 #endif// TPETRA_ENABLE_DEPRECATED_CODE
1671 
1672 #endif // __Tpetra_MultiVectorFiller_hpp
One or more distributed dense vectors.
int local_ordinal_type
Default value of Scalar template parameter.
size_t global_size_t
Global size_t object.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
::Kokkos::Compat::KokkosDeviceWrapperNode< execution_space > node_type
Default value of Node template parameter.