Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_LocalFilter_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_LOCALFILTER_DEF_HPP
11 #define IFPACK2_LOCALFILTER_DEF_HPP
12 
13 #include <Ifpack2_LocalFilter_decl.hpp>
14 #include <Tpetra_Map.hpp>
15 #include <Tpetra_MultiVector.hpp>
16 #include <Tpetra_Vector.hpp>
17 
18 #ifdef HAVE_MPI
20 #else
21 # include "Teuchos_DefaultSerialComm.hpp"
22 #endif
23 
24 namespace Ifpack2 {
25 
26 
27 template<class MatrixType>
28 bool
29 LocalFilter<MatrixType>::
30 mapPairsAreFitted (const row_matrix_type& A)
31 {
32  const auto rangeMap = A.getRangeMap();
33  const auto rowMap = A.getRowMap();
34  const bool rangeAndRowFitted = mapPairIsFitted (*rowMap, *rangeMap);
35 
36  const auto domainMap = A.getDomainMap();
37  const auto columnMap = A.getColMap();
38  const bool domainAndColumnFitted = mapPairIsFitted (*columnMap, *domainMap);
39 
40  //Note BMK 6-22: Map::isLocallyFitted is a local-only operation, not a collective.
41  //This means that it can return different values on different ranks. This can cause MPI to hang,
42  //even though it's supposed to terminate globally when any single rank does.
43  //
44  //This function doesn't need to be fast since it's debug-only code.
45  int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
46  int globalSuccess;
47 
48  Teuchos::reduceAll<int, int> (*(A.getComm()), Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg (globalSuccess));
49 
50  return globalSuccess == 1;
51 }
52 
53 
54 template<class MatrixType>
55 bool
56 LocalFilter<MatrixType>::
57 mapPairIsFitted (const map_type& map1, const map_type& map2)
58 {
59  return map1.isLocallyFitted (map2);
60 }
61 
62 
63 template<class MatrixType>
66  A_ (A),
67  NumNonzeros_ (0),
68  MaxNumEntries_ (0),
69  MaxNumEntriesA_ (0)
70 {
71  using Teuchos::RCP;
72  using Teuchos::rcp;
73 
74 #ifdef HAVE_IFPACK2_DEBUG
75  if(! mapPairsAreFitted (*A))
76  {
77  std::cout << "WARNING: Ifpack2::LocalFilter:\n" <<
78  "A's Map pairs are not fitted to each other on Process "
79  << A_->getRowMap ()->getComm ()->getRank () << " of the input matrix's "
80  "communicator.\n"
81  "This means that LocalFilter may not work with A. "
82  "Please see discussion of Bug 5992.";
83  }
84 #endif // HAVE_IFPACK2_DEBUG
85 
86  // Build the local communicator (containing this process only).
87  RCP<const Teuchos::Comm<int> > localComm;
88 #ifdef HAVE_MPI
89  localComm = rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF));
90 #else
91  localComm = rcp (new Teuchos::SerialComm<int> ());
92 #endif // HAVE_MPI
93 
94 
95  // // FIXME (mfh 21 Nov 2013) Currently, the implementation implicitly
96  // // assumes that the range Map is fitted to the row Map. Otherwise,
97  // // it probably won't work at all.
98  // TEUCHOS_TEST_FOR_EXCEPTION(
99  // mapPairIsFitted (* (A_->getRangeMap ()), * (A_->getRowMap ())),
100  // std::logic_error, "Ifpack2::LocalFilter: Range Map of the input matrix "
101  // "is not fitted to its row Map. LocalFilter does not currently work in "
102  // "this case. See Bug 5992.");
103 
104  // Build the local row, domain, and range Maps. They both use the
105  // local communicator built above. The global indices of each are
106  // different than those of the corresponding original Map; they
107  // actually are the same as the local indices of the original Map.
108  //
109  // It's even OK if signedness of local_ordinal_type and
110  // global_ordinal_type are different. (That would be a BAD IDEA but
111  // some users seem to enjoy making trouble for themselves and us.)
112  //
113  // Both the local row and local range Maps must have the same number
114  // of entries, namely, that of the local number of entries of A's
115  // range Map.
116 
117  const int blockSize = getBlockSize();
118  const size_t numRows = A_->getRangeMap()->getLocalNumElements () / blockSize;
119 
120  const global_ordinal_type indexBase = static_cast<global_ordinal_type> (0);
121 
122  localRowMap_ =
123  rcp (new map_type (numRows, indexBase, localComm,
124  Tpetra::GloballyDistributed));
125  // If the original matrix's range Map is not fitted to its row Map,
126  // we'll have to do an Export when applying the matrix.
127  localRangeMap_ = localRowMap_;
128 
129  // If the original matrix's domain Map is not fitted to its column
130  // Map, we'll have to do an Import when applying the matrix.
131  if (A_->getRangeMap ().getRawPtr () == A_->getDomainMap ().getRawPtr ()) {
132  // The input matrix's domain and range Maps are identical, so the
133  // locally filtered matrix's domain and range Maps can be also.
134  localDomainMap_ = localRangeMap_;
135  }
136  else {
137  const size_t numCols = A_->getDomainMap()->getLocalNumElements () / blockSize;
138  localDomainMap_ =
139  rcp (new map_type (numCols, indexBase, localComm,
140  Tpetra::GloballyDistributed));
141  }
142 
143  // NodeNumEntries_ will contain the actual number of nonzeros for
144  // each localized row (that is, without external nodes, and always
145  // with the diagonal entry)
146  NumEntries_.resize (numRows);
147 
148  // tentative value for MaxNumEntries. This is the number of
149  // nonzeros in the local matrix
150  MaxNumEntries_ = A_->getLocalMaxNumRowEntries ();
151  MaxNumEntriesA_ = A_->getLocalMaxNumRowEntries ();
152 
153  // Allocate temporary arrays for getLocalRowCopy().
154  Kokkos::resize(localIndices_,MaxNumEntries_);
155  Kokkos::resize(localIndicesForGlobalCopy_,MaxNumEntries_);
156  Kokkos::resize(Values_,MaxNumEntries_*blockSize*blockSize);
157 
158  // now compute:
159  // - the number of nonzero per row
160  // - the total number of nonzeros
161  // - the diagonal entries
162 
163  // compute nonzeros (total and per-row), and store the
164  // diagonal entries (already modified)
165  size_t ActualMaxNumEntries = 0;
166 
167  for (size_t i = 0; i < numRows; ++i) {
168  NumEntries_[i] = 0;
169  size_t Nnz, NewNnz = 0;
170  A_->getLocalRowCopy (i, localIndices_, Values_, Nnz);
171  for (size_t j = 0; j < Nnz; ++j) {
172  // FIXME (mfh 03 Apr 2013) This assumes the following:
173  //
174  // 1. Row Map, range Map, and domain Map are all the same.
175  //
176  // 2. The column Map's list of GIDs on this process is the
177  // domain Map's list of GIDs, followed by remote GIDs. Thus,
178  // for any GID in the domain Map on this process, its LID in
179  // the domain Map (and therefore in the row Map, by (1)) is
180  // the same as its LID in the column Map. (Hence the
181  // less-than test, which if true, means that localIndices_[j]
182  // belongs to the row Map.)
183  if (static_cast<size_t> (localIndices_[j]) < numRows) {
184  ++NewNnz;
185  }
186  }
187 
188  if (NewNnz > ActualMaxNumEntries) {
189  ActualMaxNumEntries = NewNnz;
190  }
191 
192  NumNonzeros_ += NewNnz;
193  NumEntries_[i] = NewNnz;
194  }
195 
196  MaxNumEntries_ = ActualMaxNumEntries;
197 }
198 
199 
200 template<class MatrixType>
202 {}
203 
204 
205 template<class MatrixType>
208 {
209  return localRowMap_->getComm ();
210 }
211 
212 
213 
214 
215 template<class MatrixType>
216 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
217  typename MatrixType::global_ordinal_type,
218  typename MatrixType::node_type> >
220 {
221  return localRowMap_;
222 }
223 
224 
225 template<class MatrixType>
226 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
227  typename MatrixType::global_ordinal_type,
228  typename MatrixType::node_type> >
230 {
231  return localRowMap_; // FIXME (mfh 20 Nov 2013)
232 }
233 
234 
235 template<class MatrixType>
236 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
237  typename MatrixType::global_ordinal_type,
238  typename MatrixType::node_type> >
240 {
241  return localDomainMap_;
242 }
243 
244 
245 template<class MatrixType>
246 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
247  typename MatrixType::global_ordinal_type,
248  typename MatrixType::node_type> >
250 {
251  return localRangeMap_;
252 }
253 
254 
255 template<class MatrixType>
256 Teuchos::RCP<const Tpetra::RowGraph<typename MatrixType::local_ordinal_type,
257  typename MatrixType::global_ordinal_type,
258  typename MatrixType::node_type> >
260 {
261  if (local_graph_ == Teuchos::null) {
262  local_ordinal_type numRows = this->getLocalNumRows();
263  Teuchos::Array<size_t> entriesPerRow(numRows);
264  for(local_ordinal_type i = 0; i < numRows; i++) {
265  entriesPerRow[i] = this->getNumEntriesInLocalRow(i);
266  }
267  Teuchos::RCP<crs_graph_type> local_graph_nc =
268  Teuchos::rcp (new crs_graph_type (this->getRowMap (),
269  this->getColMap (),
270  entriesPerRow()));
271  // copy local indexes into local graph
272  nonconst_local_inds_host_view_type indices("indices",this->getLocalMaxNumRowEntries());
273  nonconst_values_host_view_type values("values",this->getLocalMaxNumRowEntries());
274  for(local_ordinal_type i = 0; i < numRows; i++) {
275  size_t numEntries = 0;
276  this->getLocalRowCopy(i, indices, values, numEntries); // get indices & values
277  local_graph_nc->insertLocalIndices (i, numEntries, indices.data());
278  }
279  local_graph_nc->fillComplete (this->getDomainMap (), this->getRangeMap ());
280  local_graph_ = Teuchos::rcp_const_cast<const crs_graph_type> (local_graph_nc);
281  }
282  return local_graph_;
283 }
284 
285 
286 template<class MatrixType>
288 {
289  return static_cast<global_size_t> (localRangeMap_->getLocalNumElements ());
290 }
291 
292 
293 template<class MatrixType>
295 {
296  return static_cast<global_size_t> (localDomainMap_->getLocalNumElements ());
297 }
298 
299 
300 template<class MatrixType>
302 {
303  return static_cast<size_t> (localRangeMap_->getLocalNumElements ());
304 }
305 
306 
307 template<class MatrixType>
309 {
310  return static_cast<size_t> (localDomainMap_->getLocalNumElements ());
311 }
312 
313 
314 template<class MatrixType>
315 typename MatrixType::global_ordinal_type
317 {
318  return A_->getIndexBase ();
319 }
320 
321 
322 template<class MatrixType>
324 {
325  return NumNonzeros_;
326 }
327 
328 
329 template<class MatrixType>
331 {
332  return NumNonzeros_;
333 }
334 
335 template<class MatrixType>
336 typename MatrixType::local_ordinal_type LocalFilter<MatrixType>::getBlockSize() const
337 {
338  return A_->getBlockSize();
339 }
340 
341 template<class MatrixType>
342 size_t
344 getNumEntriesInGlobalRow (global_ordinal_type globalRow) const
345 {
346  const local_ordinal_type localRow = getRowMap ()->getLocalElement (globalRow);
348  // NOTE (mfh 26 Mar 2014) We return zero if globalRow is not in
349  // the row Map on this process, since "get the number of entries
350  // in the global row" refers only to what the calling process owns
351  // in that row. In this case, it owns no entries in that row,
352  // since it doesn't own the row.
353  return 0;
354  } else {
355  return NumEntries_[localRow];
356  }
357 }
358 
359 
360 template<class MatrixType>
361 size_t
363 getNumEntriesInLocalRow (local_ordinal_type localRow) const
364 {
365  // FIXME (mfh 07 Jul 2014) Shouldn't localRow be a local row index
366  // in the matrix's row Map, not in the LocalFilter's row Map? The
367  // latter is different; it even has different global indices!
368  // (Maybe _that_'s the bug.)
369 
370  if (getRowMap ()->isNodeLocalElement (localRow)) {
371  return NumEntries_[localRow];
372  } else {
373  // NOTE (mfh 26 Mar 2014) We return zero if localRow is not in the
374  // row Map on this process, since "get the number of entries in
375  // the local row" refers only to what the calling process owns in
376  // that row. In this case, it owns no entries in that row, since
377  // it doesn't own the row.
378  return 0;
379  }
380 }
381 
382 
383 template<class MatrixType>
385 {
386  return MaxNumEntries_;
387 }
388 
389 
390 template<class MatrixType>
392 {
393  return MaxNumEntries_;
394 }
395 
396 
397 template<class MatrixType>
399 {
400  return true;
401 }
402 
403 
404 template<class MatrixType>
406 {
407  return A_->isLocallyIndexed ();
408 }
409 
410 
411 template<class MatrixType>
413 {
414  return A_->isGloballyIndexed();
415 }
416 
417 
418 template<class MatrixType>
420 {
421  return A_->isFillComplete ();
422 }
423 
424 
425 template<class MatrixType>
426 void
428  getGlobalRowCopy (global_ordinal_type globalRow,
429  nonconst_global_inds_host_view_type &globalIndices,
430  nonconst_values_host_view_type &values,
431  size_t& numEntries) const
432 {
433  typedef local_ordinal_type LO;
434  typedef typename Teuchos::Array<LO>::size_type size_type;
435 
436  const LO localRow = this->getRowMap ()->getLocalElement (globalRow);
437  if (localRow == Teuchos::OrdinalTraits<LO>::invalid ()) {
438  // NOTE (mfh 26 Mar 2014) We return no entries if globalRow is not
439  // in the row Map on this process, since "get a copy of the
440  // entries in the global row" refers only to what the calling
441  // process owns in that row. In this case, it owns no entries in
442  // that row, since it doesn't own the row.
443  numEntries = 0;
444  }
445  else {
446  // First get a copy of the current row using local indices. Then,
447  // convert to global indices using the input matrix's column Map.
448  //
449  numEntries = this->getNumEntriesInLocalRow (localRow);
450  // FIXME (mfh 26 Mar 2014) If local_ordinal_type ==
451  // global_ordinal_type, we could just alias the input array
452  // instead of allocating a temporary array.
453 
454  // In this case, getLocalRowCopy *does* use the localIndices_, so we use a second temp array
455  this->getLocalRowCopy (localRow, localIndicesForGlobalCopy_, values, numEntries);
456 
457  const map_type& colMap = * (this->getColMap ());
458 
459  // Don't fill the output array beyond its size.
460  const size_type numEnt =
461  std::min (static_cast<size_type> (numEntries),
462  std::min ((size_type)globalIndices.size (), (size_type)values.size ()));
463  for (size_type k = 0; k < numEnt; ++k) {
464  globalIndices[k] = colMap.getGlobalElement (localIndicesForGlobalCopy_[k]);
465  }
466  }
467 }
468 
469 
470 template<class MatrixType>
471 void
473 getLocalRowCopy (local_ordinal_type LocalRow,
474  nonconst_local_inds_host_view_type &Indices,
475  nonconst_values_host_view_type &Values,
476  size_t& NumEntries) const
477 {
478  typedef local_ordinal_type LO;
479  typedef global_ordinal_type GO;
480 
481  if (! A_->getRowMap ()->isNodeLocalElement (LocalRow)) {
482  // The calling process owns zero entries in the row.
483  NumEntries = 0;
484  return;
485  }
486 
487  if (A_->getRowMap()->getComm()->getSize() == 1) {
488  A_->getLocalRowCopy (LocalRow, Indices, Values, NumEntries);
489  return;
490  }
491 
492  const LO blockNumEntr = getBlockSize() * getBlockSize();
493 
494  const size_t numEntInLclRow = NumEntries_[LocalRow];
495  if (static_cast<size_t> (Indices.size ()) < numEntInLclRow ||
496  static_cast<size_t> (Values.size ()) < numEntInLclRow*blockNumEntr) {
497  // FIXME (mfh 07 Jul 2014) Return an error code instead of
498  // throwing. We should really attempt to fill as much space as
499  // we're given, in this case.
501  true, std::runtime_error,
502  "Ifpack2::LocalFilter::getLocalRowCopy: Invalid output array length. "
503  "The output arrays must each have length at least " << numEntInLclRow
504  << " for local row " << LocalRow << " on Process "
505  << localRowMap_->getComm ()->getRank () << ".");
506  }
507  else if (numEntInLclRow == static_cast<size_t> (0)) {
508  // getNumEntriesInLocalRow() returns zero if LocalRow is not owned
509  // by the calling process. In that case, the calling process owns
510  // zero entries in the row.
511  NumEntries = 0;
512  return;
513  }
514 
515  // Always extract using the temporary arrays Values_ and
516  // localIndices_. This is because I may need more space than that
517  // given by the user. The users expects only the local (in the
518  // domain Map) column indices, but I have to extract both local and
519  // remote (not in the domain Map) column indices.
520  //
521  // FIXME (mfh 07 Jul 2014) Use of temporary local storage is not
522  // conducive to thread parallelism. A better way would be to change
523  // the interface so that it only extracts values for the "local"
524  // column indices. CrsMatrix could take a set of column indices,
525  // and return their corresponding values.
526  size_t numEntInMat = 0;
527  A_->getLocalRowCopy (LocalRow, localIndices_, Values_ , numEntInMat);
528 
529  // Fill the user's arrays with the "local" indices and values in
530  // that row. Note that the matrix might have a different column Map
531  // than the local filter.
532  const map_type& matrixDomMap = * (A_->getDomainMap ());
533  const map_type& matrixColMap = * (A_->getColMap ());
534 
535  const size_t capacity = static_cast<size_t> (std::min (Indices.size (),
536  Values.size ()/blockNumEntr));
537  NumEntries = 0;
538  const size_t numRows = localRowMap_->getLocalNumElements (); // superfluous
539  const bool buggy = true; // mfh 07 Jul 2014: See FIXME below.
540  for (size_t j = 0; j < numEntInMat; ++j) {
541  // The LocalFilter only includes entries in the domain Map on
542  // the calling process. We figure out whether an entry is in
543  // the domain Map by converting the (matrix column Map) index to
544  // a global index, and then asking whether that global index is
545  // in the domain Map.
546  const LO matrixLclCol = localIndices_[j];
547  const GO gblCol = matrixColMap.getGlobalElement (matrixLclCol);
548 
549  // FIXME (mfh 07 Jul 2014) This is the likely center of Bug 5992
550  // and perhaps other bugs, like Bug 6117. If 'buggy' is true,
551  // Ifpack2 tests pass; if 'buggy' is false, the tests don't pass.
552  // This suggests that Ifpack2 classes could be using LocalFilter
553  // incorrectly, perhaps by giving it an incorrect domain Map.
554  if (buggy) {
555  // only local indices
556  if ((size_t) localIndices_[j] < numRows) {
557  Indices[NumEntries] = localIndices_[j];
558  for (LO k=0;k<blockNumEntr;++k)
559  Values[NumEntries*blockNumEntr + k] = Values_[j*blockNumEntr + k];
560  NumEntries++;
561  }
562  } else {
563  if (matrixDomMap.isNodeGlobalElement (gblCol)) {
564  // Don't fill more space than the user gave us. It's an error
565  // for them not to give us enough space, but we still shouldn't
566  // overwrite memory that doesn't belong to us.
567  if (NumEntries < capacity) {
568  Indices[NumEntries] = matrixLclCol;
569  for (LO k=0;k<blockNumEntr;++k)
570  Values[NumEntries*blockNumEntr + k] = Values_[j*blockNumEntr + k];
571  }
572  NumEntries++;
573  }
574  }
575  }
576 }
577 
578 
579 template<class MatrixType>
580 void
582 getGlobalRowView (global_ordinal_type /*GlobalRow*/,
583  global_inds_host_view_type &/*indices*/,
584  values_host_view_type &/*values*/) const
585 {
586  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
587  "Ifpack2::LocalFilter does not implement getGlobalRowView.");
588 }
589 
590 
591 template<class MatrixType>
592 void
594 getLocalRowView (local_ordinal_type /*LocalRow*/,
595  local_inds_host_view_type &/*indices*/,
596  values_host_view_type &/*values*/) const
597 {
598  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
599  "Ifpack2::LocalFilter does not implement getLocalRowView.");
600 }
601 
602 
603 template<class MatrixType>
604 void
606 getLocalDiagCopy (Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& diag) const
607 {
608  using Teuchos::RCP;
609  typedef Tpetra::Vector<scalar_type, local_ordinal_type,
610  global_ordinal_type, node_type> vector_type;
611  // This is always correct, and doesn't require a collective check
612  // for sameness of Maps.
613  RCP<vector_type> diagView = diag.offsetViewNonConst (A_->getRowMap (), 0);
614  A_->getLocalDiagCopy (*diagView);
615 }
616 
617 
618 template<class MatrixType>
619 void
621 leftScale (const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
622 {
623  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
624  "Ifpack2::LocalFilter does not implement leftScale.");
625 }
626 
627 
628 template<class MatrixType>
629 void
631 rightScale (const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
632 {
633  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
634  "Ifpack2::LocalFilter does not implement rightScale.");
635 }
636 
637 
638 template<class MatrixType>
639 void
641 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
642  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
643  Teuchos::ETransp mode,
644  scalar_type alpha,
645  scalar_type beta) const
646 {
647  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
649  X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
650  "Ifpack2::LocalFilter::apply: X and Y must have the same number of columns. "
651  "X has " << X.getNumVectors () << " columns, but Y has "
652  << Y.getNumVectors () << " columns.");
653 
654 #ifdef HAVE_IFPACK2_DEBUG
655  {
657  Teuchos::Array<magnitude_type> norms (X.getNumVectors ());
658  X.norm1 (norms ());
659  bool good = true;
660  for (size_t j = 0; j < X.getNumVectors (); ++j) {
661  if (STM::isnaninf (norms[j])) {
662  good = false;
663  break;
664  }
665  }
666  TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::LocalFilter::apply: The 1-norm of the input X is NaN or Inf.");
667  }
668 #endif // HAVE_IFPACK2_DEBUG
669 
671  getBlockSize() > 1, std::runtime_error,
672  "Ifpack2::LocalFilter::apply: Block size greater than zero is not yet supported for "
673  "LocalFilter::apply. Please contact an Ifpack2 developer to request this feature.");
674 
675  if (&X == &Y) {
676  // FIXME (mfh 23 Apr 2014) We have to do more work to figure out
677  // if X and Y alias one another. For example, we should check
678  // whether one is a noncontiguous view of the other.
679  //
680  // X and Y alias one another, so we have to copy X.
681  MV X_copy (X, Teuchos::Copy);
682  applyNonAliased (X_copy, Y, mode, alpha, beta);
683  } else {
684  applyNonAliased (X, Y, mode, alpha, beta);
685  }
686 
687 #ifdef HAVE_IFPACK2_DEBUG
688  {
690  Teuchos::Array<magnitude_type> norms (Y.getNumVectors ());
691  Y.norm1 (norms ());
692  bool good = true;
693  for (size_t j = 0; j < Y.getNumVectors (); ++j) {
694  if (STM::isnaninf (norms[j])) {
695  good = false;
696  break;
697  }
698  }
699  TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::LocalFilter::apply: The 1-norm of the output Y is NaN or Inf.");
700  }
701 #endif // HAVE_IFPACK2_DEBUG
702 }
703 
704 template<class MatrixType>
705 void
707 applyNonAliased (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
708  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
709  Teuchos::ETransp mode,
710  scalar_type alpha,
711  scalar_type beta) const
712 {
713  using Teuchos::ArrayView;
714  using Teuchos::ArrayRCP;
716 
717  const scalar_type zero = STS::zero ();
718  const scalar_type one = STS::one ();
719 
720  if (beta == zero) {
721  Y.putScalar (zero);
722  }
723  else if (beta != one) {
724  Y.scale (beta);
725  }
726 
727  const size_t NumVectors = Y.getNumVectors ();
728  const size_t numRows = localRowMap_->getLocalNumElements ();
729 
730  // FIXME (mfh 14 Apr 2014) At some point, we would like to
731  // parallelize this using Kokkos. This would require a
732  // Kokkos-friendly version of getLocalRowCopy, perhaps with
733  // thread-local storage.
734 
735  const bool constantStride = X.isConstantStride () && Y.isConstantStride ();
736  if (constantStride) {
737  // Since both X and Y have constant stride, we can work with "1-D"
738  // views of their data.
739  const size_t x_stride = X.getStride();
740  const size_t y_stride = Y.getStride();
741  ArrayRCP<scalar_type> y_rcp = Y.get1dViewNonConst();
742  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
743  ArrayView<scalar_type> y_ptr = y_rcp();
744  ArrayView<const scalar_type> x_ptr = x_rcp();
745  for (size_t i = 0; i < numRows; ++i) {
746  size_t Nnz;
747  // Use this class's getrow to make the below code simpler
748  getLocalRowCopy (i, localIndices_ , Values_ , Nnz);
749  scalar_type* Values = reinterpret_cast<scalar_type*>(Values_.data());
750  if (mode == Teuchos::NO_TRANS) {
751  for (size_t j = 0; j < Nnz; ++j) {
752  const local_ordinal_type col = localIndices_[j];
753  for (size_t k = 0; k < NumVectors; ++k) {
754  y_ptr[i + y_stride*k] +=
755  alpha * Values[j] * x_ptr[col + x_stride*k];
756  }
757  }
758  }
759  else if (mode == Teuchos::TRANS) {
760  for (size_t j = 0; j < Nnz; ++j) {
761  const local_ordinal_type col = localIndices_[j];
762  for (size_t k = 0; k < NumVectors; ++k) {
763  y_ptr[col + y_stride*k] +=
764  alpha * Values[j] * x_ptr[i + x_stride*k];
765  }
766  }
767  }
768  else { //mode==Teuchos::CONJ_TRANS
769  for (size_t j = 0; j < Nnz; ++j) {
770  const local_ordinal_type col = localIndices_[j];
771  for (size_t k = 0; k < NumVectors; ++k) {
772  y_ptr[col + y_stride*k] +=
773  alpha * STS::conjugate (Values[j]) * x_ptr[i + x_stride*k];
774  }
775  }
776  }
777  }
778  }
779  else {
780  // At least one of X or Y does not have constant stride.
781  // This means we must work with "2-D" views of their data.
782  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView();
783  ArrayRCP<ArrayRCP<scalar_type> > y_ptr = Y.get2dViewNonConst();
784 
785  for (size_t i = 0; i < numRows; ++i) {
786  size_t Nnz;
787  // Use this class's getrow to make the below code simpler
788  getLocalRowCopy (i, localIndices_ , Values_ , Nnz);
789  scalar_type* Values = reinterpret_cast<scalar_type*>(Values_.data());
790  if (mode == Teuchos::NO_TRANS) {
791  for (size_t k = 0; k < NumVectors; ++k) {
792  ArrayView<const scalar_type> x_local = (x_ptr())[k]();
793  ArrayView<scalar_type> y_local = (y_ptr())[k]();
794  for (size_t j = 0; j < Nnz; ++j) {
795  y_local[i] += alpha * Values[j] * x_local[localIndices_[j]];
796  }
797  }
798  }
799  else if (mode == Teuchos::TRANS) {
800  for (size_t k = 0; k < NumVectors; ++k) {
801  ArrayView<const scalar_type> x_local = (x_ptr())[k]();
802  ArrayView<scalar_type> y_local = (y_ptr())[k]();
803  for (size_t j = 0; j < Nnz; ++j) {
804  y_local[localIndices_[j]] += alpha * Values[j] * x_local[i];
805  }
806  }
807  }
808  else { //mode==Teuchos::CONJ_TRANS
809  for (size_t k = 0; k < NumVectors; ++k) {
810  ArrayView<const scalar_type> x_local = (x_ptr())[k]();
811  ArrayView<scalar_type> y_local = (y_ptr())[k]();
812  for (size_t j = 0; j < Nnz; ++j) {
813  y_local[localIndices_[j]] +=
814  alpha * STS::conjugate (Values[j]) * x_local[i];
815  }
816  }
817  }
818  }
819  }
820 }
821 
822 template<class MatrixType>
824 {
825  return true;
826 }
827 
828 
829 template<class MatrixType>
831 {
832  return false;
833 }
834 
835 
836 template<class MatrixType>
837 typename
838 LocalFilter<MatrixType>::mag_type
840 {
841  typedef Kokkos::ArithTraits<scalar_type> STS;
842  typedef Kokkos::ArithTraits<mag_type> STM;
843  typedef typename Teuchos::Array<scalar_type>::size_type size_type;
844 
845  const size_type maxNumRowEnt = getLocalMaxNumRowEntries ();
846  const local_ordinal_type blockNumEntr = getBlockSize() * getBlockSize();
847  nonconst_local_inds_host_view_type ind ("ind",maxNumRowEnt);
848  nonconst_values_host_view_type val ("val",maxNumRowEnt*blockNumEntr);
849  const size_t numRows = static_cast<size_t> (localRowMap_->getLocalNumElements ());
850 
851  // FIXME (mfh 03 Apr 2013) Scale during sum to avoid overflow.
852  mag_type sumSquared = STM::zero ();
853  for (size_t i = 0; i < numRows; ++i) {
854  size_t numEntries = 0;
855  this->getLocalRowCopy (i, ind, val, numEntries);
856  for (size_type k = 0; k < static_cast<size_type> (numEntries*blockNumEntr); ++k) {
857  const mag_type v_k_abs = STS::magnitude (val[k]);
858  sumSquared += v_k_abs * v_k_abs;
859  }
860  }
861  return STM::squareroot (sumSquared); // Different for each process; that's OK.
862 }
863 
864 template<class MatrixType>
865 std::string
867 {
869  std::ostringstream os;
870 
871  os << "Ifpack2::LocalFilter: {";
872  os << "MatrixType: " << TypeNameTraits<MatrixType>::name ();
873  if (this->getObjectLabel () != "") {
874  os << ", Label: \"" << this->getObjectLabel () << "\"";
875  }
876  os << ", Number of rows: " << getGlobalNumRows ()
877  << ", Number of columns: " << getGlobalNumCols ()
878  << "}";
879  return os.str ();
880 }
881 
882 
883 template<class MatrixType>
884 void
887  const Teuchos::EVerbosityLevel verbLevel) const
888 {
889  using Teuchos::OSTab;
891  using std::endl;
892 
893  const Teuchos::EVerbosityLevel vl =
894  (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
895 
896  if (vl > Teuchos::VERB_NONE) {
897  // describe() starts with a tab, by convention.
898  OSTab tab0 (out);
899 
900  out << "Ifpack2::LocalFilter:" << endl;
901  OSTab tab1 (out);
902  out << "MatrixType: " << TypeNameTraits<MatrixType>::name () << endl;
903  if (this->getObjectLabel () != "") {
904  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
905  }
906  out << "Number of rows: " << getGlobalNumRows () << endl
907  << "Number of columns: " << getGlobalNumCols () << endl
908  << "Number of nonzeros: " << NumNonzeros_ << endl;
909 
910  if (vl > Teuchos::VERB_LOW) {
911  out << "Row Map:" << endl;
912  localRowMap_->describe (out, vl);
913  out << "Domain Map:" << endl;
914  localDomainMap_->describe (out, vl);
915  out << "Range Map:" << endl;
916  localRangeMap_->describe (out, vl);
917  }
918  }
919 }
920 
921 template<class MatrixType>
922 Teuchos::RCP<const Tpetra::RowMatrix<typename MatrixType::scalar_type,
923  typename MatrixType::local_ordinal_type,
924  typename MatrixType::global_ordinal_type,
925  typename MatrixType::node_type> >
927 {
928  return A_;
929 }
930 
931 
932 } // namespace Ifpack2
933 
934 #define IFPACK2_LOCALFILTER_INSTANT(S,LO,GO,N) \
935  template class Ifpack2::LocalFilter< Tpetra::RowMatrix<S, LO, GO, N> >;
936 
937 #endif
virtual void rightScale(const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &x)
Scales the RowMatrix on the right with the Vector x.
Definition: Ifpack2_LocalFilter_def.hpp:631
virtual Teuchos::RCP< const map_type > getRowMap() const
Returns the Map that describes the row distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:219
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object to the given output stream.
Definition: Ifpack2_LocalFilter_def.hpp:886
basic_OSTab< char > OSTab
virtual Teuchos::RCP< const map_type > getDomainMap() const
Returns the Map that describes the domain distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:239
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
virtual size_t getLocalMaxNumRowEntries() const
The maximum number of entries across all rows/columns on this process.
Definition: Ifpack2_LocalFilter_def.hpp:391
virtual void getGlobalRowCopy(global_ordinal_type GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const
Get the entries in the given row, using global indices.
Definition: Ifpack2_LocalFilter_def.hpp:428
virtual bool isFillComplete() const
Returns true if fillComplete() has been called.
Definition: Ifpack2_LocalFilter_def.hpp:419
virtual void getLocalRowCopy(local_ordinal_type LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const
Get the entries in the given row, using local indices.
Definition: Ifpack2_LocalFilter_def.hpp:473
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns the communicator.
Definition: Ifpack2_LocalFilter_def.hpp:207
virtual void getGlobalRowView(global_ordinal_type GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const
Extract a const, non-persisting view of global indices in a specified row of the matrix.
Definition: Ifpack2_LocalFilter_def.hpp:582
virtual void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Compute Y = beta*Y + alpha*A_local*X.
Definition: Ifpack2_LocalFilter_def.hpp:641
virtual bool hasTransposeApply() const
Whether this operator supports applying the transpose or conjugate transpose.
Definition: Ifpack2_LocalFilter_def.hpp:823
virtual bool isLocallyIndexed() const
Whether the underlying sparse matrix is locally (opposite of globally) indexed.
Definition: Ifpack2_LocalFilter_def.hpp:405
virtual void getLocalDiagCopy(Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &diag) const
Get the diagonal entries of the (locally filtered) matrix.
Definition: Ifpack2_LocalFilter_def.hpp:606
virtual size_t getLocalNumEntries() const
Returns the local number of entries in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:330
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Type of the Tpetra::Map specialization that this class uses.
Definition: Ifpack2_LocalFilter_decl.hpp:189
T * getRawPtr() const
virtual mag_type getFrobeniusNorm() const
The Frobenius norm of the (locally filtered) matrix.
Definition: Ifpack2_LocalFilter_def.hpp:839
virtual Teuchos::RCP< const Tpetra::RowGraph< local_ordinal_type, global_ordinal_type, node_type > > getGraph() const
The (locally filtered) matrix&#39;s graph.
Definition: Ifpack2_LocalFilter_def.hpp:259
Ordinal size_type
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
virtual bool supportsRowViews() const
Returns true if RowViews are supported.
Definition: Ifpack2_LocalFilter_def.hpp:830
virtual size_t getLocalNumCols() const
The number of columns in the (locally filtered) matrix.
Definition: Ifpack2_LocalFilter_def.hpp:308
virtual size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const
The current number of entries on this node in the specified global row.
Definition: Ifpack2_LocalFilter_def.hpp:344
virtual void leftScale(const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &x)
Scales the RowMatrix on the left with the Vector x.
Definition: Ifpack2_LocalFilter_def.hpp:621
virtual global_ordinal_type getIndexBase() const
Returns the index base for global indices for this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:316
virtual bool isGloballyIndexed() const
Whether the underlying sparse matrix is globally (opposite of locally) indexed.
Definition: Ifpack2_LocalFilter_def.hpp:412
virtual size_t getGlobalMaxNumRowEntries() const
The maximum number of entries across all rows/columns on all processes.
Definition: Ifpack2_LocalFilter_def.hpp:384
LocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_LocalFilter_def.hpp:65
virtual ~LocalFilter()
Destructor.
Definition: Ifpack2_LocalFilter_def.hpp:201
virtual global_size_t getGlobalNumRows() const
The number of global rows in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:287
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalFilter_def.hpp:866
virtual size_t getLocalNumRows() const
The number of rows owned on the calling process.
Definition: Ifpack2_LocalFilter_def.hpp:301
virtual size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const
The current number of entries on this node in the specified local row.
Definition: Ifpack2_LocalFilter_def.hpp:363
virtual Teuchos::RCP< const map_type > getRangeMap() const
Returns the Map that describes the range distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:249
virtual global_size_t getGlobalNumEntries() const
Returns the global number of entries in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:323
virtual Teuchos::RCP< const row_matrix_type > getUnderlyingMatrix() const
Return matrix that LocalFilter was built on.
Definition: Ifpack2_LocalFilter_def.hpp:926
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:128
virtual global_size_t getGlobalNumCols() const
The number of global columns in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:294
virtual Teuchos::RCP< const map_type > getColMap() const
Returns the Map that describes the column distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:229
virtual void getLocalRowView(local_ordinal_type LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const
Extract a const, non-persisting view of local indices in a specified row of the matrix.
Definition: Ifpack2_LocalFilter_def.hpp:594
virtual bool hasColMap() const
Whether this matrix has a well-defined column Map.
Definition: Ifpack2_LocalFilter_def.hpp:398
virtual local_ordinal_type getBlockSize() const
The number of degrees of freedom per mesh point.
Definition: Ifpack2_LocalFilter_def.hpp:336