Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_LocalFilter_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_LOCALFILTER_DEF_HPP
44 #define IFPACK2_LOCALFILTER_DEF_HPP
45 
46 #include <Ifpack2_LocalFilter_decl.hpp>
47 #include <Tpetra_Map.hpp>
48 #include <Tpetra_MultiVector.hpp>
49 #include <Tpetra_Vector.hpp>
50 
51 #ifdef HAVE_MPI
53 #else
54 # include "Teuchos_DefaultSerialComm.hpp"
55 #endif
56 
57 namespace Ifpack2 {
58 
59 
60 template<class MatrixType>
61 bool
62 LocalFilter<MatrixType>::
63 mapPairsAreFitted (const row_matrix_type& A)
64 {
65  const auto rangeMap = A.getRangeMap();
66  const auto rowMap = A.getRowMap();
67  const bool rangeAndRowFitted = mapPairIsFitted (*rowMap, *rangeMap);
68 
69  const auto domainMap = A.getDomainMap();
70  const auto columnMap = A.getColMap();
71  const bool domainAndColumnFitted = mapPairIsFitted (*columnMap, *domainMap);
72 
73  //Note BMK 6-22: Map::isLocallyFitted is a local-only operation, not a collective.
74  //This means that it can return different values on different ranks. This can cause MPI to hang,
75  //even though it's supposed to terminate globally when any single rank does.
76  //
77  //This function doesn't need to be fast since it's debug-only code.
78  int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
79  int globalSuccess;
80 
81  Teuchos::reduceAll<int, int> (*(A.getComm()), Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg (globalSuccess));
82 
83  return globalSuccess == 1;
84 }
85 
86 
87 template<class MatrixType>
88 bool
89 LocalFilter<MatrixType>::
90 mapPairIsFitted (const map_type& map1, const map_type& map2)
91 {
92  return map1.isLocallyFitted (map2);
93 }
94 
95 
96 template<class MatrixType>
99  A_ (A),
100  NumNonzeros_ (0),
101  MaxNumEntries_ (0),
102  MaxNumEntriesA_ (0)
103 {
104  using Teuchos::RCP;
105  using Teuchos::rcp;
106 
107 #ifdef HAVE_IFPACK2_DEBUG
108  if(! mapPairsAreFitted (*A))
109  {
110  std::cout << "WARNING: Ifpack2::LocalFilter:\n" <<
111  "A's Map pairs are not fitted to each other on Process "
112  << A_->getRowMap ()->getComm ()->getRank () << " of the input matrix's "
113  "communicator.\n"
114  "This means that LocalFilter may not work with A. "
115  "Please see discussion of Bug 5992.";
116  }
117 #endif // HAVE_IFPACK2_DEBUG
118 
119  // Build the local communicator (containing this process only).
120  RCP<const Teuchos::Comm<int> > localComm;
121 #ifdef HAVE_MPI
122  localComm = rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF));
123 #else
124  localComm = rcp (new Teuchos::SerialComm<int> ());
125 #endif // HAVE_MPI
126 
127 
128  // // FIXME (mfh 21 Nov 2013) Currently, the implementation implicitly
129  // // assumes that the range Map is fitted to the row Map. Otherwise,
130  // // it probably won't work at all.
131  // TEUCHOS_TEST_FOR_EXCEPTION(
132  // mapPairIsFitted (* (A_->getRangeMap ()), * (A_->getRowMap ())),
133  // std::logic_error, "Ifpack2::LocalFilter: Range Map of the input matrix "
134  // "is not fitted to its row Map. LocalFilter does not currently work in "
135  // "this case. See Bug 5992.");
136 
137  // Build the local row, domain, and range Maps. They both use the
138  // local communicator built above. The global indices of each are
139  // different than those of the corresponding original Map; they
140  // actually are the same as the local indices of the original Map.
141  //
142  // It's even OK if signedness of local_ordinal_type and
143  // global_ordinal_type are different. (That would be a BAD IDEA but
144  // some users seem to enjoy making trouble for themselves and us.)
145  //
146  // Both the local row and local range Maps must have the same number
147  // of entries, namely, that of the local number of entries of A's
148  // range Map.
149 
150  const int blockSize = getBlockSize();
151  const size_t numRows = A_->getRangeMap()->getLocalNumElements () / blockSize;
152 
153  const global_ordinal_type indexBase = static_cast<global_ordinal_type> (0);
154 
155  localRowMap_ =
156  rcp (new map_type (numRows, indexBase, localComm,
157  Tpetra::GloballyDistributed));
158  // If the original matrix's range Map is not fitted to its row Map,
159  // we'll have to do an Export when applying the matrix.
160  localRangeMap_ = localRowMap_;
161 
162  // If the original matrix's domain Map is not fitted to its column
163  // Map, we'll have to do an Import when applying the matrix.
164  if (A_->getRangeMap ().getRawPtr () == A_->getDomainMap ().getRawPtr ()) {
165  // The input matrix's domain and range Maps are identical, so the
166  // locally filtered matrix's domain and range Maps can be also.
167  localDomainMap_ = localRangeMap_;
168  }
169  else {
170  const size_t numCols = A_->getDomainMap()->getLocalNumElements () / blockSize;
171  localDomainMap_ =
172  rcp (new map_type (numCols, indexBase, localComm,
173  Tpetra::GloballyDistributed));
174  }
175 
176  // NodeNumEntries_ will contain the actual number of nonzeros for
177  // each localized row (that is, without external nodes, and always
178  // with the diagonal entry)
179  NumEntries_.resize (numRows);
180 
181  // tentative value for MaxNumEntries. This is the number of
182  // nonzeros in the local matrix
183  MaxNumEntries_ = A_->getLocalMaxNumRowEntries ();
184  MaxNumEntriesA_ = A_->getLocalMaxNumRowEntries ();
185 
186  // Allocate temporary arrays for getLocalRowCopy().
187  Kokkos::resize(localIndices_,MaxNumEntries_);
188  Kokkos::resize(localIndicesForGlobalCopy_,MaxNumEntries_);
189  Kokkos::resize(Values_,MaxNumEntries_*blockSize*blockSize);
190 
191  // now compute:
192  // - the number of nonzero per row
193  // - the total number of nonzeros
194  // - the diagonal entries
195 
196  // compute nonzeros (total and per-row), and store the
197  // diagonal entries (already modified)
198  size_t ActualMaxNumEntries = 0;
199 
200  for (size_t i = 0; i < numRows; ++i) {
201  NumEntries_[i] = 0;
202  size_t Nnz, NewNnz = 0;
203  A_->getLocalRowCopy (i, localIndices_, Values_, Nnz);
204  for (size_t j = 0; j < Nnz; ++j) {
205  // FIXME (mfh 03 Apr 2013) This assumes the following:
206  //
207  // 1. Row Map, range Map, and domain Map are all the same.
208  //
209  // 2. The column Map's list of GIDs on this process is the
210  // domain Map's list of GIDs, followed by remote GIDs. Thus,
211  // for any GID in the domain Map on this process, its LID in
212  // the domain Map (and therefore in the row Map, by (1)) is
213  // the same as its LID in the column Map. (Hence the
214  // less-than test, which if true, means that localIndices_[j]
215  // belongs to the row Map.)
216  if (static_cast<size_t> (localIndices_[j]) < numRows) {
217  ++NewNnz;
218  }
219  }
220 
221  if (NewNnz > ActualMaxNumEntries) {
222  ActualMaxNumEntries = NewNnz;
223  }
224 
225  NumNonzeros_ += NewNnz;
226  NumEntries_[i] = NewNnz;
227  }
228 
229  MaxNumEntries_ = ActualMaxNumEntries;
230 }
231 
232 
233 template<class MatrixType>
235 {}
236 
237 
238 template<class MatrixType>
241 {
242  return localRowMap_->getComm ();
243 }
244 
245 
246 
247 
248 template<class MatrixType>
249 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
250  typename MatrixType::global_ordinal_type,
251  typename MatrixType::node_type> >
253 {
254  return localRowMap_;
255 }
256 
257 
258 template<class MatrixType>
259 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
260  typename MatrixType::global_ordinal_type,
261  typename MatrixType::node_type> >
263 {
264  return localRowMap_; // FIXME (mfh 20 Nov 2013)
265 }
266 
267 
268 template<class MatrixType>
269 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
270  typename MatrixType::global_ordinal_type,
271  typename MatrixType::node_type> >
273 {
274  return localDomainMap_;
275 }
276 
277 
278 template<class MatrixType>
279 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
280  typename MatrixType::global_ordinal_type,
281  typename MatrixType::node_type> >
283 {
284  return localRangeMap_;
285 }
286 
287 
288 template<class MatrixType>
289 Teuchos::RCP<const Tpetra::RowGraph<typename MatrixType::local_ordinal_type,
290  typename MatrixType::global_ordinal_type,
291  typename MatrixType::node_type> >
293 {
294  if (local_graph_ == Teuchos::null) {
295  local_ordinal_type numRows = this->getLocalNumRows();
296  Teuchos::Array<size_t> entriesPerRow(numRows);
297  for(local_ordinal_type i = 0; i < numRows; i++) {
298  entriesPerRow[i] = this->getNumEntriesInLocalRow(i);
299  }
300  Teuchos::RCP<crs_graph_type> local_graph_nc =
301  Teuchos::rcp (new crs_graph_type (this->getRowMap (),
302  this->getColMap (),
303  entriesPerRow()));
304  // copy local indexes into local graph
305  nonconst_local_inds_host_view_type indices("indices",this->getLocalMaxNumRowEntries());
306  nonconst_values_host_view_type values("values",this->getLocalMaxNumRowEntries());
307  for(local_ordinal_type i = 0; i < numRows; i++) {
308  size_t numEntries = 0;
309  this->getLocalRowCopy(i, indices, values, numEntries); // get indices & values
310  local_graph_nc->insertLocalIndices (i, numEntries, indices.data());
311  }
312  local_graph_nc->fillComplete (this->getDomainMap (), this->getRangeMap ());
313  local_graph_ = Teuchos::rcp_const_cast<const crs_graph_type> (local_graph_nc);
314  }
315  return local_graph_;
316 }
317 
318 
319 template<class MatrixType>
321 {
322  return static_cast<global_size_t> (localRangeMap_->getLocalNumElements ());
323 }
324 
325 
326 template<class MatrixType>
328 {
329  return static_cast<global_size_t> (localDomainMap_->getLocalNumElements ());
330 }
331 
332 
333 template<class MatrixType>
335 {
336  return static_cast<size_t> (localRangeMap_->getLocalNumElements ());
337 }
338 
339 
340 template<class MatrixType>
342 {
343  return static_cast<size_t> (localDomainMap_->getLocalNumElements ());
344 }
345 
346 
347 template<class MatrixType>
348 typename MatrixType::global_ordinal_type
350 {
351  return A_->getIndexBase ();
352 }
353 
354 
355 template<class MatrixType>
357 {
358  return NumNonzeros_;
359 }
360 
361 
362 template<class MatrixType>
364 {
365  return NumNonzeros_;
366 }
367 
368 template<class MatrixType>
369 typename MatrixType::local_ordinal_type LocalFilter<MatrixType>::getBlockSize() const
370 {
371  return A_->getBlockSize();
372 }
373 
374 template<class MatrixType>
375 size_t
377 getNumEntriesInGlobalRow (global_ordinal_type globalRow) const
378 {
379  const local_ordinal_type localRow = getRowMap ()->getLocalElement (globalRow);
381  // NOTE (mfh 26 Mar 2014) We return zero if globalRow is not in
382  // the row Map on this process, since "get the number of entries
383  // in the global row" refers only to what the calling process owns
384  // in that row. In this case, it owns no entries in that row,
385  // since it doesn't own the row.
386  return 0;
387  } else {
388  return NumEntries_[localRow];
389  }
390 }
391 
392 
393 template<class MatrixType>
394 size_t
396 getNumEntriesInLocalRow (local_ordinal_type localRow) const
397 {
398  // FIXME (mfh 07 Jul 2014) Shouldn't localRow be a local row index
399  // in the matrix's row Map, not in the LocalFilter's row Map? The
400  // latter is different; it even has different global indices!
401  // (Maybe _that_'s the bug.)
402 
403  if (getRowMap ()->isNodeLocalElement (localRow)) {
404  return NumEntries_[localRow];
405  } else {
406  // NOTE (mfh 26 Mar 2014) We return zero if localRow is not in the
407  // row Map on this process, since "get the number of entries in
408  // the local row" refers only to what the calling process owns in
409  // that row. In this case, it owns no entries in that row, since
410  // it doesn't own the row.
411  return 0;
412  }
413 }
414 
415 
416 template<class MatrixType>
418 {
419  return MaxNumEntries_;
420 }
421 
422 
423 template<class MatrixType>
425 {
426  return MaxNumEntries_;
427 }
428 
429 
430 template<class MatrixType>
432 {
433  return true;
434 }
435 
436 
437 template<class MatrixType>
439 {
440  return A_->isLocallyIndexed ();
441 }
442 
443 
444 template<class MatrixType>
446 {
447  return A_->isGloballyIndexed();
448 }
449 
450 
451 template<class MatrixType>
453 {
454  return A_->isFillComplete ();
455 }
456 
457 
458 template<class MatrixType>
459 void
461  getGlobalRowCopy (global_ordinal_type globalRow,
462  nonconst_global_inds_host_view_type &globalIndices,
463  nonconst_values_host_view_type &values,
464  size_t& numEntries) const
465 {
466  typedef local_ordinal_type LO;
467  typedef typename Teuchos::Array<LO>::size_type size_type;
468 
469  const LO localRow = this->getRowMap ()->getLocalElement (globalRow);
470  if (localRow == Teuchos::OrdinalTraits<LO>::invalid ()) {
471  // NOTE (mfh 26 Mar 2014) We return no entries if globalRow is not
472  // in the row Map on this process, since "get a copy of the
473  // entries in the global row" refers only to what the calling
474  // process owns in that row. In this case, it owns no entries in
475  // that row, since it doesn't own the row.
476  numEntries = 0;
477  }
478  else {
479  // First get a copy of the current row using local indices. Then,
480  // convert to global indices using the input matrix's column Map.
481  //
482  numEntries = this->getNumEntriesInLocalRow (localRow);
483  // FIXME (mfh 26 Mar 2014) If local_ordinal_type ==
484  // global_ordinal_type, we could just alias the input array
485  // instead of allocating a temporary array.
486 
487  // In this case, getLocalRowCopy *does* use the localIndices_, so we use a second temp array
488  this->getLocalRowCopy (localRow, localIndicesForGlobalCopy_, values, numEntries);
489 
490  const map_type& colMap = * (this->getColMap ());
491 
492  // Don't fill the output array beyond its size.
493  const size_type numEnt =
494  std::min (static_cast<size_type> (numEntries),
495  std::min ((size_type)globalIndices.size (), (size_type)values.size ()));
496  for (size_type k = 0; k < numEnt; ++k) {
497  globalIndices[k] = colMap.getGlobalElement (localIndicesForGlobalCopy_[k]);
498  }
499  }
500 }
501 
502 
503 template<class MatrixType>
504 void
506 getLocalRowCopy (local_ordinal_type LocalRow,
507  nonconst_local_inds_host_view_type &Indices,
508  nonconst_values_host_view_type &Values,
509  size_t& NumEntries) const
510 {
511  typedef local_ordinal_type LO;
512  typedef global_ordinal_type GO;
513 
514  if (! A_->getRowMap ()->isNodeLocalElement (LocalRow)) {
515  // The calling process owns zero entries in the row.
516  NumEntries = 0;
517  return;
518  }
519 
520  if (A_->getRowMap()->getComm()->getSize() == 1) {
521  A_->getLocalRowCopy (LocalRow, Indices, Values, NumEntries);
522  return;
523  }
524 
525  const LO blockNumEntr = getBlockSize() * getBlockSize();
526 
527  const size_t numEntInLclRow = NumEntries_[LocalRow];
528  if (static_cast<size_t> (Indices.size ()) < numEntInLclRow ||
529  static_cast<size_t> (Values.size ()) < numEntInLclRow*blockNumEntr) {
530  // FIXME (mfh 07 Jul 2014) Return an error code instead of
531  // throwing. We should really attempt to fill as much space as
532  // we're given, in this case.
534  true, std::runtime_error,
535  "Ifpack2::LocalFilter::getLocalRowCopy: Invalid output array length. "
536  "The output arrays must each have length at least " << numEntInLclRow
537  << " for local row " << LocalRow << " on Process "
538  << localRowMap_->getComm ()->getRank () << ".");
539  }
540  else if (numEntInLclRow == static_cast<size_t> (0)) {
541  // getNumEntriesInLocalRow() returns zero if LocalRow is not owned
542  // by the calling process. In that case, the calling process owns
543  // zero entries in the row.
544  NumEntries = 0;
545  return;
546  }
547 
548  // Always extract using the temporary arrays Values_ and
549  // localIndices_. This is because I may need more space than that
550  // given by the user. The users expects only the local (in the
551  // domain Map) column indices, but I have to extract both local and
552  // remote (not in the domain Map) column indices.
553  //
554  // FIXME (mfh 07 Jul 2014) Use of temporary local storage is not
555  // conducive to thread parallelism. A better way would be to change
556  // the interface so that it only extracts values for the "local"
557  // column indices. CrsMatrix could take a set of column indices,
558  // and return their corresponding values.
559  size_t numEntInMat = 0;
560  A_->getLocalRowCopy (LocalRow, localIndices_, Values_ , numEntInMat);
561 
562  // Fill the user's arrays with the "local" indices and values in
563  // that row. Note that the matrix might have a different column Map
564  // than the local filter.
565  const map_type& matrixDomMap = * (A_->getDomainMap ());
566  const map_type& matrixColMap = * (A_->getColMap ());
567 
568  const size_t capacity = static_cast<size_t> (std::min (Indices.size (),
569  Values.size ()/blockNumEntr));
570  NumEntries = 0;
571  const size_t numRows = localRowMap_->getLocalNumElements (); // superfluous
572  const bool buggy = true; // mfh 07 Jul 2014: See FIXME below.
573  for (size_t j = 0; j < numEntInMat; ++j) {
574  // The LocalFilter only includes entries in the domain Map on
575  // the calling process. We figure out whether an entry is in
576  // the domain Map by converting the (matrix column Map) index to
577  // a global index, and then asking whether that global index is
578  // in the domain Map.
579  const LO matrixLclCol = localIndices_[j];
580  const GO gblCol = matrixColMap.getGlobalElement (matrixLclCol);
581 
582  // FIXME (mfh 07 Jul 2014) This is the likely center of Bug 5992
583  // and perhaps other bugs, like Bug 6117. If 'buggy' is true,
584  // Ifpack2 tests pass; if 'buggy' is false, the tests don't pass.
585  // This suggests that Ifpack2 classes could be using LocalFilter
586  // incorrectly, perhaps by giving it an incorrect domain Map.
587  if (buggy) {
588  // only local indices
589  if ((size_t) localIndices_[j] < numRows) {
590  Indices[NumEntries] = localIndices_[j];
591  for (LO k=0;k<blockNumEntr;++k)
592  Values[NumEntries*blockNumEntr + k] = Values_[j*blockNumEntr + k];
593  NumEntries++;
594  }
595  } else {
596  if (matrixDomMap.isNodeGlobalElement (gblCol)) {
597  // Don't fill more space than the user gave us. It's an error
598  // for them not to give us enough space, but we still shouldn't
599  // overwrite memory that doesn't belong to us.
600  if (NumEntries < capacity) {
601  Indices[NumEntries] = matrixLclCol;
602  for (LO k=0;k<blockNumEntr;++k)
603  Values[NumEntries*blockNumEntr + k] = Values_[j*blockNumEntr + k];
604  }
605  NumEntries++;
606  }
607  }
608  }
609 }
610 
611 
612 template<class MatrixType>
613 void
615 getGlobalRowView (global_ordinal_type /*GlobalRow*/,
616  global_inds_host_view_type &/*indices*/,
617  values_host_view_type &/*values*/) const
618 {
619  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
620  "Ifpack2::LocalFilter does not implement getGlobalRowView.");
621 }
622 
623 
624 template<class MatrixType>
625 void
627 getLocalRowView (local_ordinal_type /*LocalRow*/,
628  local_inds_host_view_type &/*indices*/,
629  values_host_view_type &/*values*/) const
630 {
631  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
632  "Ifpack2::LocalFilter does not implement getLocalRowView.");
633 }
634 
635 
636 template<class MatrixType>
637 void
639 getLocalDiagCopy (Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& diag) const
640 {
641  using Teuchos::RCP;
642  typedef Tpetra::Vector<scalar_type, local_ordinal_type,
643  global_ordinal_type, node_type> vector_type;
644  // This is always correct, and doesn't require a collective check
645  // for sameness of Maps.
646  RCP<vector_type> diagView = diag.offsetViewNonConst (A_->getRowMap (), 0);
647  A_->getLocalDiagCopy (*diagView);
648 }
649 
650 
651 template<class MatrixType>
652 void
654 leftScale (const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
655 {
656  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
657  "Ifpack2::LocalFilter does not implement leftScale.");
658 }
659 
660 
661 template<class MatrixType>
662 void
664 rightScale (const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
665 {
666  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
667  "Ifpack2::LocalFilter does not implement rightScale.");
668 }
669 
670 
671 template<class MatrixType>
672 void
674 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
675  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
676  Teuchos::ETransp mode,
677  scalar_type alpha,
678  scalar_type beta) const
679 {
680  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
682  X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
683  "Ifpack2::LocalFilter::apply: X and Y must have the same number of columns. "
684  "X has " << X.getNumVectors () << " columns, but Y has "
685  << Y.getNumVectors () << " columns.");
686 
687 #ifdef HAVE_IFPACK2_DEBUG
688  {
690  Teuchos::Array<magnitude_type> norms (X.getNumVectors ());
691  X.norm1 (norms ());
692  bool good = true;
693  for (size_t j = 0; j < X.getNumVectors (); ++j) {
694  if (STM::isnaninf (norms[j])) {
695  good = false;
696  break;
697  }
698  }
699  TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::LocalFilter::apply: The 1-norm of the input X is NaN or Inf.");
700  }
701 #endif // HAVE_IFPACK2_DEBUG
702 
704  getBlockSize() > 1, std::runtime_error,
705  "Ifpack2::LocalFilter::apply: Block size greater than zero is not yet supported for "
706  "LocalFilter::apply. Please contact an Ifpack2 developer to request this feature.");
707 
708  if (&X == &Y) {
709  // FIXME (mfh 23 Apr 2014) We have to do more work to figure out
710  // if X and Y alias one another. For example, we should check
711  // whether one is a noncontiguous view of the other.
712  //
713  // X and Y alias one another, so we have to copy X.
714  MV X_copy (X, Teuchos::Copy);
715  applyNonAliased (X_copy, Y, mode, alpha, beta);
716  } else {
717  applyNonAliased (X, Y, mode, alpha, beta);
718  }
719 
720 #ifdef HAVE_IFPACK2_DEBUG
721  {
723  Teuchos::Array<magnitude_type> norms (Y.getNumVectors ());
724  Y.norm1 (norms ());
725  bool good = true;
726  for (size_t j = 0; j < Y.getNumVectors (); ++j) {
727  if (STM::isnaninf (norms[j])) {
728  good = false;
729  break;
730  }
731  }
732  TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::LocalFilter::apply: The 1-norm of the output Y is NaN or Inf.");
733  }
734 #endif // HAVE_IFPACK2_DEBUG
735 }
736 
737 template<class MatrixType>
738 void
740 applyNonAliased (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
741  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
742  Teuchos::ETransp mode,
743  scalar_type alpha,
744  scalar_type beta) const
745 {
746  using Teuchos::ArrayView;
747  using Teuchos::ArrayRCP;
749 
750  const scalar_type zero = STS::zero ();
751  const scalar_type one = STS::one ();
752 
753  if (beta == zero) {
754  Y.putScalar (zero);
755  }
756  else if (beta != one) {
757  Y.scale (beta);
758  }
759 
760  const size_t NumVectors = Y.getNumVectors ();
761  const size_t numRows = localRowMap_->getLocalNumElements ();
762 
763  // FIXME (mfh 14 Apr 2014) At some point, we would like to
764  // parallelize this using Kokkos. This would require a
765  // Kokkos-friendly version of getLocalRowCopy, perhaps with
766  // thread-local storage.
767 
768  const bool constantStride = X.isConstantStride () && Y.isConstantStride ();
769  if (constantStride) {
770  // Since both X and Y have constant stride, we can work with "1-D"
771  // views of their data.
772  const size_t x_stride = X.getStride();
773  const size_t y_stride = Y.getStride();
774  ArrayRCP<scalar_type> y_rcp = Y.get1dViewNonConst();
775  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
776  ArrayView<scalar_type> y_ptr = y_rcp();
777  ArrayView<const scalar_type> x_ptr = x_rcp();
778  for (size_t i = 0; i < numRows; ++i) {
779  size_t Nnz;
780  // Use this class's getrow to make the below code simpler
781  getLocalRowCopy (i, localIndices_ , Values_ , Nnz);
782  scalar_type* Values = reinterpret_cast<scalar_type*>(Values_.data());
783  if (mode == Teuchos::NO_TRANS) {
784  for (size_t j = 0; j < Nnz; ++j) {
785  const local_ordinal_type col = localIndices_[j];
786  for (size_t k = 0; k < NumVectors; ++k) {
787  y_ptr[i + y_stride*k] +=
788  alpha * Values[j] * x_ptr[col + x_stride*k];
789  }
790  }
791  }
792  else if (mode == Teuchos::TRANS) {
793  for (size_t j = 0; j < Nnz; ++j) {
794  const local_ordinal_type col = localIndices_[j];
795  for (size_t k = 0; k < NumVectors; ++k) {
796  y_ptr[col + y_stride*k] +=
797  alpha * Values[j] * x_ptr[i + x_stride*k];
798  }
799  }
800  }
801  else { //mode==Teuchos::CONJ_TRANS
802  for (size_t j = 0; j < Nnz; ++j) {
803  const local_ordinal_type col = localIndices_[j];
804  for (size_t k = 0; k < NumVectors; ++k) {
805  y_ptr[col + y_stride*k] +=
806  alpha * STS::conjugate (Values[j]) * x_ptr[i + x_stride*k];
807  }
808  }
809  }
810  }
811  }
812  else {
813  // At least one of X or Y does not have constant stride.
814  // This means we must work with "2-D" views of their data.
815  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView();
816  ArrayRCP<ArrayRCP<scalar_type> > y_ptr = Y.get2dViewNonConst();
817 
818  for (size_t i = 0; i < numRows; ++i) {
819  size_t Nnz;
820  // Use this class's getrow to make the below code simpler
821  getLocalRowCopy (i, localIndices_ , Values_ , Nnz);
822  scalar_type* Values = reinterpret_cast<scalar_type*>(Values_.data());
823  if (mode == Teuchos::NO_TRANS) {
824  for (size_t k = 0; k < NumVectors; ++k) {
825  ArrayView<const scalar_type> x_local = (x_ptr())[k]();
826  ArrayView<scalar_type> y_local = (y_ptr())[k]();
827  for (size_t j = 0; j < Nnz; ++j) {
828  y_local[i] += alpha * Values[j] * x_local[localIndices_[j]];
829  }
830  }
831  }
832  else if (mode == Teuchos::TRANS) {
833  for (size_t k = 0; k < NumVectors; ++k) {
834  ArrayView<const scalar_type> x_local = (x_ptr())[k]();
835  ArrayView<scalar_type> y_local = (y_ptr())[k]();
836  for (size_t j = 0; j < Nnz; ++j) {
837  y_local[localIndices_[j]] += alpha * Values[j] * x_local[i];
838  }
839  }
840  }
841  else { //mode==Teuchos::CONJ_TRANS
842  for (size_t k = 0; k < NumVectors; ++k) {
843  ArrayView<const scalar_type> x_local = (x_ptr())[k]();
844  ArrayView<scalar_type> y_local = (y_ptr())[k]();
845  for (size_t j = 0; j < Nnz; ++j) {
846  y_local[localIndices_[j]] +=
847  alpha * STS::conjugate (Values[j]) * x_local[i];
848  }
849  }
850  }
851  }
852  }
853 }
854 
855 template<class MatrixType>
857 {
858  return true;
859 }
860 
861 
862 template<class MatrixType>
864 {
865  return false;
866 }
867 
868 
869 template<class MatrixType>
870 typename
871 LocalFilter<MatrixType>::mag_type
873 {
874  typedef Kokkos::ArithTraits<scalar_type> STS;
875  typedef Kokkos::ArithTraits<mag_type> STM;
876  typedef typename Teuchos::Array<scalar_type>::size_type size_type;
877 
878  const size_type maxNumRowEnt = getLocalMaxNumRowEntries ();
879  const local_ordinal_type blockNumEntr = getBlockSize() * getBlockSize();
880  nonconst_local_inds_host_view_type ind ("ind",maxNumRowEnt);
881  nonconst_values_host_view_type val ("val",maxNumRowEnt*blockNumEntr);
882  const size_t numRows = static_cast<size_t> (localRowMap_->getLocalNumElements ());
883 
884  // FIXME (mfh 03 Apr 2013) Scale during sum to avoid overflow.
885  mag_type sumSquared = STM::zero ();
886  for (size_t i = 0; i < numRows; ++i) {
887  size_t numEntries = 0;
888  this->getLocalRowCopy (i, ind, val, numEntries);
889  for (size_type k = 0; k < static_cast<size_type> (numEntries*blockNumEntr); ++k) {
890  const mag_type v_k_abs = STS::magnitude (val[k]);
891  sumSquared += v_k_abs * v_k_abs;
892  }
893  }
894  return STM::squareroot (sumSquared); // Different for each process; that's OK.
895 }
896 
897 template<class MatrixType>
898 std::string
900 {
902  std::ostringstream os;
903 
904  os << "Ifpack2::LocalFilter: {";
905  os << "MatrixType: " << TypeNameTraits<MatrixType>::name ();
906  if (this->getObjectLabel () != "") {
907  os << ", Label: \"" << this->getObjectLabel () << "\"";
908  }
909  os << ", Number of rows: " << getGlobalNumRows ()
910  << ", Number of columns: " << getGlobalNumCols ()
911  << "}";
912  return os.str ();
913 }
914 
915 
916 template<class MatrixType>
917 void
920  const Teuchos::EVerbosityLevel verbLevel) const
921 {
922  using Teuchos::OSTab;
924  using std::endl;
925 
926  const Teuchos::EVerbosityLevel vl =
927  (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
928 
929  if (vl > Teuchos::VERB_NONE) {
930  // describe() starts with a tab, by convention.
931  OSTab tab0 (out);
932 
933  out << "Ifpack2::LocalFilter:" << endl;
934  OSTab tab1 (out);
935  out << "MatrixType: " << TypeNameTraits<MatrixType>::name () << endl;
936  if (this->getObjectLabel () != "") {
937  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
938  }
939  out << "Number of rows: " << getGlobalNumRows () << endl
940  << "Number of columns: " << getGlobalNumCols () << endl
941  << "Number of nonzeros: " << NumNonzeros_ << endl;
942 
943  if (vl > Teuchos::VERB_LOW) {
944  out << "Row Map:" << endl;
945  localRowMap_->describe (out, vl);
946  out << "Domain Map:" << endl;
947  localDomainMap_->describe (out, vl);
948  out << "Range Map:" << endl;
949  localRangeMap_->describe (out, vl);
950  }
951  }
952 }
953 
954 template<class MatrixType>
955 Teuchos::RCP<const Tpetra::RowMatrix<typename MatrixType::scalar_type,
956  typename MatrixType::local_ordinal_type,
957  typename MatrixType::global_ordinal_type,
958  typename MatrixType::node_type> >
960 {
961  return A_;
962 }
963 
964 
965 } // namespace Ifpack2
966 
967 #define IFPACK2_LOCALFILTER_INSTANT(S,LO,GO,N) \
968  template class Ifpack2::LocalFilter< Tpetra::RowMatrix<S, LO, GO, N> >;
969 
970 #endif
virtual void rightScale(const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &x)
Scales the RowMatrix on the right with the Vector x.
Definition: Ifpack2_LocalFilter_def.hpp:664
virtual Teuchos::RCP< const map_type > getRowMap() const
Returns the Map that describes the row distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:252
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object to the given output stream.
Definition: Ifpack2_LocalFilter_def.hpp:919
basic_OSTab< char > OSTab
virtual Teuchos::RCP< const map_type > getDomainMap() const
Returns the Map that describes the domain distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:272
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
virtual size_t getLocalMaxNumRowEntries() const
The maximum number of entries across all rows/columns on this process.
Definition: Ifpack2_LocalFilter_def.hpp:424
virtual void getGlobalRowCopy(global_ordinal_type GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const
Get the entries in the given row, using global indices.
Definition: Ifpack2_LocalFilter_def.hpp:461
virtual bool isFillComplete() const
Returns true if fillComplete() has been called.
Definition: Ifpack2_LocalFilter_def.hpp:452
virtual void getLocalRowCopy(local_ordinal_type LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const
Get the entries in the given row, using local indices.
Definition: Ifpack2_LocalFilter_def.hpp:506
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns the communicator.
Definition: Ifpack2_LocalFilter_def.hpp:240
virtual void getGlobalRowView(global_ordinal_type GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const
Extract a const, non-persisting view of global indices in a specified row of the matrix.
Definition: Ifpack2_LocalFilter_def.hpp:615
virtual void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Compute Y = beta*Y + alpha*A_local*X.
Definition: Ifpack2_LocalFilter_def.hpp:674
virtual bool hasTransposeApply() const
Whether this operator supports applying the transpose or conjugate transpose.
Definition: Ifpack2_LocalFilter_def.hpp:856
virtual bool isLocallyIndexed() const
Whether the underlying sparse matrix is locally (opposite of globally) indexed.
Definition: Ifpack2_LocalFilter_def.hpp:438
virtual void getLocalDiagCopy(Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &diag) const
Get the diagonal entries of the (locally filtered) matrix.
Definition: Ifpack2_LocalFilter_def.hpp:639
virtual size_t getLocalNumEntries() const
Returns the local number of entries in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:363
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Type of the Tpetra::Map specialization that this class uses.
Definition: Ifpack2_LocalFilter_decl.hpp:222
T * getRawPtr() const
virtual mag_type getFrobeniusNorm() const
The Frobenius norm of the (locally filtered) matrix.
Definition: Ifpack2_LocalFilter_def.hpp:872
virtual Teuchos::RCP< const Tpetra::RowGraph< local_ordinal_type, global_ordinal_type, node_type > > getGraph() const
The (locally filtered) matrix&#39;s graph.
Definition: Ifpack2_LocalFilter_def.hpp:292
Ordinal size_type
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
virtual bool supportsRowViews() const
Returns true if RowViews are supported.
Definition: Ifpack2_LocalFilter_def.hpp:863
virtual size_t getLocalNumCols() const
The number of columns in the (locally filtered) matrix.
Definition: Ifpack2_LocalFilter_def.hpp:341
virtual size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const
The current number of entries on this node in the specified global row.
Definition: Ifpack2_LocalFilter_def.hpp:377
virtual void leftScale(const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &x)
Scales the RowMatrix on the left with the Vector x.
Definition: Ifpack2_LocalFilter_def.hpp:654
virtual global_ordinal_type getIndexBase() const
Returns the index base for global indices for this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:349
virtual bool isGloballyIndexed() const
Whether the underlying sparse matrix is globally (opposite of locally) indexed.
Definition: Ifpack2_LocalFilter_def.hpp:445
virtual size_t getGlobalMaxNumRowEntries() const
The maximum number of entries across all rows/columns on all processes.
Definition: Ifpack2_LocalFilter_def.hpp:417
LocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_LocalFilter_def.hpp:98
virtual ~LocalFilter()
Destructor.
Definition: Ifpack2_LocalFilter_def.hpp:234
virtual global_size_t getGlobalNumRows() const
The number of global rows in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:320
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalFilter_def.hpp:899
virtual size_t getLocalNumRows() const
The number of rows owned on the calling process.
Definition: Ifpack2_LocalFilter_def.hpp:334
virtual size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const
The current number of entries on this node in the specified local row.
Definition: Ifpack2_LocalFilter_def.hpp:396
virtual Teuchos::RCP< const map_type > getRangeMap() const
Returns the Map that describes the range distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:282
virtual global_size_t getGlobalNumEntries() const
Returns the global number of entries in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:356
virtual Teuchos::RCP< const row_matrix_type > getUnderlyingMatrix() const
Return matrix that LocalFilter was built on.
Definition: Ifpack2_LocalFilter_def.hpp:959
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:161
virtual global_size_t getGlobalNumCols() const
The number of global columns in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:327
virtual Teuchos::RCP< const map_type > getColMap() const
Returns the Map that describes the column distribution in this matrix.
Definition: Ifpack2_LocalFilter_def.hpp:262
virtual void getLocalRowView(local_ordinal_type LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const
Extract a const, non-persisting view of local indices in a specified row of the matrix.
Definition: Ifpack2_LocalFilter_def.hpp:627
virtual bool hasColMap() const
Whether this matrix has a well-defined column Map.
Definition: Ifpack2_LocalFilter_def.hpp:431
virtual local_ordinal_type getBlockSize() const
The number of degrees of freedom per mesh point.
Definition: Ifpack2_LocalFilter_def.hpp:369