Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_CrsMatrixMultiplyOp.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSMATRIXMULTIPLYOP_HPP
43 #define TPETRA_CRSMATRIXMULTIPLYOP_HPP
44 
49 
50 #include "Tpetra_CrsMatrix.hpp"
51 #include "Tpetra_Util.hpp"
54 
55 namespace Tpetra {
56 
121  template <class Scalar,
122  class MatScalar = Scalar,
124  class GlobalOrdinal = ::Tpetra::Details::DefaultTypes::global_ordinal_type,
127  public Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>
128  {
129  public:
134 
136 
137 
142  CrsMatrixMultiplyOp (const Teuchos::RCP<const crs_matrix_type>& A) :
143  matrix_ (A)
144  {
145  // we don't require that A is fill complete; we will query for the
146  // importer/exporter at apply()-time
147  }
148 
150  virtual ~CrsMatrixMultiplyOp () {}
151 
153 
155 
161  void
164  Teuchos::ETransp mode = Teuchos::NO_TRANS,
165  Scalar alpha = Teuchos::ScalarTraits<Scalar>::one (),
166  Scalar beta = Teuchos::ScalarTraits<Scalar>::zero ()) const
167  {
168  TEUCHOS_TEST_FOR_EXCEPTION
169  (! matrix_->isFillComplete (), std::runtime_error,
170  Teuchos::typeName (*this) << "::apply(): underlying matrix is not fill-complete.");
171  TEUCHOS_TEST_FOR_EXCEPTION
172  (X.getNumVectors () != Y.getNumVectors (), std::runtime_error,
173  Teuchos::typeName (*this) << "::apply(X,Y): X and Y must have the same number of vectors.");
174  TEUCHOS_TEST_FOR_EXCEPTION
175  (Teuchos::ScalarTraits<Scalar>::isComplex && mode == Teuchos::TRANS, std::logic_error,
176  Teuchos::typeName (*this) << "::apply() does not currently support transposed multiplications for complex scalar types.");
177  if (mode == Teuchos::NO_TRANS) {
178  applyNonTranspose (X, Y, alpha, beta);
179  }
180  else {
181  applyTranspose (X, Y, mode, alpha, beta);
182  }
183  }
184 
224  void
228  const Scalar& dampingFactor,
229  const ESweepDirection direction,
230  const int numSweeps) const
231  {
232  using Teuchos::null;
233  using Teuchos::RCP;
234  using Teuchos::rcp;
235  using Teuchos::rcpFromRef;
236  using Teuchos::rcp_const_cast;
237  typedef Scalar OS;
238  typedef Export<LocalOrdinal, GlobalOrdinal, Node> export_type;
239  typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
241 
242  TEUCHOS_TEST_FOR_EXCEPTION
243  (numSweeps < 0, std::invalid_argument,
244  "gaussSeidel: The number of sweeps must be nonnegative, "
245  "but you provided numSweeps = " << numSweeps << " < 0.");
246 
247  // Translate from global to local sweep direction.
248  // While doing this, validate the input.
249  ESweepDirection localDirection;
250  if (direction == Forward) {
251  localDirection = Forward;
252  }
253  else if (direction == Backward) {
254  localDirection = Backward;
255  }
256  else if (direction == Symmetric) {
257  // We'll control local sweep direction manually.
258  localDirection = Forward;
259  }
260  else {
261  TEUCHOS_TEST_FOR_EXCEPTION
262  (true, std::invalid_argument,
263  "gaussSeidel: The 'direction' enum does not have any of its valid "
264  "values: Forward, Backward, or Symmetric.");
265  }
266 
267  if (numSweeps == 0) {
268  return; // Nothing to do.
269  }
270 
271  // We don't need the Export object because this method assumes
272  // that the row, domain, and range Maps are the same. We do need
273  // the Import object, if there is one, though.
274  RCP<const import_type> importer = matrix_->getGraph()->getImporter();
275  RCP<const export_type> exporter = matrix_->getGraph()->getExporter();
276  TEUCHOS_TEST_FOR_EXCEPTION
277  (! exporter.is_null (), std::runtime_error,
278  "Tpetra's gaussSeidel implementation requires that the row, domain, "
279  "and range Maps be the same. This cannot be the case, because the "
280  "matrix has a nontrivial Export object.");
281 
282  RCP<const map_type> domainMap = matrix_->getDomainMap ();
283  RCP<const map_type> rangeMap = matrix_->getRangeMap ();
284  RCP<const map_type> rowMap = matrix_->getGraph ()->getRowMap ();
285  RCP<const map_type> colMap = matrix_->getGraph ()->getColMap ();
286 
287  const bool debug = ::Tpetra::Details::Behavior::debug ();
288  if (debug) {
289  // The relation 'isSameAs' is transitive. It's also a
290  // collective, so we don't have to do a "shared" test for
291  // exception (i.e., a global reduction on the test value).
292  TEUCHOS_TEST_FOR_EXCEPTION
293  (! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
294  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
295  "multivector X be in the domain Map of the matrix.");
296  TEUCHOS_TEST_FOR_EXCEPTION
297  (! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
298  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
299  "B be in the range Map of the matrix.");
300  TEUCHOS_TEST_FOR_EXCEPTION
301  (! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
302  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
303  "D be in the row Map of the matrix.");
304  TEUCHOS_TEST_FOR_EXCEPTION
305  (! rowMap->isSameAs (*rangeMap), std::runtime_error,
306  "Tpetra::CrsMatrix::gaussSeidel requires that the row Map and the "
307  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
308  TEUCHOS_TEST_FOR_EXCEPTION
309  (! domainMap->isSameAs (*rangeMap), std::runtime_error,
310  "Tpetra::CrsMatrix::gaussSeidel requires that the domain Map and "
311  "the range Map of the matrix be the same.");
312  }
313 
314  // If B is not constant stride, copy it into a constant stride
315  // multivector. We'l handle the right-hand side B first and deal
316  // with X right before the sweeps, to improve locality of the
317  // first sweep. (If the problem is small enough, then that will
318  // hopefully keep more of the entries of X in cache. This
319  // optimizes for the typical case of a small number of sweeps.)
320  RCP<const OSMV> B_in;
321  if (B.isConstantStride()) {
322  B_in = rcpFromRef (B);
323  }
324  else {
325  // The range Map and row Map are the same in this case, so we
326  // can use the (possibly cached) row Map multivector to store a
327  // constant stride copy of B. We don't have to copy back, since
328  // Gauss-Seidel won't modify B.
329  RCP<OSMV> B_in_nonconst = getRowMapMultiVector (B, true);
330  deep_copy (*B_in_nonconst, B);
331  B_in = rcp_const_cast<const OSMV> (B_in_nonconst);
332 
334  (! B.isConstantStride (), std::runtime_error,
335  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
336  "requires that X and B both have constant stride. Since B does not "
337  "have constant stride, we had to make a copy. This is a limitation of "
338  "the current implementation and not your fault, but we still report it "
339  "as an efficiency warning for your information.");
340  }
341 
342  // If X is not constant stride, copy it into a constant stride
343  // multivector. Also, make the column Map multivector X_colMap,
344  // and its domain Map view X_domainMap. (X actually must be a
345  // domain Map view of a column Map multivector; exploit this, if X
346  // has constant stride.)
347 
348  RCP<OSMV> X_domainMap;
349  RCP<OSMV> X_colMap;
350  bool copiedInput = false;
351 
352  if (importer.is_null ()) { // Domain and column Maps are the same.
353  if (X.isConstantStride ()) {
354  X_domainMap = rcpFromRef (X);
355  X_colMap = X_domainMap;
356  copiedInput = false;
357  }
358  else {
359  // Get a temporary column Map multivector, make a domain Map
360  // view of it, and copy X into the domain Map view. We have
361  // to copy here because we won't be doing Import operations.
362  X_colMap = getColumnMapMultiVector (X, true);
363  X_domainMap = X_colMap; // Domain and column Maps are the same.
364  deep_copy (*X_domainMap, X); // Copy X into the domain Map view.
365  copiedInput = true;
367  (! X.isConstantStride (), std::runtime_error,
368  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
369  "requires that X and B both have constant stride. Since X does not "
370  "have constant stride, we had to make a copy. This is a limitation of "
371  "the current implementation and not your fault, but we still report it "
372  "as an efficiency warning for your information.");
373  }
374  }
375  else { // We will be doing Import operations in the sweeps.
376  if (X.isConstantStride ()) {
377  X_domainMap = rcpFromRef (X);
378  // This kernel assumes that X is a domain Map view of a
379  // column Map multivector. We will only check if this is
380  // valid in debug mode.
381  X_colMap = X_domainMap->offsetViewNonConst (colMap, 0);
382 
383  // Do the first Import for the first sweep. This simplifies
384  // the logic in the sweeps.
385  X_colMap->doImport (X, *importer, INSERT);
386  copiedInput = false;
387  }
388  else {
389  // Get a temporary column Map multivector X_colMap, and make a
390  // domain Map view X_domainMap of it. Instead of copying, we
391  // do an Import from X into X_domainMap. This saves us a
392  // copy, since the Import has to copy the data anyway.
393  X_colMap = getColumnMapMultiVector (X, true);
394  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
395  X_colMap->doImport (X, *importer, INSERT);
396  copiedInput = true;
398  (! X.isConstantStride (), std::runtime_error,
399  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
400  "requires that X and B both have constant stride. Since X does not "
401  "have constant stride, we had to make a copy. This is a limitation of "
402  "the current implementation and not your fault, but we still report it "
403  "as an efficiency warning for your information.");
404  }
405  }
406 
407  for (int sweep = 0; sweep < numSweeps; ++sweep) {
408  if (! importer.is_null () && sweep > 0) {
409  // We already did the first Import for the zeroth sweep.
410  X_colMap->doImport (*X_domainMap, *importer, INSERT);
411  }
412 
413  // Do local Gauss-Seidel.
414  if (direction != Symmetric) {
415  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
416  dampingFactor,
417  localDirection);
418  }
419  else { // direction == Symmetric
420  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
421  dampingFactor,
422  Forward);
423  // Communicate again before the Backward sweep.
424  if (! importer.is_null ()) {
425  X_colMap->doImport (*X_domainMap, *importer, INSERT);
426  }
427  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
428  dampingFactor,
429  Backward);
430  }
431  }
432 
433  if (copiedInput) {
434  deep_copy (X, *X_domainMap); // Copy back: X_domainMap -> X.
435  }
436  }
437 
462  void
466  const Scalar& dampingFactor,
467  const ESweepDirection direction,
468  const int numSweeps) const
469  {
470  using Teuchos::null;
471  using Teuchos::RCP;
472  using Teuchos::rcp;
473  using Teuchos::rcpFromRef;
474  using Teuchos::rcp_const_cast;
475  typedef Scalar OS;
476  typedef Export<LocalOrdinal, GlobalOrdinal, Node> export_type;
477  typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
479 
480  TEUCHOS_TEST_FOR_EXCEPTION
481  (numSweeps < 0, std::invalid_argument,
482  "gaussSeidelCopy: The number of sweeps must be nonnegative, "
483  "but you provided numSweeps = " << numSweeps << " < 0.");
484 
485  // Translate from global to local sweep direction.
486  // While doing this, validate the input.
487  ESweepDirection localDirection;
488  if (direction == Forward) {
489  localDirection = Forward;
490  }
491  else if (direction == Backward) {
492  localDirection = Backward;
493  }
494  else if (direction == Symmetric) {
495  // We'll control local sweep direction manually.
496  localDirection = Forward;
497  }
498  else {
499  TEUCHOS_TEST_FOR_EXCEPTION
500  (true, std::invalid_argument,
501  "gaussSeidelCopy: The 'direction' enum does not have any of its "
502  "valid values: Forward, Backward, or Symmetric.");
503  }
504 
505  if (numSweeps == 0) {
506  return;
507  }
508 
509  RCP<const import_type> importer = matrix_->getGraph()->getImporter();
510  RCP<const export_type> exporter = matrix_->getGraph()->getExporter();
511  TEUCHOS_TEST_FOR_EXCEPTION
512  (! exporter.is_null (),
513  std::runtime_error,
514  "Tpetra's gaussSeidelCopy implementation requires that the row, domain, "
515  "and range Maps be the same. This cannot be the case, because the "
516  "matrix has a nontrivial Export object.");
517 
518  RCP<const map_type> domainMap = matrix_->getDomainMap ();
519  RCP<const map_type> rangeMap = matrix_->getRangeMap ();
520  RCP<const map_type> rowMap = matrix_->getGraph ()->getRowMap ();
521  RCP<const map_type> colMap = matrix_->getGraph ()->getColMap ();
522 
523  const bool debug = ::Tpetra::Details::Behavior::debug ();
524  if (debug) {
525  // The relation 'isSameAs' is transitive. It's also a
526  // collective, so we don't have to do a "shared" test for
527  // exception (i.e., a global reduction on the test value).
528  TEUCHOS_TEST_FOR_EXCEPTION
529  (! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
530  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
531  "multivector X be in the domain Map of the matrix.");
532  TEUCHOS_TEST_FOR_EXCEPTION
533  (! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
534  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
535  "B be in the range Map of the matrix.");
536  TEUCHOS_TEST_FOR_EXCEPTION
537  (! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
538  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
539  "D be in the row Map of the matrix.");
540  TEUCHOS_TEST_FOR_EXCEPTION
541  (! rowMap->isSameAs (*rangeMap), std::runtime_error,
542  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
543  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
544  TEUCHOS_TEST_FOR_EXCEPTION
545  (! domainMap->isSameAs (*rangeMap), std::runtime_error,
546  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
547  "the range Map of the matrix be the same.");
548  }
549 
550  // Fetch a (possibly cached) temporary column Map multivector
551  // X_colMap, and a domain Map view X_domainMap of it. Both have
552  // constant stride by construction. We know that the domain Map
553  // must include the column Map, because our Gauss-Seidel kernel
554  // requires that the row Map, domain Map, and range Map are all
555  // the same, and that each process owns all of its own diagonal
556  // entries of the matrix.
557 
558  RCP<OSMV> X_colMap;
559  RCP<OSMV> X_domainMap;
560  bool copyBackOutput = false;
561  if (importer.is_null ()) {
562  if (X.isConstantStride ()) {
563  X_colMap = rcpFromRef (X);
564  X_domainMap = rcpFromRef (X);
565  // No need to copy back to X at end.
566  }
567  else { // We must copy X into a constant stride multivector.
568  // Just use the cached column Map multivector for that.
569  X_colMap = getColumnMapMultiVector (X, true);
570  // X_domainMap is always a domain Map view of the column Map
571  // multivector. In this case, the domain and column Maps are
572  // the same, so X_domainMap _is_ X_colMap.
573  X_domainMap = X_colMap;
574  deep_copy (*X_domainMap, X); // Copy X into constant stride multivector
575  copyBackOutput = true; // Don't forget to copy back at end.
577  (! X.isConstantStride (), std::runtime_error,
578  "gaussSeidelCopy: The current implementation of the Gauss-Seidel "
579  "kernel requires that X and B both have constant stride. Since X "
580  "does not have constant stride, we had to make a copy. This is a "
581  "limitation of the current implementation and not your fault, but we "
582  "still report it as an efficiency warning for your information.");
583  }
584  }
585  else { // Column Map and domain Map are _not_ the same.
586  X_colMap = getColumnMapMultiVector (X);
587  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
588 
589  // We could just copy X into X_domainMap. However, that wastes
590  // a copy, because the Import also does a copy (plus
591  // communication). Since the typical use case for Gauss-Seidel
592  // is a small number of sweeps (2 is typical), we don't want to
593  // waste that copy. Thus, we do the Import here, and skip the
594  // first Import in the first sweep. Importing directly from X
595  // effects the copy into X_domainMap (which is a view of
596  // X_colMap).
597  X_colMap->doImport (X, *importer, INSERT);
598 
599  copyBackOutput = true; // Don't forget to copy back at end.
600  }
601 
602  // The Gauss-Seidel / SOR kernel expects multivectors of constant
603  // stride. X_colMap is by construction, but B might not be. If
604  // it's not, we have to make a copy.
605  RCP<const OSMV> B_in;
606  if (B.isConstantStride ()) {
607  B_in = rcpFromRef (B);
608  }
609  else {
610  // Range Map and row Map are the same in this case, so we can
611  // use the cached row Map multivector to store a constant stride
612  // copy of B.
613  RCP<OSMV> B_in_nonconst = getRowMapMultiVector (B, true);
614  *B_in_nonconst = B;
615  B_in = rcp_const_cast<const OSMV> (B_in_nonconst);
616 
618  (! B.isConstantStride (), std::runtime_error,
619  "gaussSeidelCopy: The current implementation requires that B have "
620  "constant stride. Since B does not have constant stride, we had to "
621  "copy it into a separate constant-stride multivector. This is a "
622  "limitation of the current implementation and not your fault, but we "
623  "still report it as an efficiency warning for your information.");
624  }
625 
626  for (int sweep = 0; sweep < numSweeps; ++sweep) {
627  if (! importer.is_null () && sweep > 0) {
628  // We already did the first Import for the zeroth sweep above.
629  X_colMap->doImport (*X_domainMap, *importer, INSERT);
630  }
631 
632  // Do local Gauss-Seidel.
633  if (direction != Symmetric) {
634  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
635  dampingFactor,
636  localDirection);
637  }
638  else { // direction == Symmetric
639  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
640  dampingFactor,
641  Forward);
642  // Communicate again before the Backward sweep, if necessary.
643  if (! importer.is_null ()) {
644  X_colMap->doImport (*X_domainMap, *importer, INSERT);
645  }
646  matrix_->template localGaussSeidel<OS,OS> (*B_in, *X_colMap, D,
647  dampingFactor,
648  Backward);
649  }
650  }
651 
652  if (copyBackOutput) {
653  deep_copy (X, *X_domainMap); // Copy result back into X.
654  }
655  }
656 
662  bool hasTransposeApply() const {
663  return true;
664  }
665 
667  Teuchos::RCP<const map_type> getDomainMap () const {
668  return matrix_->getDomainMap ();
669  }
670 
672  Teuchos::RCP<const map_type> getRangeMap () const {
673  return matrix_->getRangeMap ();
674  }
675 
677 
678  protected:
680 
682  const Teuchos::RCP<const crs_matrix_type> matrix_;
683 
696  mutable Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > importMV_;
697 
710  mutable Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > exportMV_;
711 
714  void
717  Teuchos::ETransp mode,
718  Scalar alpha,
719  Scalar beta) const
720  {
721  typedef Teuchos::ScalarTraits<Scalar> ST;
722  using Teuchos::null;
723 
724  const int myImageID = Teuchos::rank(*matrix_->getComm());
725 
726  const size_t numVectors = X_in.getNumVectors();
727  // because of Views, it is difficult to determine if X and Y point to the same data.
728  // however, if they reference the exact same object, we will do the user the favor of copying X into new storage (with a warning)
729  // we ony need to do this if we have trivial importers; otherwise, we don't actually apply the operator from X into Y
730  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph()->getImporter();
731  Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph()->getExporter();
732  // access X indirectly, in case we need to create temporary storage
733  Teuchos::RCP<const MV> X;
734 
735  // some parameters for below
736  const bool Y_is_replicated = !Y_in.isDistributed(),
737  Y_is_overwritten = (beta == ST::zero());
738  if (Y_is_replicated && myImageID > 0) {
739  beta = ST::zero();
740  }
741 
742  // currently, cannot multiply from multivector of non-constant stride
743  if (X_in.isConstantStride() == false && importer==null) {
744  // generate a strided copy of X_in
745  X = Teuchos::rcp(new MV(X_in));
746  }
747  else {
748  // just temporary, so this non-owning RCP is okay
749  X = Teuchos::rcp(&X_in, false);
750  }
751 
752  // set up import/export temporary multivectors
753  if (importer != null) {
754  if (importMV_ != null && importMV_->getNumVectors() != numVectors) importMV_ = null;
755  if (importMV_ == null) {
756  importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) );
757  }
758  }
759  if (exporter != null) {
760  if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) exportMV_ = null;
761  if (exportMV_ == null) {
762  exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) );
763  }
764  }
765 
766  // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors
767  if (exporter != null) {
768  {
769  exportMV_->doImport(X_in,*exporter,INSERT);
770  }
771  // multiply out of exportMV_
772  X = exportMV_;
773  }
774 
775 
776  // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
777  // We will compute solution into the to-be-exported MV; get a view
778  if (importer != null) {
779  // Do actual computation
780  matrix_->template localMultiply<Scalar, Scalar>(*X, *importMV_, mode, alpha, ST::zero());
781  if (Y_is_overwritten) Y_in.putScalar(ST::zero());
782  else Y_in.scale(beta);
783  //
784  {
785  Y_in.doExport(*importMV_,*importer,ADD);
786  }
787  }
788  // otherwise, multiply into Y
789  else {
790  // can't multiply in-situ; can't multiply into non-strided multivector
791  if (Y_in.isConstantStride() == false || X.getRawPtr() == &Y_in) {
792  // generate a strided copy of Y
793  MV Y(Y_in);
794  matrix_->template localMultiply<Scalar, Scalar>(*X, Y, mode, alpha, beta);
795  deep_copy (Y_in, Y);
796  }
797  else {
798  matrix_->template localMultiply<Scalar, Scalar>(*X, Y_in, mode, alpha, beta);
799  }
800  }
801  // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
802  if (Y_is_replicated) {
803  Y_in.reduce();
804  }
805  }
806 
808  void
811  Scalar alpha,
812  Scalar beta) const
813  {
815  using Teuchos::RCP;
816  using Teuchos::rcp;
817  using Teuchos::rcp_const_cast;
818  using Teuchos::rcpFromRef;
819  typedef Export<LocalOrdinal,GlobalOrdinal,Node> export_type;
820  typedef Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
821  typedef Teuchos::ScalarTraits<Scalar> STS;
822 
823  if (alpha == STS::zero ()) {
824  if (beta == STS::zero ()) {
825  Y_in.putScalar (STS::zero ());
826  } else if (beta != STS::one ()) {
827  Y_in.scale (beta);
828  }
829  return;
830  }
831 
832  // It's possible that X is a view of Y or vice versa. We don't
833  // allow this (apply() requires that X and Y not alias one
834  // another), but it's helpful to detect and work around this
835  // case. We don't try to to detect the more subtle cases (e.g.,
836  // one is a subview of the other, but their initial pointers
837  // differ). We only need to do this if this matrix's Import is
838  // trivial; otherwise, we don't actually apply the operator from
839  // X into Y.
840 
841  RCP<const import_type> importer = matrix_->getGraph()->getImporter();
842  RCP<const export_type> exporter = matrix_->getGraph()->getExporter();
843 
844  // If beta == 0, then the output MV will be overwritten; none of
845  // its entries should be read. (Sparse BLAS semantics say that we
846  // must ignore any Inf or NaN entries in Y_in, if beta is zero.)
847  // This matters if we need to do an Export operation; see below.
848  const bool Y_is_overwritten = (beta == STS::zero());
849 
850  // We treat the case of a replicated MV output specially.
851  const bool Y_is_replicated =
852  (! Y_in.isDistributed () && matrix_->getComm ()->getSize () != 1);
853 
854  // This is part of the special case for replicated MV output.
855  // We'll let each process do its thing, but do an all-reduce at
856  // the end to sum up the results. Setting beta=0 on all
857  // processes but Proc 0 makes the math work out for the
858  // all-reduce. (This assumes that the replicated data is
859  // correctly replicated, so that the data are the same on all
860  // processes.)
861  if (Y_is_replicated && matrix_->getComm ()->getRank () > 0) {
862  beta = STS::zero();
863  }
864 
865  // Temporary MV for Import operation. After the block of code
866  // below, this will be an (Imported if necessary) column Map MV
867  // ready to give to localMultiply().
868  RCP<const MV> X_colMap;
869  if (importer.is_null ()) {
870  if (! X_in.isConstantStride ()) {
871  // Not all sparse mat-vec kernels can handle an input MV with
872  // nonconstant stride correctly, so we have to copy it in that
873  // case into a constant stride MV. To make a constant stride
874  // copy of X_in, we force creation of the column (== domain)
875  // Map MV (if it hasn't already been created, else fetch the
876  // cached copy). This avoids creating a new MV each time.
877  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true);
878  Tpetra::deep_copy (*X_colMapNonConst, X_in);
879  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
880  }
881  else {
882  // The domain and column Maps are the same, so do the local
883  // multiply using the domain Map input MV X_in.
884  X_colMap = rcpFromRef (X_in);
885  }
886  }
887  else { // need to Import source (multi)vector
888  ProfilingRegion regionImport ("Tpetra::CrsMatrixMultiplyOp::apply: Import");
889 
890  // We're doing an Import anyway, which will copy the relevant
891  // elements of the domain Map MV X_in into a separate column Map
892  // MV. Thus, we don't have to worry whether X_in is constant
893  // stride.
894  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
895 
896  // Import from the domain Map MV to the column Map MV.
897  X_colMapNonConst->doImport (X_in, *importer, INSERT);
898  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
899  }
900 
901  // Temporary MV for doExport (if needed), or for copying a
902  // nonconstant stride output MV into a constant stride MV. This
903  // is null if we don't need the temporary MV, that is, if the
904  // Export is trivial (null).
905  RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
906 
907  // If we have a nontrivial Export object, we must perform an
908  // Export. In that case, the local multiply result will go into
909  // the row Map multivector. We don't have to make a
910  // constant-stride version of Y_in in this case, because we had to
911  // make a constant stride Y_rowMap MV and do an Export anyway.
912  if (! exporter.is_null ()) {
913  matrix_->template localMultiply<Scalar, Scalar> (*X_colMap, *Y_rowMap,
914  Teuchos::NO_TRANS,
915  alpha, STS::zero());
916  {
917  ProfilingRegion regionExport ("Tpetra::CrsMatrixMultiplyOp::apply: Export");
918 
919  // If we're overwriting the output MV Y_in completely (beta
920  // == 0), then make sure that it is filled with zeros before
921  // we do the Export. Otherwise, the ADD combine mode will
922  // use data in Y_in, which is supposed to be zero.
923  if (Y_is_overwritten) {
924  Y_in.putScalar (STS::zero());
925  }
926  else {
927  // Scale the output MV by beta, so that the Export sums in
928  // the mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in.
929  Y_in.scale (beta);
930  }
931  // Do the Export operation.
932  Y_in.doExport (*Y_rowMap, *exporter, ADD);
933  }
934  }
935  else { // Don't do an Export: row Map and range Map are the same.
936  //
937  // If Y_in does not have constant stride, or if the column Map
938  // MV aliases Y_in, then we can't let the kernel write directly
939  // to Y_in. Instead, we have to use the cached row (== range)
940  // Map MV as temporary storage.
941  //
942  // FIXME (mfh 05 Jun 2014, mfh 07 Dec 2018) This test for
943  // aliasing only tests if the user passed in the same
944  // MultiVector for both X and Y. It won't detect whether one
945  // MultiVector views the other. We should also check the
946  // MultiVectors' raw data pointers.
947  if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) {
948  // Force creating the MV if it hasn't been created already.
949  // This will reuse a previously created cached MV.
950  Y_rowMap = getRowMapMultiVector (Y_in, true);
951 
952  // If beta == 0, we don't need to copy Y_in into Y_rowMap,
953  // since we're overwriting it anyway.
954  if (beta != STS::zero ()) {
955  Tpetra::deep_copy (*Y_rowMap, Y_in);
956  }
957  matrix_->template localMultiply<Scalar, Scalar> (*X_colMap,
958  *Y_rowMap,
959  Teuchos::NO_TRANS,
960  alpha, beta);
961  Tpetra::deep_copy (Y_in, *Y_rowMap);
962  }
963  else {
964  matrix_->template localMultiply<Scalar, Scalar> (*X_colMap, Y_in,
965  Teuchos::NO_TRANS,
966  alpha, beta);
967  }
968  }
969 
970  // If the range Map is a locally replicated Map, sum up
971  // contributions from each process. We set beta = 0 on all
972  // processes but Proc 0 initially, so this will handle the scaling
973  // factor beta correctly.
974  if (Y_is_replicated) {
975  ProfilingRegion regionReduce ("Tpetra::CrsMatrixMultiplyOp::apply: Reduce Y");
976  Y_in.reduce ();
977  }
978  }
979 
980  private:
1000  Teuchos::RCP<MV>
1001  getColumnMapMultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X_domainMap,
1002  const bool force = false) const
1003  {
1004  using Teuchos::null;
1005  using Teuchos::RCP;
1006  using Teuchos::rcp;
1007  typedef Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
1008 
1009  const size_t numVecs = X_domainMap.getNumVectors ();
1010  RCP<const import_type> importer = matrix_->getGraph ()->getImporter ();
1011  RCP<const map_type> colMap = matrix_->getColMap ();
1012 
1013  RCP<MV> X_colMap; // null by default
1014 
1015  // If the Import object is trivial (null), then we don't need a
1016  // separate column Map multivector. Just return null in that
1017  // case. The caller is responsible for knowing not to use the
1018  // returned null pointer.
1019  //
1020  // If the Import is nontrivial, then we do need a separate
1021  // column Map multivector for the Import operation. Check in
1022  // that case if we have to (re)create the column Map
1023  // multivector.
1024  if (! importer.is_null () || force) {
1025  if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
1026  X_colMap = rcp (new MV (colMap, numVecs));
1027 
1028  // Cache the newly created multivector for later reuse.
1029  importMV_ = X_colMap;
1030  }
1031  else { // Yay, we can reuse the cached multivector!
1032  X_colMap = importMV_;
1033  // mfh 09 Jan 2013: We don't have to fill with zeros first,
1034  // because the Import uses INSERT combine mode, which overwrites
1035  // existing entries.
1036  //
1037  //X_colMap->putScalar (STS::zero ());
1038  }
1039  }
1040  return X_colMap;
1041  }
1042 
1064  Teuchos::RCP<MV>
1065  getRowMapMultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y_rangeMap,
1066  const bool force = false) const
1067  {
1068  using Teuchos::null;
1069  using Teuchos::RCP;
1070  using Teuchos::rcp;
1071  typedef Export<LocalOrdinal,GlobalOrdinal,Node> export_type;
1072 
1073  const size_t numVecs = Y_rangeMap.getNumVectors ();
1074  RCP<const export_type> exporter = matrix_->getGraph ()->getExporter ();
1075  RCP<const map_type> rowMap = matrix_->getRowMap ();
1076 
1077  RCP<MV> Y_rowMap; // null by default
1078 
1079  // If the Export object is trivial (null), then we don't need a
1080  // separate row Map multivector. Just return null in that case.
1081  // The caller is responsible for knowing not to use the returned
1082  // null pointer.
1083  //
1084  // If the Export is nontrivial, then we do need a separate row
1085  // Map multivector for the Export operation. Check in that case
1086  // if we have to (re)create the row Map multivector.
1087  if (! exporter.is_null () || force) {
1088  if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
1089  Y_rowMap = rcp (new MV (rowMap, numVecs));
1090  exportMV_ = Y_rowMap; // Cache the newly created MV for later reuse
1091  }
1092  else { // Yay, we can reuse the cached multivector!
1093  Y_rowMap = exportMV_;
1094  }
1095  }
1096  return Y_rowMap;
1097  }
1098  };
1099 
1107  template <class OpScalar,
1108  class MatScalar,
1109  class LocalOrdinal,
1110  class GlobalOrdinal,
1111  class Node>
1112  Teuchos::RCP<
1113  CrsMatrixMultiplyOp<OpScalar, MatScalar, LocalOrdinal, GlobalOrdinal, Node> >
1114  createCrsMatrixMultiplyOp (const Teuchos::RCP<
1116  {
1117  typedef CrsMatrixMultiplyOp<OpScalar, MatScalar, LocalOrdinal,
1118  GlobalOrdinal, Node> op_type;
1119  return Teuchos::rcp (new op_type (A));
1120  }
1121 
1122 } // end of namespace Tpetra
1123 
1124 #endif // TPETRA_CRSMATRIXMULTIPLYOP_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute Y = beta*Y + alpha*Op(A)*X, where Op(A) is either A, , or .
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
CrsMatrix< MatScalar, LocalOrdinal, GlobalOrdinal, Node > crs_matrix_type
The specialization of CrsMatrix which this class wraps.
Teuchos::RCP< const map_type > getDomainMap() const
The domain Map of this Operator.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > exportMV_
Row Map MultiVector used in apply().
bool isConstantStride() const
Whether this multivector has constant stride between columns.
One or more distributed dense vectors.
bool isDistributed() const
Whether this is a globally distributed object.
static bool debug()
Whether Tpetra is in debug mode.
int local_ordinal_type
Default value of Scalar template parameter.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > importMV_
Column Map MultiVector used in apply().
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
bool hasTransposeApply() const
Whether this Operator&#39;s apply() method can apply the transpose or conjugate transpose.
CrsMatrixMultiplyOp(const Teuchos::RCP< const crs_matrix_type > &A)
Constructor.
Insert new values that don&#39;t currently exist.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
Abstract interface for operators (e.g., matrices and preconditioners).
const Teuchos::RCP< const crs_matrix_type > matrix_
The underlying CrsMatrix object.
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void applyTranspose(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X_in, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y_in, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Apply the transpose or conjugate transpose of the matrix to X_in, producing Y_in. ...
Sum new values into existing values.
Map< LocalOrdinal, GlobalOrdinal, Node > map_type
The specialization of Map which this class uses.
#define TPETRA_EFFICIENCY_WARNING(throw_exception_test, Exception, msg)
Print or throw an efficency warning.
::Kokkos::Compat::KokkosDeviceWrapperNode< execution_space > node_type
Default value of Node template parameter.
void applyNonTranspose(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X_in, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y_in, Scalar alpha, Scalar beta) const
Apply the matrix (not its transpose) to X_in, producing Y_in.
A parallel distribution of indices over processes.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object (&quot;forward mode&quot;).
A class for wrapping a CrsMatrix multiply in a Operator.
Stand-alone utility functions and macros.
void reduce()
Sum values of a locally replicated multivector across all processes.
Teuchos::RCP< const map_type > getRangeMap() const
The range Map of this Operator.
Teuchos::RCP< CrsMatrixMultiplyOp< OpScalar, MatScalar, LocalOrdinal, GlobalOrdinal, Node > > createCrsMatrixMultiplyOp(const Teuchos::RCP< const CrsMatrix< MatScalar, LocalOrdinal, GlobalOrdinal, Node > > &A)
Non-member function to create a CrsMatrixMultiplyOp.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
void gaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
&quot;Hybrid&quot; Jacobi + (Gauss-Seidel or SOR) on .
virtual ~CrsMatrixMultiplyOp()
Destructor (virtual for memory safety of derived classes).
void gaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
Version of gaussSeidel(), with fewer requirements on X.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.