Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Stokhos_TpetraCrsMatrixMPVectorUnitTest.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Stokhos Package
4 //
5 // Copyright 2009 NTESS and the Stokhos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
12 #include "Stokhos_Ensemble_Sizes.hpp"
13 
14 // Teuchos
16 
17 // Tpetra
20 #include "Tpetra_Core.hpp"
21 #include "Tpetra_Map.hpp"
22 #include "Tpetra_MultiVector.hpp"
23 #include "Tpetra_Vector.hpp"
24 #include "Tpetra_CrsGraph.hpp"
25 #include "Tpetra_CrsMatrix.hpp"
26 #include "Tpetra_Details_WrappedDualView.hpp"
27 #include "Stokhos_Tpetra_CG.hpp"
28 
29 // Belos solver
30 #ifdef HAVE_STOKHOS_BELOS
32 #include "BelosLinearProblem.hpp"
33 #include "BelosPseudoBlockGmresSolMgr.hpp"
34 #include "BelosPseudoBlockCGSolMgr.hpp"
35 #endif
36 
37 // Ifpack2 preconditioner
38 #ifdef HAVE_STOKHOS_IFPACK2
40 #include "Ifpack2_Factory.hpp"
41 #endif
42 
43 // MueLu preconditioner
44 #ifdef HAVE_STOKHOS_MUELU
46 #include "MueLu_CreateTpetraPreconditioner.hpp"
47 #endif
48 
49 // Amesos2 solver
50 #ifdef HAVE_STOKHOS_AMESOS2
52 #include "Amesos2_Factory.hpp"
53 #endif
54 
55 template <typename scalar, typename ordinal>
56 inline
57 scalar generate_vector_coefficient( const ordinal nFEM,
58  const ordinal nStoch,
59  const ordinal iColFEM,
60  const ordinal iStoch )
61 {
62  const scalar X_fem = 100.0 + scalar(iColFEM) / scalar(nFEM);
63  const scalar X_stoch = 1.0 + scalar(iStoch) / scalar(nStoch);
64  return X_fem + X_stoch;
65  //return 1.0;
66 }
67 
68 template <typename scalar, typename ordinal>
69 inline
70 scalar generate_multi_vector_coefficient( const ordinal nFEM,
71  const ordinal nVec,
72  const ordinal nStoch,
73  const ordinal iColFEM,
74  const ordinal iVec,
75  const ordinal iStoch)
76 {
77  const scalar X_fem = 100.0 + scalar(iColFEM) / scalar(nFEM);
78  const scalar X_stoch = 1.0 + scalar(iStoch) / scalar(nStoch);
79  const scalar X_col = 0.01 + scalar(iVec) / scalar(nVec);
80  return X_fem + X_stoch + X_col;
81  //return 1.0;
82 }
83 
84 //
85 // Tests
86 //
87 
88 const int VectorSize = STOKHOS_DEFAULT_ENSEMBLE_SIZE;
89 
90 //
91 // Test vector addition
92 //
94  Tpetra_CrsMatrix_MP, VectorAdd, Storage, LocalOrdinal, GlobalOrdinal, Node )
95 {
96  using Teuchos::RCP;
97  using Teuchos::rcp;
98  using Teuchos::ArrayView;
99  using Teuchos::Array;
100  using Teuchos::ArrayRCP;
101 
102  typedef typename Storage::value_type BaseScalar;
104 
105  typedef Teuchos::Comm<int> Tpetra_Comm;
106  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
107  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
108 
109  // Ensure device is initialized
110  if ( !Kokkos::is_initialized() )
111  Kokkos::initialize();
112 
113  // Comm
114  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
115 
116  // Map
117  GlobalOrdinal nrow = 10;
118  RCP<const Tpetra_Map> map =
119  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
120  nrow, comm);
121  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
122  const size_t num_my_row = myGIDs.size();
123 
124  // Fill vectors
125  RCP<Tpetra_Vector> x1 = Tpetra::createVector<Scalar>(map);
126  RCP<Tpetra_Vector> x2 = Tpetra::createVector<Scalar>(map);
127  {
128  auto x1_view = x1->getLocalViewHost(Tpetra::Access::OverwriteAll);
129  auto x2_view = x2->getLocalViewHost(Tpetra::Access::OverwriteAll);
130  Scalar val1(VectorSize, BaseScalar(0.0)), val2(VectorSize, BaseScalar(0.0));
131  for (size_t i=0; i<num_my_row; ++i) {
132  const GlobalOrdinal row = myGIDs[i];
133  for (LocalOrdinal j=0; j<VectorSize; ++j) {
134  val1.fastAccessCoeff(j) = generate_vector_coefficient<BaseScalar,size_t>(nrow, VectorSize, row, j);
135  val2.fastAccessCoeff(j) = 0.12345 * generate_vector_coefficient<BaseScalar,size_t>(nrow, VectorSize, row, j);
136  }
137  x1_view(i,0) = val1;
138  x2_view(i,0) = val2;
139  }
140  }
141 
142  // Add
143  Scalar alpha = 2.1;
144  Scalar beta = 3.7;
145  RCP<Tpetra_Vector> y = Tpetra::createVector<Scalar>(map);
146  y->update(alpha, *x1, beta, *x2, Scalar(0.0));
147 
148  // y->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
149  // Teuchos::VERB_EXTREME);
150 
151  // Check
152  auto y_view = y->getLocalViewHost(Tpetra::Access::ReadOnly);
153  Scalar val(VectorSize, BaseScalar(0.0));
154  BaseScalar tol = 1.0e-14;
155  for (size_t i=0; i<num_my_row; ++i) {
156  const GlobalOrdinal row = myGIDs[i];
157  for (LocalOrdinal j=0; j<VectorSize; ++j) {
158  BaseScalar v = generate_vector_coefficient<BaseScalar,size_t>(
159  nrow, VectorSize, row, j);
160  val.fastAccessCoeff(j) = alpha.coeff(j)*v + 0.12345*beta.coeff(j)*v;
161  }
162  TEST_EQUALITY( y_view(i,0).size(), VectorSize );
163  for (LocalOrdinal j=0; j<VectorSize; ++j)
164  TEST_FLOATING_EQUALITY( y_view(i,0).fastAccessCoeff(j), val.fastAccessCoeff(j), tol );
165  }
166 }
167 
168 //
169 // Test vector dot product
170 //
172  Tpetra_CrsMatrix_MP, VectorDot, Storage, LocalOrdinal, GlobalOrdinal, Node )
173 {
174  using Teuchos::RCP;
175  using Teuchos::rcp;
176  using Teuchos::ArrayView;
177  using Teuchos::Array;
178  using Teuchos::ArrayRCP;
179 
180  typedef typename Storage::value_type BaseScalar;
182 
183  typedef Teuchos::Comm<int> Tpetra_Comm;
184  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
185  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
186  typedef typename Tpetra_Vector::dot_type dot_type;
187 
188  // Ensure device is initialized
189  if ( !Kokkos::is_initialized() )
190  Kokkos::initialize();
191 
192  // Comm
193  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
194 
195  // Map
196  GlobalOrdinal nrow = 10;
197  RCP<const Tpetra_Map> map =
198  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
199  nrow, comm);
200  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
201  const size_t num_my_row = myGIDs.size();
202 
203  // Fill vectors
204  RCP<Tpetra_Vector> x1 = Tpetra::createVector<Scalar>(map);
205  RCP<Tpetra_Vector> x2 = Tpetra::createVector<Scalar>(map);
206  {
207  auto x1_view = x1->getLocalViewHost(Tpetra::Access::OverwriteAll);
208  auto x2_view = x2->getLocalViewHost(Tpetra::Access::OverwriteAll);
209  Scalar val1(VectorSize, BaseScalar(0.0)), val2(VectorSize, BaseScalar(0.0));
210  for (size_t i=0; i<num_my_row; ++i) {
211  const GlobalOrdinal row = myGIDs[i];
212  for (LocalOrdinal j=0; j<VectorSize; ++j) {
213  val1.fastAccessCoeff(j) = generate_vector_coefficient<BaseScalar,size_t>(nrow, VectorSize, row, j);
214  val2.fastAccessCoeff(j) = 0.12345 * generate_vector_coefficient<BaseScalar,size_t>(nrow, VectorSize, row, j);
215  }
216  x1_view(i,0) = val1;
217  x2_view(i,0) = val2;
218  }
219  }
220 
221  // Dot product
222  dot_type dot = x1->dot(*x2);
223 
224  // Check
225 
226 #ifdef HAVE_STOKHOS_ENSEMBLE_REDUCT
227 
228  // Local contribution
229  dot_type local_val(0);
230  for (size_t i=0; i<num_my_row; ++i) {
231  const GlobalOrdinal row = myGIDs[i];
232  for (LocalOrdinal j=0; j<VectorSize; ++j) {
233  BaseScalar v = generate_vector_coefficient<BaseScalar,size_t>(
234  nrow, VectorSize, row, j);
235  local_val += 0.12345 * v * v;
236  }
237  }
238 
239  // Global reduction
240  dot_type val(0);
241  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, local_val,
242  Teuchos::outArg(val));
243 
244 #else
245 
246  // Local contribution
247  dot_type local_val(VectorSize, 0.0);
248  for (size_t i=0; i<num_my_row; ++i) {
249  const GlobalOrdinal row = myGIDs[i];
250  for (LocalOrdinal j=0; j<VectorSize; ++j) {
251  BaseScalar v = generate_vector_coefficient<BaseScalar,size_t>(
252  nrow, VectorSize, row, j);
253  local_val.fastAccessCoeff(j) += 0.12345 * v * v;
254  }
255  }
256 
257  // Global reduction
258  dot_type val(VectorSize, 0.0);
259  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, local_val,
260  Teuchos::outArg(val));
261 
262 #endif
263 
264  out << "dot = " << dot << " expected = " << val << std::endl;
265 
266  BaseScalar tol = 1.0e-14;
267  TEST_FLOATING_EQUALITY( dot, val, tol );
268 }
269 
270 //
271 // Test multi-vector addition
272 //
274  Tpetra_CrsMatrix_MP, MultiVectorAdd, Storage, LocalOrdinal, GlobalOrdinal, Node )
275 {
276  using Teuchos::RCP;
277  using Teuchos::rcp;
278  using Teuchos::ArrayView;
279  using Teuchos::Array;
280  using Teuchos::ArrayRCP;
281 
282  typedef typename Storage::value_type BaseScalar;
284 
285  typedef Teuchos::Comm<int> Tpetra_Comm;
286  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
287  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_MultiVector;
288 
289  // Ensure device is initialized
290  if ( !Kokkos::is_initialized() )
291  Kokkos::initialize();
292 
293  // Comm
294  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
295 
296  // Map
297  GlobalOrdinal nrow = 10;
298  RCP<const Tpetra_Map> map =
299  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
300  nrow, comm);
301  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
302  const size_t num_my_row = myGIDs.size();
303 
304  // Fill vectors
305  size_t ncol = 5;
306  RCP<Tpetra_MultiVector> x1 = Tpetra::createMultiVector<Scalar>(map, ncol);
307  RCP<Tpetra_MultiVector> x2 = Tpetra::createMultiVector<Scalar>(map, ncol);
308  {
309  auto x1_view = x1->getLocalViewHost(Tpetra::Access::OverwriteAll);
310  auto x2_view = x2->getLocalViewHost(Tpetra::Access::OverwriteAll);
311  Scalar val1(VectorSize, BaseScalar(0.0)), val2(VectorSize, BaseScalar(0.0));
312  for (size_t i=0; i<num_my_row; ++i) {
313  const GlobalOrdinal row = myGIDs[i];
314  for (size_t j=0; j<ncol; ++j) {
315  for (LocalOrdinal k=0; k<VectorSize; ++k) {
316  BaseScalar v =
317  generate_multi_vector_coefficient<BaseScalar,size_t>(
318  nrow, ncol, VectorSize, row, j, k);
319  val1.fastAccessCoeff(k) = v;
320  val2.fastAccessCoeff(k) = 0.12345 * v;
321  }
322  x1_view(i,j) = val1;
323  x2_view(i,j) = val2;
324  }
325  }
326  }
327 
328  // Add
329  Scalar alpha = 2.1;
330  Scalar beta = 3.7;
331  RCP<Tpetra_MultiVector> y = Tpetra::createMultiVector<Scalar>(map, ncol);
332  y->update(alpha, *x1, beta, *x2, Scalar(0.0));
333 
334  // y->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
335  // Teuchos::VERB_EXTREME);
336 
337  // Check
338  auto y_view = y->getLocalViewHost(Tpetra::Access::ReadOnly);
339  Scalar val(VectorSize, BaseScalar(0.0));
340  BaseScalar tol = 1.0e-14;
341  for (size_t i=0; i<num_my_row; ++i) {
342  const GlobalOrdinal row = myGIDs[i];
343  for (size_t j=0; j<ncol; ++j) {
344  for (LocalOrdinal k=0; k<VectorSize; ++k) {
345  BaseScalar v = generate_multi_vector_coefficient<BaseScalar,size_t>(
346  nrow, ncol, VectorSize, row, j, k);
347  val.fastAccessCoeff(k) = alpha.coeff(k)*v + 0.12345*beta.coeff(k)*v;
348  }
349  TEST_EQUALITY( y_view(i,j).size(), VectorSize );
350  for (LocalOrdinal k=0; k<VectorSize; ++k)
352  val.fastAccessCoeff(k), tol );
353  }
354  }
355 }
356 
357 //
358 // Test multi-vector dot product
359 //
361  Tpetra_CrsMatrix_MP, MultiVectorDot, Storage, LocalOrdinal, GlobalOrdinal, Node )
362 {
363  using Teuchos::RCP;
364  using Teuchos::rcp;
365  using Teuchos::ArrayView;
366  using Teuchos::Array;
367  using Teuchos::ArrayRCP;
368 
369  typedef typename Storage::value_type BaseScalar;
371 
372  typedef Teuchos::Comm<int> Tpetra_Comm;
373  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
374  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_MultiVector;
375  typedef typename Tpetra_MultiVector::dot_type dot_type;
376 
377  // Ensure device is initialized
378  if ( !Kokkos::is_initialized() )
379  Kokkos::initialize();
380 
381  // Comm
382  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
383 
384  // Map
385  GlobalOrdinal nrow = 10;
386  RCP<const Tpetra_Map> map =
387  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
388  nrow, comm);
389  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
390  const size_t num_my_row = myGIDs.size();
391 
392  // Fill vectors
393  size_t ncol = 5;
394  RCP<Tpetra_MultiVector> x1 = Tpetra::createMultiVector<Scalar>(map, ncol);
395  RCP<Tpetra_MultiVector> x2 = Tpetra::createMultiVector<Scalar>(map, ncol);
396  {
397  auto x1_view = x1->getLocalViewHost(Tpetra::Access::OverwriteAll);
398  auto x2_view = x2->getLocalViewHost(Tpetra::Access::OverwriteAll);
399  Scalar val1(VectorSize, BaseScalar(0.0)), val2(VectorSize, BaseScalar(0.0));
400  for (size_t i=0; i<num_my_row; ++i) {
401  const GlobalOrdinal row = myGIDs[i];
402  for (size_t j=0; j<ncol; ++j) {
403  for (LocalOrdinal k=0; k<VectorSize; ++k) {
404  BaseScalar v =
405  generate_multi_vector_coefficient<BaseScalar,size_t>(
406  nrow, ncol, VectorSize, row, j, k);
407  val1.fastAccessCoeff(k) = v;
408  val2.fastAccessCoeff(k) = 0.12345 * v;
409  }
410  x1_view(i,j) = val1;
411  x2_view(i,j) = val2;
412  }
413  }
414  }
415 
416  // Dot product
417  Array<dot_type> dots(ncol);
418  x1->dot(*x2, dots());
419 
420  // Check
421 
422 #ifdef HAVE_STOKHOS_ENSEMBLE_REDUCT
423 
424  // Local contribution
425  Array<dot_type> local_vals(ncol, dot_type(0));
426  for (size_t i=0; i<num_my_row; ++i) {
427  const GlobalOrdinal row = myGIDs[i];
428  for (size_t j=0; j<ncol; ++j) {
429  for (LocalOrdinal k=0; k<VectorSize; ++k) {
430  BaseScalar v = generate_multi_vector_coefficient<BaseScalar,size_t>(
431  nrow, ncol, VectorSize, row, j, k);
432  local_vals[j] += 0.12345 * v * v;
433  }
434  }
435  }
436 
437  // Global reduction
438  Array<dot_type> vals(ncol, dot_type(0));
439  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, Teuchos::as<int>(ncol),
440  local_vals.getRawPtr(), vals.getRawPtr());
441 
442 #else
443 
444  // Local contribution
445  Array<dot_type> local_vals(ncol, dot_type(VectorSize, 0.0));
446  for (size_t i=0; i<num_my_row; ++i) {
447  const GlobalOrdinal row = myGIDs[i];
448  for (size_t j=0; j<ncol; ++j) {
449  for (LocalOrdinal k=0; k<VectorSize; ++k) {
450  BaseScalar v = generate_multi_vector_coefficient<BaseScalar,size_t>(
451  nrow, ncol, VectorSize, row, j, k);
452  local_vals[j].fastAccessCoeff(k) += 0.12345 * v * v;
453  }
454  }
455  }
456 
457  // Global reduction
458  Array<dot_type> vals(ncol, dot_type(VectorSize, 0.0));
459  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, Teuchos::as<int>(ncol),
460  local_vals.getRawPtr(), vals.getRawPtr());
461 
462 #endif
463 
464  BaseScalar tol = 1.0e-14;
465  for (size_t j=0; j<ncol; ++j) {
466  out << "dots(" << j << ") = " << dots[j]
467  << " expected(" << j << ") = " << vals[j] << std::endl;
468  TEST_FLOATING_EQUALITY( dots[j], vals[j], tol );
469  }
470 }
471 
472 //
473 // Test multi-vector dot product using subviews
474 //
476  Tpetra_CrsMatrix_MP, MultiVectorDotSub, Storage, LocalOrdinal, GlobalOrdinal, Node )
477 {
478  using Teuchos::RCP;
479  using Teuchos::rcp;
480  using Teuchos::ArrayView;
481  using Teuchos::Array;
482  using Teuchos::ArrayRCP;
483 
484  typedef typename Storage::value_type BaseScalar;
486 
487  typedef Teuchos::Comm<int> Tpetra_Comm;
488  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
489  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_MultiVector;
490  typedef typename Tpetra_MultiVector::dot_type dot_type;
491 
492  // Ensure device is initialized
493  if ( !Kokkos::is_initialized() )
494  Kokkos::initialize();
495 
496  // Comm
497  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
498 
499  // Map
500  GlobalOrdinal nrow = 10;
501  RCP<const Tpetra_Map> map =
502  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
503  nrow, comm);
504  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
505  const size_t num_my_row = myGIDs.size();
506 
507  // Fill vectors
508  size_t ncol = 5;
509  RCP<Tpetra_MultiVector> x1 = Tpetra::createMultiVector<Scalar>(map, ncol);
510  RCP<Tpetra_MultiVector> x2 = Tpetra::createMultiVector<Scalar>(map, ncol);
511  {
512  auto x1_view = x1->getLocalViewHost(Tpetra::Access::OverwriteAll);
513  auto x2_view = x2->getLocalViewHost(Tpetra::Access::OverwriteAll);
514  Scalar val1(VectorSize, BaseScalar(0.0)), val2(VectorSize, BaseScalar(0.0));
515  for (size_t i=0; i<num_my_row; ++i) {
516  const GlobalOrdinal row = myGIDs[i];
517  for (size_t j=0; j<ncol; ++j) {
518  for (LocalOrdinal k=0; k<VectorSize; ++k) {
519  BaseScalar v =
520  generate_multi_vector_coefficient<BaseScalar,size_t>(
521  nrow, ncol, VectorSize, row, j, k);
522  val1.fastAccessCoeff(k) = v;
523  val2.fastAccessCoeff(k) = 0.12345 * v;
524  }
525  x1_view(i,j) = val1;
526  x2_view(i,j) = val2;
527  }
528  }
529  }
530 
531  // Get subviews
532  size_t ncol_sub = 2;
533  Teuchos::Array<size_t> cols(ncol_sub);
534  cols[0] = 4; cols[1] = 2;
535  RCP<const Tpetra_MultiVector> x1_sub = x1->subView(cols());
536  RCP<const Tpetra_MultiVector> x2_sub = x2->subView(cols());
537 
538  // Dot product
539  Array<dot_type> dots(ncol_sub);
540  x1_sub->dot(*x2_sub, dots());
541 
542  // Check
543 
544 #ifdef HAVE_STOKHOS_ENSEMBLE_REDUCT
545 
546  // Local contribution
547  Array<dot_type> local_vals(ncol_sub, dot_type(0));
548  for (size_t i=0; i<num_my_row; ++i) {
549  const GlobalOrdinal row = myGIDs[i];
550  for (size_t j=0; j<ncol_sub; ++j) {
551  for (LocalOrdinal k=0; k<VectorSize; ++k) {
552  BaseScalar v = generate_multi_vector_coefficient<BaseScalar,size_t>(
553  nrow, ncol, VectorSize, row, cols[j], k);
554  local_vals[j] += 0.12345 * v * v;
555  }
556  }
557  }
558 
559  // Global reduction
560  Array<dot_type> vals(ncol_sub, dot_type(0));
561  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM,
562  Teuchos::as<int>(ncol_sub), local_vals.getRawPtr(),
563  vals.getRawPtr());
564 
565 #else
566 
567  // Local contribution
568  Array<dot_type> local_vals(ncol_sub, dot_type(VectorSize, 0.0));
569  for (size_t i=0; i<num_my_row; ++i) {
570  const GlobalOrdinal row = myGIDs[i];
571  for (size_t j=0; j<ncol_sub; ++j) {
572  for (LocalOrdinal k=0; k<VectorSize; ++k) {
573  BaseScalar v = generate_multi_vector_coefficient<BaseScalar,size_t>(
574  nrow, ncol, VectorSize, row, cols[j], k);
575  local_vals[j].fastAccessCoeff(k) += 0.12345 * v * v;
576  }
577  }
578  }
579 
580  // Global reduction
581  Array<dot_type> vals(ncol_sub, dot_type(VectorSize, 0.0));
582  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM,
583  Teuchos::as<int>(ncol_sub), local_vals.getRawPtr(),
584  vals.getRawPtr());
585 
586 #endif
587 
588  BaseScalar tol = 1.0e-14;
589  for (size_t j=0; j<ncol_sub; ++j) {
590  out << "dots(" << j << ") = " << dots[j]
591  << " expected(" << j << ") = " << vals[j] << std::endl;
592  TEST_FLOATING_EQUALITY( dots[j], vals[j], tol );
593  }
594 }
595 
596 //
597 // Test matrix-vector multiplication for a simple banded upper-triangular matrix
598 //
600  Tpetra_CrsMatrix_MP, MatrixVectorMultiply, Storage, LocalOrdinal, GlobalOrdinal, Node )
601 {
602  using Teuchos::RCP;
603  using Teuchos::rcp;
604  using Teuchos::ArrayView;
605  using Teuchos::Array;
606  using Teuchos::ArrayRCP;
607 
608  typedef typename Storage::value_type BaseScalar;
610 
611  typedef Teuchos::Comm<int> Tpetra_Comm;
612  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
613  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
614  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix;
615  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph;
616 
617  // Ensure device is initialized
618  if ( !Kokkos::is_initialized() )
619  Kokkos::initialize();
620 
621  // Build banded matrix
622  GlobalOrdinal nrow = 10;
623  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
624  RCP<const Tpetra_Map> map =
625  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
626  nrow, comm);
627  RCP<Tpetra_CrsGraph> graph =
628  rcp(new Tpetra_CrsGraph(map, size_t(2)));
629  Array<GlobalOrdinal> columnIndices(2);
630  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
631  const size_t num_my_row = myGIDs.size();
632  for (size_t i=0; i<num_my_row; ++i) {
633  const GlobalOrdinal row = myGIDs[i];
634  columnIndices[0] = row;
635  size_t ncol = 1;
636  if (row != nrow-1) {
637  columnIndices[1] = row+1;
638  ncol = 2;
639  }
640  graph->insertGlobalIndices(row, columnIndices(0,ncol));
641  }
642  graph->fillComplete();
643  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph));
644 
645  // Set values in matrix
646  Array<Scalar> vals(2);
647  Scalar val(VectorSize, BaseScalar(0.0));
648  for (size_t i=0; i<num_my_row; ++i) {
649  const GlobalOrdinal row = myGIDs[i];
650  columnIndices[0] = row;
651  for (LocalOrdinal j=0; j<VectorSize; ++j)
652  val.fastAccessCoeff(j) = generate_vector_coefficient<BaseScalar,size_t>(
653  nrow, VectorSize, row, j);
654  vals[0] = val;
655  size_t ncol = 1;
656 
657  if (row != nrow-1) {
658  columnIndices[1] = row+1;
659  for (LocalOrdinal j=0; j<VectorSize; ++j)
660  val.fastAccessCoeff(j) = generate_vector_coefficient<BaseScalar,size_t>(
661  nrow, VectorSize, row+1, j);
662  vals[1] = val;
663  ncol = 2;
664  }
665  matrix->replaceGlobalValues(row, columnIndices(0,ncol), vals(0,ncol));
666  }
667  matrix->fillComplete();
668 
669  // Fill vector
670  RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map);
671  {
672  auto x_view = x->getLocalViewHost(Tpetra::Access::OverwriteAll);
673  for (size_t i=0; i<num_my_row; ++i) {
674  const GlobalOrdinal row = myGIDs[i];
675  for (LocalOrdinal j=0; j<VectorSize; ++j)
676  val.fastAccessCoeff(j) = generate_vector_coefficient<BaseScalar,size_t>(
677  nrow, VectorSize, row, j);
678  x_view(i,0) = val;
679  }
680  }
681 
682  // matrix->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
683  // Teuchos::VERB_EXTREME);
684 
685  // x->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
686  // Teuchos::VERB_EXTREME);
687 
688  // Multiply
689  RCP<Tpetra_Vector> y = Tpetra::createVector<Scalar>(map);
690  matrix->apply(*x, *y);
691 
692  // y->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
693  // Teuchos::VERB_EXTREME);
694 
695  // Check
696  auto y_view = y->getLocalViewHost(Tpetra::Access::ReadOnly);
697  BaseScalar tol = 1.0e-14;
698  for (size_t i=0; i<num_my_row; ++i) {
699  const GlobalOrdinal row = myGIDs[i];
700  for (LocalOrdinal j=0; j<VectorSize; ++j) {
701  BaseScalar v = generate_vector_coefficient<BaseScalar,size_t>(
702  nrow, VectorSize, row, j);
703  val.fastAccessCoeff(j) = v*v;
704  }
705  if (row != nrow-1) {
706  for (LocalOrdinal j=0; j<VectorSize; ++j) {
707  BaseScalar v = generate_vector_coefficient<BaseScalar,size_t>(
708  nrow, VectorSize, row+1, j);
709  val.fastAccessCoeff(j) += v*v;
710  }
711  }
712  TEST_EQUALITY( y_view(i,0).size(), VectorSize );
713  for (LocalOrdinal j=0; j<VectorSize; ++j)
714  TEST_FLOATING_EQUALITY( y_view(i,0).fastAccessCoeff(j), val.fastAccessCoeff(j), tol );
715  }
716 }
717 
718 //
719 // Test matrix-multi-vector multiplication for a simple banded upper-triangular matrix
720 //
722  Tpetra_CrsMatrix_MP, MatrixMultiVectorMultiply, Storage, LocalOrdinal, GlobalOrdinal, Node )
723 {
724  using Teuchos::RCP;
725  using Teuchos::rcp;
726  using Teuchos::ArrayView;
727  using Teuchos::Array;
728  using Teuchos::ArrayRCP;
729 
730  typedef typename Storage::value_type BaseScalar;
732 
733  typedef Teuchos::Comm<int> Tpetra_Comm;
734  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
735  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_MultiVector;
736  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix;
737  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph;
738 
739  // Ensure device is initialized
740  if ( !Kokkos::is_initialized() )
741  Kokkos::initialize();
742 
743  // Build banded matrix
744  GlobalOrdinal nrow = 10;
745  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
746  RCP<const Tpetra_Map> map =
747  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
748  nrow, comm);
749  RCP<Tpetra_CrsGraph> graph =
750  rcp(new Tpetra_CrsGraph(map, size_t(2)));
751  Array<GlobalOrdinal> columnIndices(2);
752  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
753  const size_t num_my_row = myGIDs.size();
754  for (size_t i=0; i<num_my_row; ++i) {
755  const GlobalOrdinal row = myGIDs[i];
756  columnIndices[0] = row;
757  size_t ncol = 1;
758  if (row != nrow-1) {
759  columnIndices[1] = row+1;
760  ncol = 2;
761  }
762  graph->insertGlobalIndices(row, columnIndices(0,ncol));
763  }
764  graph->fillComplete();
765  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph));
766 
767  // Set values in matrix
768  Array<Scalar> vals(2);
769  Scalar val(VectorSize, BaseScalar(0.0));
770  for (size_t i=0; i<num_my_row; ++i) {
771  const GlobalOrdinal row = myGIDs[i];
772  columnIndices[0] = row;
773  for (LocalOrdinal j=0; j<VectorSize; ++j)
774  val.fastAccessCoeff(j) = generate_vector_coefficient<BaseScalar,size_t>(
775  nrow, VectorSize, row, j);
776  vals[0] = val;
777  size_t ncol = 1;
778 
779  if (row != nrow-1) {
780  columnIndices[1] = row+1;
781  for (LocalOrdinal j=0; j<VectorSize; ++j)
782  val.fastAccessCoeff(j) = generate_vector_coefficient<BaseScalar,size_t>(
783  nrow, VectorSize, row+1, j);
784  vals[1] = val;
785  ncol = 2;
786  }
787  matrix->replaceGlobalValues(row, columnIndices(0,ncol), vals(0,ncol));
788  }
789  matrix->fillComplete();
790 
791  // Fill multi-vector
792  size_t ncol = 5;
793  RCP<Tpetra_MultiVector> x = Tpetra::createMultiVector<Scalar>(map, ncol);
794  {
795  auto x_view = x->getLocalViewHost(Tpetra::Access::OverwriteAll);
796  for (size_t i=0; i<num_my_row; ++i) {
797  const GlobalOrdinal row = myGIDs[i];
798  for (size_t j=0; j<ncol; ++j) {
799  for (LocalOrdinal k=0; k<VectorSize; ++k) {
800  BaseScalar v =
801  generate_multi_vector_coefficient<BaseScalar,size_t>(
802  nrow, ncol, VectorSize, row, j, k);
803  val.fastAccessCoeff(k) = v;
804  }
805  x_view(i,j) = val;
806  }
807  }
808  }
809 
810  // matrix->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
811  // Teuchos::VERB_EXTREME);
812 
813  // x->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
814  // Teuchos::VERB_EXTREME);
815 
816  // Multiply
817  RCP<Tpetra_MultiVector> y = Tpetra::createMultiVector<Scalar>(map, ncol);
818  matrix->apply(*x, *y);
819 
820  // y->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
821  // Teuchos::VERB_EXTREME);
822 
823  // Check
824  auto y_view = y->getLocalViewHost(Tpetra::Access::ReadOnly);
825  BaseScalar tol = 1.0e-14;
826  for (size_t i=0; i<num_my_row; ++i) {
827  const GlobalOrdinal row = myGIDs[i];
828  for (size_t j=0; j<ncol; ++j) {
829  for (LocalOrdinal k=0; k<VectorSize; ++k) {
830  BaseScalar v1 = generate_vector_coefficient<BaseScalar,size_t>(
831  nrow, VectorSize, row, k);
832  BaseScalar v2 = generate_multi_vector_coefficient<BaseScalar,size_t>(
833  nrow, ncol, VectorSize, row, j, k);
834  val.fastAccessCoeff(k) = v1*v2;
835  }
836  if (row != nrow-1) {
837  for (LocalOrdinal k=0; k<VectorSize; ++k) {
838  BaseScalar v1 = generate_vector_coefficient<BaseScalar,size_t>(
839  nrow, VectorSize, row+1, k);
840  BaseScalar v2 = generate_multi_vector_coefficient<BaseScalar,size_t>(
841  nrow, ncol, VectorSize, row+1, j, k);
842  val.fastAccessCoeff(k) += v1*v2;
843  }
844  }
845  TEST_EQUALITY( y_view(i,j).size(), VectorSize );
846  for (LocalOrdinal k=0; k<VectorSize; ++k)
848  val.fastAccessCoeff(k), tol );
849  }
850  }
851 }
852 
853 //
854 // Test flattening MP::Vector matrix
855 //
857  Tpetra_CrsMatrix_MP, Flatten, Storage, LocalOrdinal, GlobalOrdinal, Node )
858 {
859  using Teuchos::RCP;
860  using Teuchos::rcp;
861  using Teuchos::ArrayView;
862  using Teuchos::Array;
863  using Teuchos::ArrayRCP;
864 
865  typedef typename Storage::value_type BaseScalar;
867 
868  typedef Teuchos::Comm<int> Tpetra_Comm;
869  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
870  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
871  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix;
872  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph;
873 
874  typedef Tpetra::Vector<BaseScalar,LocalOrdinal,GlobalOrdinal,Node> Flat_Tpetra_Vector;
875  typedef Tpetra::CrsMatrix<BaseScalar,LocalOrdinal,GlobalOrdinal,Node> Flat_Tpetra_CrsMatrix;
876 
877  // Ensure device is initialized
878  if ( !Kokkos::is_initialized() )
879  Kokkos::initialize();
880 
881  // Build banded matrix
882  GlobalOrdinal nrow = 10;
883  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
884  RCP<const Tpetra_Map> map =
885  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
886  nrow, comm);
887  RCP<Tpetra_CrsGraph> graph =
888  rcp(new Tpetra_CrsGraph(map, size_t(2)));
889  Array<GlobalOrdinal> columnIndices(2);
890  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
891  const size_t num_my_row = myGIDs.size();
892  for (size_t i=0; i<num_my_row; ++i) {
893  const GlobalOrdinal row = myGIDs[i];
894  columnIndices[0] = row;
895  size_t ncol = 1;
896  if (row != nrow-1) {
897  columnIndices[1] = row+1;
898  ncol = 2;
899  }
900  graph->insertGlobalIndices(row, columnIndices(0,ncol));
901  }
902  graph->fillComplete();
903  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph));
904 
905  // Set values in matrix
906  Array<Scalar> vals(2);
907  Scalar val(VectorSize, BaseScalar(0.0));
908  for (size_t i=0; i<num_my_row; ++i) {
909  const GlobalOrdinal row = myGIDs[i];
910  columnIndices[0] = row;
911  for (LocalOrdinal j=0; j<VectorSize; ++j)
912  val.fastAccessCoeff(j) = generate_vector_coefficient<BaseScalar,size_t>(
913  nrow, VectorSize, row, j);
914  vals[0] = val;
915  size_t ncol = 1;
916 
917  if (row != nrow-1) {
918  columnIndices[1] = row+1;
919  for (LocalOrdinal j=0; j<VectorSize; ++j)
920  val.fastAccessCoeff(j) = generate_vector_coefficient<BaseScalar,size_t>(
921  nrow, VectorSize, row+1, j);
922  vals[1] = val;
923  ncol = 2;
924  }
925  matrix->replaceGlobalValues(row, columnIndices(0,ncol), vals(0,ncol));
926  }
927  matrix->fillComplete();
928 
929  // Fill vector
930  RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map);
931  {
932  auto x_view = x->getLocalViewHost(Tpetra::Access::OverwriteAll);
933  for (size_t i=0; i<num_my_row; ++i) {
934  const GlobalOrdinal row = myGIDs[i];
935  for (LocalOrdinal j=0; j<VectorSize; ++j)
936  val.fastAccessCoeff(j) = generate_vector_coefficient<BaseScalar,size_t>(
937  nrow, VectorSize, row, j);
938  x_view(i,0) = val;
939  }
940  }
941 
942  // Multiply
943  RCP<Tpetra_Vector> y = Tpetra::createVector<Scalar>(map);
944  matrix->apply(*x, *y);
945 
946  // Flatten matrix
947  RCP<const Tpetra_Map> flat_x_map, flat_y_map;
948  RCP<const Tpetra_CrsGraph> flat_graph =
949  Stokhos::create_flat_mp_graph(*graph, flat_x_map, flat_y_map, VectorSize);
950  RCP<Flat_Tpetra_CrsMatrix> flat_matrix =
951  Stokhos::create_flat_matrix(*matrix, flat_graph, VectorSize);
952 
953  // Multiply with flattened matix
954  RCP<Tpetra_Vector> y2 = Tpetra::createVector<Scalar>(map);
955  {
956  RCP<Flat_Tpetra_Vector> flat_x =
957  Stokhos::create_flat_vector_view(*x, flat_x_map);
958  RCP<Flat_Tpetra_Vector> flat_y =
959  Stokhos::create_flat_vector_view(*y2, flat_y_map);
960  flat_matrix->apply(*flat_x, *flat_y);
961  }
962 
963  // flat_y->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
964  // Teuchos::VERB_EXTREME);
965 
966  // Check
967  BaseScalar tol = 1.0e-14;
968  auto y_view = y-> getLocalViewHost(Tpetra::Access::ReadOnly);
969  auto y2_view = y2->getLocalViewHost(Tpetra::Access::ReadOnly);
970  for (size_t i=0; i<num_my_row; ++i) {
971  TEST_EQUALITY( y_view( i,0).size(), VectorSize );
972  TEST_EQUALITY( y2_view(i,0).size(), VectorSize );
973  for (LocalOrdinal j=0; j<VectorSize; ++j)
975  y2_view(i,0).fastAccessCoeff(j), tol );
976  }
977 }
978 
979 //
980 // Test interaction between Tpetra WrappedDualView and MP::Vector
981 //
983  Tpetra_CrsMatrix_MP, WrappedDualView, Storage, LocalOrdinal, GlobalOrdinal, Node )
984 {
985  //BMK 6-2021: This test is required because a View of MP::Vector has slightly different behavior than a typical Kokkos::View.
986  //If you construct a Kokkos::View with a label and 0 extent, it gets a non-null allocation.
987  //But for View<MP::Vector>, the same constructor produces a null data pointer but
988  //an active reference counting node (use_count() > 0).
989  //This test makes sure that Tpetra WrappedDualView works correctly with a View where data() == nullptr but use_count() > 0.
990  using Teuchos::RCP;
991  using Teuchos::rcp;
992  using Teuchos::ArrayView;
993  using Teuchos::Array;
994  using Teuchos::ArrayRCP;
995 
996  //typedef typename Storage::value_type BaseScalar;
998 
999  using DualViewType = Kokkos::DualView<Scalar*, typename Node::device_type>;
1000  using WDV = Tpetra::Details::WrappedDualView<DualViewType>;
1001  using values_view = typename DualViewType::t_dev;
1002 
1003  // Ensure device is initialized
1004  if ( !Kokkos::is_initialized() )
1005  Kokkos::initialize();
1006 
1007  WDV wdv;
1008  {
1009  values_view myView("emptyTestView", 0);
1010  wdv = WDV(myView);
1011  }
1012  size_t use_h = wdv.getHostView(Tpetra::Access::ReadOnly).use_count();
1013  size_t use_d = wdv.getDeviceView(Tpetra::Access::ReadOnly).use_count();
1014  //The WrappedDualView is now the only object holding references to the host and device views,
1015  //so they should have identical use counts.
1016  TEST_EQUALITY(use_h, use_d);
1017 }
1018 
1019 //
1020 // Test simple CG solve without preconditioning for a 1-D Laplacian matrix
1021 //
1023  Tpetra_CrsMatrix_MP, SimpleCG, Storage, LocalOrdinal, GlobalOrdinal, Node )
1024 {
1025  using Teuchos::RCP;
1026  using Teuchos::rcp;
1027  using Teuchos::ArrayView;
1028  using Teuchos::Array;
1029  using Teuchos::ArrayRCP;
1030  using Teuchos::ParameterList;
1031 
1032  typedef typename Storage::value_type BaseScalar;
1034 
1035  typedef Teuchos::Comm<int> Tpetra_Comm;
1036  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
1037  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
1038  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix;
1039  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph;
1040 
1041  // Ensure device is initialized
1042  if ( !Kokkos::is_initialized() )
1043  Kokkos::initialize();
1044 
1045  // 1-D Laplacian matrix
1046  GlobalOrdinal nrow = 50;
1047  BaseScalar h = 1.0 / static_cast<BaseScalar>(nrow-1);
1048  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
1049  RCP<const Tpetra_Map> map =
1050  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
1051  nrow, comm);
1052  RCP<Tpetra_CrsGraph> graph =
1053  rcp(new Tpetra_CrsGraph(map, size_t(3)));
1054  Array<GlobalOrdinal> columnIndices(3);
1055  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
1056  const size_t num_my_row = myGIDs.size();
1057  for (size_t i=0; i<num_my_row; ++i) {
1058  const GlobalOrdinal row = myGIDs[i];
1059  if (row == 0 || row == nrow-1) { // Boundary nodes
1060  columnIndices[0] = row;
1061  graph->insertGlobalIndices(row, columnIndices(0,1));
1062  }
1063  else { // Interior nodes
1064  columnIndices[0] = row-1;
1065  columnIndices[1] = row;
1066  columnIndices[2] = row+1;
1067  graph->insertGlobalIndices(row, columnIndices(0,3));
1068  }
1069  }
1070  graph->fillComplete();
1071  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph));
1072 
1073  // Set values in matrix
1074  Array<Scalar> vals(3);
1075  Scalar a_val(VectorSize, BaseScalar(0.0));
1076  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1077  a_val.fastAccessCoeff(j) =
1078  BaseScalar(1.0) + BaseScalar(j) / BaseScalar(VectorSize);
1079  }
1080  for (size_t i=0; i<num_my_row; ++i) {
1081  const GlobalOrdinal row = myGIDs[i];
1082  if (row == 0 || row == nrow-1) { // Boundary nodes
1083  columnIndices[0] = row;
1084  vals[0] = Scalar(1.0);
1085  matrix->replaceGlobalValues(row, columnIndices(0,1), vals(0,1));
1086  }
1087  else {
1088  columnIndices[0] = row-1;
1089  columnIndices[1] = row;
1090  columnIndices[2] = row+1;
1091  vals[0] = Scalar(-1.0) * a_val;
1092  vals[1] = Scalar(2.0) * a_val;
1093  vals[2] = Scalar(-1.0) * a_val;
1094  matrix->replaceGlobalValues(row, columnIndices(0,3), vals(0,3));
1095  }
1096  }
1097  matrix->fillComplete();
1098 
1099  matrix->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
1101 
1102  // Fill RHS vector
1103  RCP<Tpetra_Vector> b = Tpetra::createVector<Scalar>(map);
1104  Scalar b_val;
1105  {
1106  auto b_view = b->getLocalViewHost(Tpetra::Access::OverwriteAll);
1107  b_val = Scalar(VectorSize, BaseScalar(0.0));
1108  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1109  b_val.fastAccessCoeff(j) =
1110  BaseScalar(-1.0) + BaseScalar(j) / BaseScalar(VectorSize);
1111  }
1112  for (size_t i=0; i<num_my_row; ++i) {
1113  const GlobalOrdinal row = myGIDs[i];
1114  if (row == 0 || row == nrow-1)
1115  b_view(i,0) = Scalar(0.0);
1116  else
1117  b_view(i,0) = -Scalar(b_val * h * h);
1118  }
1119  }
1120 
1121  // Solve
1122  RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map);
1123  typedef Kokkos::ArithTraits<BaseScalar> BST;
1124  typedef typename BST::mag_type base_mag_type;
1125  typedef typename Tpetra_Vector::mag_type mag_type;
1126  base_mag_type btol = 1e-9;
1127  mag_type tol = btol;
1128  int max_its = 1000;
1129  bool solved = Stokhos::CG_Solve(*matrix, *x, *b, tol, max_its,
1130  out.getOStream().get());
1131  TEST_EQUALITY_CONST( solved, true );
1132 
1133  // x->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
1134  // Teuchos::VERB_EXTREME);
1135 
1136  // Check -- For a*y'' = b, correct answer is y = 0.5 *(b/a) * x * (x-1)
1137  btol = 1000*btol;
1138  auto x_view = x->getLocalViewHost(Tpetra::Access::ReadOnly);
1139  Scalar val(VectorSize, BaseScalar(0.0));
1140  for (size_t i=0; i<num_my_row; ++i) {
1141  const GlobalOrdinal row = myGIDs[i];
1142  BaseScalar xx = row * h;
1143  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1144  val.fastAccessCoeff(j) =
1145  BaseScalar(0.5) * (b_val.coeff(j)/a_val.coeff(j)) * xx * (xx - BaseScalar(1.0));
1146  }
1147  TEST_EQUALITY( x_view(i,0).size(), VectorSize );
1148 
1149  // Set small values to zero
1150  Scalar v = x_view(i,0);
1151  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1152  if (BST::abs(v.coeff(j)) < btol)
1153  v.fastAccessCoeff(j) = BaseScalar(0.0);
1154  if (BST::abs(val.coeff(j)) < btol)
1155  val.fastAccessCoeff(j) = BaseScalar(0.0);
1156  }
1157 
1158  for (LocalOrdinal j=0; j<VectorSize; ++j)
1159  TEST_FLOATING_EQUALITY(v.coeff(j), val.coeff(j), btol);
1160  }
1161 
1162 }
1163 
1164 #if defined(HAVE_STOKHOS_MUELU) && defined(HAVE_STOKHOS_AMESOS2) && defined(HAVE_STOKHOS_IFPACK2)
1165 
1166 //
1167 // Test simple CG solve with MueLu preconditioning for a 1-D Laplacian matrix
1168 //
1170  Tpetra_CrsMatrix_MP, SimplePCG_Muelu, Storage, LocalOrdinal, GlobalOrdinal, Node )
1171 {
1172  using Teuchos::RCP;
1173  using Teuchos::rcp;
1174  using Teuchos::ArrayView;
1175  using Teuchos::Array;
1176  using Teuchos::ArrayRCP;
1177  using Teuchos::ParameterList;
1178  using Teuchos::getParametersFromXmlFile;
1179 
1180  typedef typename Storage::value_type BaseScalar;
1182 
1183  typedef Teuchos::Comm<int> Tpetra_Comm;
1184  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
1185  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
1186  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix;
1187  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph;
1188 
1189  // Ensure device is initialized
1190  if ( !Kokkos::is_initialized() )
1191  Kokkos::initialize();
1192 
1193  // 1-D Laplacian matrix
1194  GlobalOrdinal nrow = 50;
1195  BaseScalar h = 1.0 / static_cast<BaseScalar>(nrow-1);
1196  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
1197  RCP<const Tpetra_Map> map =
1198  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
1199  nrow, comm);
1200  RCP<Tpetra_CrsGraph> graph =
1201  rcp(new Tpetra_CrsGraph(map, size_t(3)));
1202  Array<GlobalOrdinal> columnIndices(3);
1203  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
1204  const size_t num_my_row = myGIDs.size();
1205  for (size_t i=0; i<num_my_row; ++i) {
1206  const GlobalOrdinal row = myGIDs[i];
1207  if (row == 0 || row == nrow-1) { // Boundary nodes
1208  columnIndices[0] = row;
1209  graph->insertGlobalIndices(row, columnIndices(0,1));
1210  }
1211  else { // Interior nodes
1212  columnIndices[0] = row-1;
1213  columnIndices[1] = row;
1214  columnIndices[2] = row+1;
1215  graph->insertGlobalIndices(row, columnIndices(0,3));
1216  }
1217  }
1218  graph->fillComplete();
1219  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph));
1220 
1221  // Set values in matrix
1222  Array<Scalar> vals(3);
1223  Scalar a_val(VectorSize, BaseScalar(0.0));
1224  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1225  a_val.fastAccessCoeff(j) =
1226  BaseScalar(1.0) + BaseScalar(j) / BaseScalar(VectorSize);
1227  }
1228  for (size_t i=0; i<num_my_row; ++i) {
1229  const GlobalOrdinal row = myGIDs[i];
1230  if (row == 0 || row == nrow-1) { // Boundary nodes
1231  columnIndices[0] = row;
1232  vals[0] = Scalar(1.0);
1233  matrix->replaceGlobalValues(row, columnIndices(0,1), vals(0,1));
1234  }
1235  else {
1236  columnIndices[0] = row-1;
1237  columnIndices[1] = row;
1238  columnIndices[2] = row+1;
1239  vals[0] = Scalar(-1.0) * a_val;
1240  vals[1] = Scalar(2.0) * a_val;
1241  vals[2] = Scalar(-1.0) * a_val;
1242  matrix->replaceGlobalValues(row, columnIndices(0,3), vals(0,3));
1243  }
1244  }
1245  matrix->fillComplete();
1246 
1247  // Fill RHS vector
1248  RCP<Tpetra_Vector> b = Tpetra::createVector<Scalar>(map);
1249  Scalar b_val;
1250  {
1251  auto b_view = b->getLocalViewHost(Tpetra::Access::OverwriteAll);
1252  b_val = Scalar(VectorSize, BaseScalar(0.0));
1253  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1254  b_val.fastAccessCoeff(j) =
1255  BaseScalar(-1.0) + BaseScalar(j) / BaseScalar(VectorSize);
1256  }
1257  for (size_t i=0; i<num_my_row; ++i) {
1258  const GlobalOrdinal row = myGIDs[i];
1259  if (row == 0 || row == nrow-1)
1260  b_view(i,0) = Scalar(0.0);
1261  else
1262  b_view(i,0) = -Scalar(b_val * h * h);
1263  }
1264  }
1265 
1266  // Create preconditioner
1267  typedef Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> OP;
1268  RCP<OP> matrix_op = matrix;
1269  RCP<ParameterList> muelu_params =
1270  getParametersFromXmlFile("muelu_cheby.xml");
1271  RCP<OP> M =
1272  MueLu::CreateTpetraPreconditioner<Scalar,LocalOrdinal,GlobalOrdinal,Node>(matrix_op, *muelu_params);
1273 
1274  // Solve
1275  RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map);
1276  typedef Kokkos::ArithTraits<BaseScalar> BST;
1277  typedef typename BST::mag_type base_mag_type;
1278  typedef typename Tpetra_Vector::mag_type mag_type;
1279  base_mag_type btol = 1e-9;
1280  mag_type tol = btol;
1281  int max_its = 1000;
1282  bool solved = Stokhos::PCG_Solve(*matrix, *x, *b, *M, tol, max_its,
1283  out.getOStream().get());
1284  TEST_EQUALITY_CONST( solved, true );
1285 
1286  // x->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
1287  // Teuchos::VERB_EXTREME);
1288 
1289  // Check -- For a*y'' = b, correct answer is y = 0.5 *(b/a) * x * (x-1)
1290  btol = 1000*btol;
1291  auto x_view = x->getLocalViewHost(Tpetra::Access::ReadOnly);
1292  Scalar val(VectorSize, BaseScalar(0.0));
1293  for (size_t i=0; i<num_my_row; ++i) {
1294  const GlobalOrdinal row = myGIDs[i];
1295  BaseScalar xx = row * h;
1296  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1297  val.fastAccessCoeff(j) =
1298  BaseScalar(0.5) * (b_val.coeff(j)/a_val.coeff(j)) * xx * (xx - BaseScalar(1.0));
1299  }
1300  TEST_EQUALITY( x_view(i,0).size(), VectorSize );
1301 
1302  // Set small values to zero
1303  Scalar v = x_view(i,0);
1304  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1305  if (BST::magnitude(v.coeff(j)) < btol)
1306  v.fastAccessCoeff(j) = BaseScalar(0.0);
1307  if (BST::magnitude(val.coeff(j)) < btol)
1308  val.fastAccessCoeff(j) = BaseScalar(0.0);
1309  }
1310 
1311  for (LocalOrdinal j=0; j<VectorSize; ++j)
1312  TEST_FLOATING_EQUALITY(v.coeff(j), val.coeff(j), btol);
1313  }
1314 
1315 }
1316 
1317 #else
1318 
1320  Tpetra_CrsMatrix_MP, SimplePCG_Muelu, Storage, LocalOrdinal, GlobalOrdinal, Node ) {}
1321 
1322 #endif
1323 
1324 #if defined(HAVE_STOKHOS_BELOS)
1325 
1326 //
1327 // Test Belos GMRES solve for a simple banded upper-triangular matrix
1328 //
1330  Tpetra_CrsMatrix_MP, BelosGMRES, Storage, LocalOrdinal, GlobalOrdinal, Node )
1331 {
1332  using Teuchos::RCP;
1333  using Teuchos::rcp;
1334  using Teuchos::ArrayView;
1335  using Teuchos::Array;
1336  using Teuchos::ArrayRCP;
1337  using Teuchos::ParameterList;
1338 
1339  typedef typename Storage::value_type BaseScalar;
1341 
1342  typedef Teuchos::Comm<int> Tpetra_Comm;
1343  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
1344  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
1345  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix;
1346  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph;
1347 
1348  // Ensure device is initialized
1349  if ( !Kokkos::is_initialized() )
1350  Kokkos::initialize();
1351 
1352  // Build banded matrix
1353  GlobalOrdinal nrow = 10;
1354  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
1355  RCP<const Tpetra_Map> map =
1356  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
1357  nrow, comm);
1358  RCP<Tpetra_CrsGraph> graph =
1359  rcp(new Tpetra_CrsGraph(map, size_t(2)));
1360  Array<GlobalOrdinal> columnIndices(2);
1361  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
1362  const size_t num_my_row = myGIDs.size();
1363  for (size_t i=0; i<num_my_row; ++i) {
1364  const GlobalOrdinal row = myGIDs[i];
1365  columnIndices[0] = row;
1366  size_t ncol = 1;
1367  if (row != nrow-1) {
1368  columnIndices[1] = row+1;
1369  ncol = 2;
1370  }
1371  graph->insertGlobalIndices(row, columnIndices(0,ncol));
1372  }
1373  graph->fillComplete();
1374  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph));
1375 
1376  // Set values in matrix
1377  Array<Scalar> vals(2);
1378  Scalar val(VectorSize, BaseScalar(0.0));
1379  for (size_t i=0; i<num_my_row; ++i) {
1380  const GlobalOrdinal row = myGIDs[i];
1381  columnIndices[0] = row;
1382  for (LocalOrdinal j=0; j<VectorSize; ++j)
1383  val.fastAccessCoeff(j) = j+1;
1384  vals[0] = val;
1385  size_t ncol = 1;
1386 
1387  if (row != nrow-1) {
1388  columnIndices[1] = row+1;
1389  for (LocalOrdinal j=0; j<VectorSize; ++j)
1390  val.fastAccessCoeff(j) = j+1;
1391  vals[1] = val;
1392  ncol = 2;
1393  }
1394  matrix->replaceGlobalValues(row, columnIndices(0,ncol), vals(0,ncol));
1395  }
1396  matrix->fillComplete();
1397 
1398  // Fill RHS vector
1399  RCP<Tpetra_Vector> b = Tpetra::createVector<Scalar>(map);
1400  {
1401  auto b_view = b->getLocalViewHost(Tpetra::Access::OverwriteAll);
1402  for (size_t i=0; i<num_my_row; ++i) {
1403  b_view(i,0) = Scalar(1.0);
1404  }
1405  }
1406 
1407  // Solve
1409 #ifdef HAVE_STOKHOS_ENSEMBLE_REDUCT
1410  typedef BaseScalar BelosScalar;
1411 #else
1412  typedef Scalar BelosScalar;
1413 #endif
1414  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
1415  typedef Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> OP;
1416  typedef Belos::LinearProblem<BelosScalar,MV,OP> BLinProb;
1417  RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map);
1418  RCP< BLinProb > problem = rcp(new BLinProb(matrix, x, b));
1419  RCP<ParameterList> belosParams = rcp(new ParameterList);
1420  typename ST::magnitudeType tol = 1e-9;
1421  belosParams->set("Flexible Gmres", false);
1422  belosParams->set("Num Blocks", 100);
1423  belosParams->set("Convergence Tolerance", BelosScalar(tol));
1424  belosParams->set("Maximum Iterations", 100);
1425  belosParams->set("Verbosity", 33);
1426  belosParams->set("Output Style", 1);
1427  belosParams->set("Output Frequency", 1);
1428  belosParams->set("Output Stream", out.getOStream());
1429  RCP<Belos::SolverManager<BelosScalar,MV,OP> > solver =
1430  rcp(new Belos::PseudoBlockGmresSolMgr<BelosScalar,MV,OP>(problem, belosParams));
1431  problem->setProblem();
1432  Belos::ReturnType ret = solver->solve();
1433  TEST_EQUALITY_CONST( ret, Belos::Converged );
1434 
1435  // x->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
1436  // Teuchos::VERB_EXTREME);
1437 
1438  // Check -- Correct answer is:
1439  // [ 0, 0, ..., 0 ]
1440  // [ 1, 1/2, ..., 1/VectorSize ]
1441  // [ 0, 0, ..., 0 ]
1442  // [ 1, 1/2, ..., 1/VectorSize ]
1443  // ....
1444  tol = 1000*tol;
1445  auto x_view = x->getLocalViewHost(Tpetra::Access::ReadOnly);
1446  for (size_t i=0; i<num_my_row; ++i) {
1447  const GlobalOrdinal row = myGIDs[i];
1448  if (row % 2) {
1449  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1450  val.fastAccessCoeff(j) = BaseScalar(1.0) / BaseScalar(j+1);
1451  }
1452  }
1453  else
1454  val = Scalar(VectorSize, BaseScalar(0.0));
1455  TEST_EQUALITY( x_view(i,0).size(), VectorSize );
1456 
1457  // Set small values to zero
1458  Scalar v = x_view(i,0);
1459  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1460  if (ST::magnitude(v.coeff(j)) < tol)
1461  v.fastAccessCoeff(j) = BaseScalar(0.0);
1462  }
1463 
1464  for (LocalOrdinal j=0; j<VectorSize; ++j)
1465  TEST_FLOATING_EQUALITY(v.coeff(j), val.coeff(j), tol);
1466  }
1467 }
1468 
1469 //
1470 // Test Belos GMRES solve for a simple lower-triangular matrix with lucky breakdown with DGKS orthogonalization
1471 //
1473  Tpetra_CrsMatrix_MP, BelosGMRES_DGKS, Storage, LocalOrdinal, GlobalOrdinal, Node )
1474 {
1475  using Teuchos::RCP;
1476  using Teuchos::rcp;
1477  using Teuchos::tuple;
1478  using Teuchos::ArrayView;
1479  using Teuchos::Array;
1480  using Teuchos::ArrayRCP;
1481  using Teuchos::ParameterList;
1482 
1483  typedef typename Storage::value_type BaseScalar;
1485 
1486  typedef Teuchos::Comm<int> Tpetra_Comm;
1487  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
1488  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
1489  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix;
1490  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph;
1491 
1492  // Ensure device is initialized
1493  if ( !Kokkos::is_initialized() )
1494  Kokkos::initialize();
1495 
1496  // Build diagonal matrix
1497  GlobalOrdinal nrow = 20;
1498  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
1499  RCP<const Tpetra_Map> map =
1500  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
1501  nrow, comm);
1502  RCP<Tpetra_CrsGraph> graph =
1503  rcp(new Tpetra_CrsGraph(map, size_t(1)));
1504  Array<GlobalOrdinal> columnIndices(1);
1505  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
1506  const size_t num_my_row = myGIDs.size();
1507  for (size_t i=0; i<num_my_row; ++i) {
1508  const GlobalOrdinal row = myGIDs[i];
1509  columnIndices[0] = row;
1510  size_t ncol = 1;
1511  graph->insertGlobalIndices(row, columnIndices(0,ncol));
1512  }
1513  graph->fillComplete();
1514  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph));
1515 
1516  Array<Scalar> vals(1);
1517  for (size_t i=0; i<num_my_row; ++i) {
1518  const GlobalOrdinal row = myGIDs[i];
1519  columnIndices[0] = row;
1520  vals[0] = Scalar(row+1);
1521  matrix->replaceGlobalValues(row, columnIndices(0,1), vals(0,1));
1522  }
1523  matrix->fillComplete();
1524 
1525  // Fill RHS vector:
1526  // [ 0, 0, ..., 0, 0, 1]
1527  // [ 0, 0, ..., 0, 2, 2]
1528  // [ 0, 0, ..., 3, 3, 3]
1529  // [ 0, 0, ..., 4, 4, 4]
1530  // ...
1531 
1532  RCP<Tpetra_Vector> b = Tpetra::createVector<Scalar>(map);
1533  {
1534  auto b_view = b->getLocalViewHost(Tpetra::Access::OverwriteAll);
1535  for (size_t i=0; i<num_my_row; ++i) {
1536  const GlobalOrdinal row = myGIDs[i];
1537  b_view(i,0) = Scalar(0.0);
1538  for (LocalOrdinal j=0; j<VectorSize; ++j)
1539  if (int(j+2+row-VectorSize) > 0)
1540  b_view(i,0).fastAccessCoeff(j) = BaseScalar(row+1);
1541  }
1542  }
1543 
1544  // Solve
1546 #ifdef HAVE_STOKHOS_ENSEMBLE_REDUCT
1547  typedef BaseScalar BelosScalar;
1548 #else
1549  typedef Scalar BelosScalar;
1550 #endif
1551  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
1552  typedef Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> OP;
1553  typedef Belos::LinearProblem<BelosScalar,MV,OP> BLinProb;
1554  RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map);
1555  RCP< BLinProb > problem = rcp(new BLinProb(matrix, x, b));
1556  RCP<ParameterList> belosParams = rcp(new ParameterList);
1557  typename ST::magnitudeType tol = 1e-9;
1558  belosParams->set("Flexible Gmres", false);
1559  belosParams->set("Num Blocks", 100);
1560  belosParams->set("Convergence Tolerance", BelosScalar(tol));
1561  belosParams->set("Maximum Iterations", 100);
1562  belosParams->set("Verbosity", 33);
1563  belosParams->set("Output Style", 1);
1564  belosParams->set("Output Frequency", 1);
1565  belosParams->set("Output Stream", out.getOStream());
1566  belosParams->set("Orthogonalization","DGKS");
1567  RCP<Belos::PseudoBlockGmresSolMgr<BelosScalar,MV,OP> > solver =
1568  rcp(new Belos::PseudoBlockGmresSolMgr<BelosScalar,MV,OP>(problem, belosParams));
1569  problem->setProblem();
1570  Belos::ReturnType ret = solver->solve();
1571  TEST_EQUALITY_CONST( ret, Belos::Converged );
1572 
1573 #ifndef HAVE_STOKHOS_ENSEMBLE_REDUCT
1574  int numItersExpected = nrow;
1575  int numIters = solver->getNumIters();
1576  out << "numIters = " << numIters << std::endl;
1577  TEST_EQUALITY( numIters, numItersExpected);
1578 
1579  // Get and print number of ensemble iterations
1580  std::vector<int> ensemble_iterations =
1581  static_cast<const Belos::StatusTestImpResNorm<BelosScalar, MV, OP> *>(solver->getResidualStatusTest())->getEnsembleIterations();
1582  out << "Ensemble iterations = ";
1583  for (auto ensemble_iteration : ensemble_iterations)
1584  out << ensemble_iteration << " ";
1585  out << std::endl;
1586 
1587  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1588  if (int(j+1+nrow-VectorSize) > 0) {
1589  TEST_EQUALITY(int(j+1+nrow-VectorSize), ensemble_iterations[j]);
1590  }
1591  else {
1592  TEST_EQUALITY(int(0), ensemble_iterations[j]);
1593  }
1594  }
1595 #endif
1596 
1597  // Check -- Correct answer is:
1598  // [ 0, 0, ..., 0, 0, 1]
1599  // [ 0, 0, ..., 0, 1, 1]
1600  // [ 0, 0, ..., 1, 1, 1]
1601  // [ 0, 0, ..., 1, 1, 1]
1602  // ...
1603  tol = 1000*tol;
1604  auto x_view = x->getLocalViewHost(Tpetra::Access::ReadOnly);
1605  for (size_t i=0; i<num_my_row; ++i) {
1606  const GlobalOrdinal row = myGIDs[i];
1607  Scalar v = x_view(i,0);
1608 
1609  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1610  if (ST::magnitude(v.coeff(j)) < tol)
1611  v.fastAccessCoeff(j) = BaseScalar(0.0);
1612  }
1613 
1614  Scalar val = Scalar(0.0);
1615 
1616  for (LocalOrdinal j=0; j<VectorSize; ++j)
1617  if (j+2+row-VectorSize > 0)
1618  val.fastAccessCoeff(j) = BaseScalar(1.0);
1619 
1620  for (LocalOrdinal j=0; j<VectorSize; ++j)
1621  TEST_FLOATING_EQUALITY(v.coeff(j), val.coeff(j), tol);
1622  }
1623 }
1624 
1625 //
1626 // Test Belos GMRES solve for a simple lower-triangular matrix with lucky breakdown with ICGS orthogonalization
1627 //
1629  Tpetra_CrsMatrix_MP, BelosGMRES_ICGS, Storage, LocalOrdinal, GlobalOrdinal, Node )
1630 {
1631  using Teuchos::RCP;
1632  using Teuchos::rcp;
1633  using Teuchos::tuple;
1634  using Teuchos::ArrayView;
1635  using Teuchos::Array;
1636  using Teuchos::ArrayRCP;
1637  using Teuchos::ParameterList;
1638 
1639  typedef typename Storage::value_type BaseScalar;
1641 
1642  typedef Teuchos::Comm<int> Tpetra_Comm;
1643  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
1644  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
1645  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix;
1646  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph;
1647 
1648  // Ensure device is initialized
1649  if ( !Kokkos::is_initialized() )
1650  Kokkos::initialize();
1651 
1652  // Build diagonal matrix
1653  GlobalOrdinal nrow = 20;
1654  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
1655  RCP<const Tpetra_Map> map =
1656  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
1657  nrow, comm);
1658  RCP<Tpetra_CrsGraph> graph =
1659  rcp(new Tpetra_CrsGraph(map, size_t(1)));
1660  Array<GlobalOrdinal> columnIndices(1);
1661  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
1662  const size_t num_my_row = myGIDs.size();
1663  for (size_t i=0; i<num_my_row; ++i) {
1664  const GlobalOrdinal row = myGIDs[i];
1665  columnIndices[0] = row;
1666  size_t ncol = 1;
1667  graph->insertGlobalIndices(row, columnIndices(0,ncol));
1668  }
1669  graph->fillComplete();
1670  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph));
1671 
1672  Array<Scalar> vals(1);
1673  for (size_t i=0; i<num_my_row; ++i) {
1674  const GlobalOrdinal row = myGIDs[i];
1675  columnIndices[0] = row;
1676  vals[0] = Scalar(row+1);
1677  matrix->replaceGlobalValues(row, columnIndices(0,1), vals(0,1));
1678  }
1679  matrix->fillComplete();
1680 
1681  // Fill RHS vector:
1682  // [ 0, 0, ..., 0, 0, 1]
1683  // [ 0, 0, ..., 0, 2, 2]
1684  // [ 0, 0, ..., 3, 3, 3]
1685  // [ 0, 0, ..., 4, 4, 4]
1686  // ...
1687 
1688  RCP<Tpetra_Vector> b = Tpetra::createVector<Scalar>(map);
1689  {
1690  auto b_view = b->getLocalViewHost(Tpetra::Access::OverwriteAll);
1691  for (size_t i=0; i<num_my_row; ++i) {
1692  const GlobalOrdinal row = myGIDs[i];
1693  b_view(i,0) = Scalar(0.0);
1694  for (LocalOrdinal j=0; j<VectorSize; ++j)
1695  if (int(j+2+row-VectorSize) > 0)
1696  b_view(i,0).fastAccessCoeff(j) = BaseScalar(row+1);
1697  }
1698  }
1699 
1700  // Solve
1702 #ifdef HAVE_STOKHOS_ENSEMBLE_REDUCT
1703  typedef BaseScalar BelosScalar;
1704 #else
1705  typedef Scalar BelosScalar;
1706 #endif
1707  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
1708  typedef Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> OP;
1709  typedef Belos::LinearProblem<BelosScalar,MV,OP> BLinProb;
1710  RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map);
1711  RCP< BLinProb > problem = rcp(new BLinProb(matrix, x, b));
1712  RCP<ParameterList> belosParams = rcp(new ParameterList);
1713  typename ST::magnitudeType tol = 1e-9;
1714  belosParams->set("Flexible Gmres", false);
1715  belosParams->set("Num Blocks", 100);
1716  belosParams->set("Convergence Tolerance", BelosScalar(tol));
1717  belosParams->set("Maximum Iterations", 100);
1718  belosParams->set("Verbosity", 33);
1719  belosParams->set("Output Style", 1);
1720  belosParams->set("Output Frequency", 1);
1721  belosParams->set("Output Stream", out.getOStream());
1722  belosParams->set("Orthogonalization","ICGS");
1723  RCP<Belos::PseudoBlockGmresSolMgr<BelosScalar,MV,OP> > solver =
1724  rcp(new Belos::PseudoBlockGmresSolMgr<BelosScalar,MV,OP>(problem, belosParams));
1725  problem->setProblem();
1726  Belos::ReturnType ret = solver->solve();
1727  TEST_EQUALITY_CONST( ret, Belos::Converged );
1728 
1729 #ifndef HAVE_STOKHOS_ENSEMBLE_REDUCT
1730  int numItersExpected = nrow;
1731  int numIters = solver->getNumIters();
1732  out << "numIters = " << numIters << std::endl;
1733  TEST_EQUALITY( numIters, numItersExpected);
1734 
1735  // Get and print number of ensemble iterations
1736  std::vector<int> ensemble_iterations =
1737  static_cast<const Belos::StatusTestImpResNorm<BelosScalar, MV, OP> *>(solver->getResidualStatusTest())->getEnsembleIterations();
1738  out << "Ensemble iterations = ";
1739  for (auto ensemble_iteration : ensemble_iterations)
1740  out << ensemble_iteration << " ";
1741  out << std::endl;
1742 
1743  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1744  if (int(j+1+nrow-VectorSize) > 0) {
1745  TEST_EQUALITY(int(j+1+nrow-VectorSize), ensemble_iterations[j]);
1746  }
1747  else {
1748  TEST_EQUALITY(int(0), ensemble_iterations[j]);
1749  }
1750  }
1751 #endif
1752 
1753  // Check -- Correct answer is:
1754  // [ 0, 0, ..., 0, 0, 1]
1755  // [ 0, 0, ..., 0, 1, 1]
1756  // [ 0, 0, ..., 1, 1, 1]
1757  // [ 0, 0, ..., 1, 1, 1]
1758  // ...
1759  tol = 1000*tol;
1760  auto x_view = x->getLocalViewHost(Tpetra::Access::ReadOnly);
1761  for (size_t i=0; i<num_my_row; ++i) {
1762  const GlobalOrdinal row = myGIDs[i];
1763  Scalar v = x_view(i,0);
1764 
1765  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1766  if (ST::magnitude(v.coeff(j)) < tol)
1767  v.fastAccessCoeff(j) = BaseScalar(0.0);
1768  }
1769 
1770  Scalar val = Scalar(0.0);
1771 
1772  for (LocalOrdinal j=0; j<VectorSize; ++j)
1773  if (j+2+row-VectorSize > 0)
1774  val.fastAccessCoeff(j) = BaseScalar(1.0);
1775 
1776  for (LocalOrdinal j=0; j<VectorSize; ++j)
1777  TEST_FLOATING_EQUALITY(v.coeff(j), val.coeff(j), tol);
1778  }
1779 }
1780 
1781 //
1782 // Test Belos GMRES solve for a simple lower-triangular matrix with lucky breakdown with IMGS orthogonalization
1783 //
1785  Tpetra_CrsMatrix_MP, BelosGMRES_IMGS, Storage, LocalOrdinal, GlobalOrdinal, Node )
1786 {
1787  using Teuchos::RCP;
1788  using Teuchos::rcp;
1789  using Teuchos::tuple;
1790  using Teuchos::ArrayView;
1791  using Teuchos::Array;
1792  using Teuchos::ArrayRCP;
1793  using Teuchos::ParameterList;
1794 
1795  typedef typename Storage::value_type BaseScalar;
1797 
1798  typedef Teuchos::Comm<int> Tpetra_Comm;
1799  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
1800  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
1801  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix;
1802  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph;
1803 
1804  // Ensure device is initialized
1805  if ( !Kokkos::is_initialized() )
1806  Kokkos::initialize();
1807 
1808  // Build diagonal matrix
1809  GlobalOrdinal nrow = 20;
1810  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
1811  RCP<const Tpetra_Map> map =
1812  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
1813  nrow, comm);
1814  RCP<Tpetra_CrsGraph> graph =
1815  rcp(new Tpetra_CrsGraph(map, size_t(1)));
1816  Array<GlobalOrdinal> columnIndices(1);
1817  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
1818  const size_t num_my_row = myGIDs.size();
1819  for (size_t i=0; i<num_my_row; ++i) {
1820  const GlobalOrdinal row = myGIDs[i];
1821  columnIndices[0] = row;
1822  size_t ncol = 1;
1823  graph->insertGlobalIndices(row, columnIndices(0,ncol));
1824  }
1825  graph->fillComplete();
1826  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph));
1827 
1828  Array<Scalar> vals(1);
1829  for (size_t i=0; i<num_my_row; ++i) {
1830  const GlobalOrdinal row = myGIDs[i];
1831  columnIndices[0] = row;
1832  vals[0] = Scalar(row+1);
1833  matrix->replaceGlobalValues(row, columnIndices(0,1), vals(0,1));
1834  }
1835  matrix->fillComplete();
1836 
1837  // Fill RHS vector:
1838  // [ 0, 0, ..., 0, 0, 1]
1839  // [ 0, 0, ..., 0, 2, 2]
1840  // [ 0, 0, ..., 3, 3, 3]
1841  // [ 0, 0, ..., 4, 4, 4]
1842  // ...
1843 
1844  RCP<Tpetra_Vector> b = Tpetra::createVector<Scalar>(map);
1845  {
1846  auto b_view = b->getLocalViewHost(Tpetra::Access::OverwriteAll);
1847  for (size_t i=0; i<num_my_row; ++i) {
1848  const GlobalOrdinal row = myGIDs[i];
1849  b_view(i,0) = Scalar(0.0);
1850  for (LocalOrdinal j=0; j<VectorSize; ++j)
1851  if (int(j+2+row-VectorSize) > 0)
1852  b_view(i,0).fastAccessCoeff(j) = BaseScalar(row+1);
1853  }
1854  }
1855 
1856  // Solve
1858 #ifdef HAVE_STOKHOS_ENSEMBLE_REDUCT
1859  typedef BaseScalar BelosScalar;
1860 #else
1861  typedef Scalar BelosScalar;
1862 #endif
1863  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
1864  typedef Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> OP;
1865  typedef Belos::LinearProblem<BelosScalar,MV,OP> BLinProb;
1866  RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map);
1867  RCP< BLinProb > problem = rcp(new BLinProb(matrix, x, b));
1868  RCP<ParameterList> belosParams = rcp(new ParameterList);
1869  typename ST::magnitudeType tol = 1e-9;
1870  belosParams->set("Flexible Gmres", false);
1871  belosParams->set("Num Blocks", 100);
1872  belosParams->set("Convergence Tolerance", BelosScalar(tol));
1873  belosParams->set("Maximum Iterations", 100);
1874  belosParams->set("Verbosity", 33);
1875  belosParams->set("Output Style", 1);
1876  belosParams->set("Output Frequency", 1);
1877  belosParams->set("Output Stream", out.getOStream());
1878  belosParams->set("Orthogonalization","IMGS");
1879  RCP<Belos::PseudoBlockGmresSolMgr<BelosScalar,MV,OP> > solver =
1880  rcp(new Belos::PseudoBlockGmresSolMgr<BelosScalar,MV,OP>(problem, belosParams));
1881  problem->setProblem();
1882  Belos::ReturnType ret = solver->solve();
1883  TEST_EQUALITY_CONST( ret, Belos::Converged );
1884 
1885 #ifndef HAVE_STOKHOS_ENSEMBLE_REDUCT
1886  int numItersExpected = nrow;
1887  int numIters = solver->getNumIters();
1888  out << "numIters = " << numIters << std::endl;
1889  TEST_EQUALITY( numIters, numItersExpected);
1890 
1891  // Get and print number of ensemble iterations
1892  std::vector<int> ensemble_iterations =
1893  static_cast<const Belos::StatusTestImpResNorm<BelosScalar, MV, OP> *>(solver->getResidualStatusTest())->getEnsembleIterations();
1894  out << "Ensemble iterations = ";
1895  for (auto ensemble_iteration : ensemble_iterations)
1896  out << ensemble_iteration << " ";
1897  out << std::endl;
1898 
1899  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1900  if (int(j+1+nrow-VectorSize) > 0) {
1901  TEST_EQUALITY(int(j+1+nrow-VectorSize), ensemble_iterations[j]);
1902  }
1903  else {
1904  TEST_EQUALITY(int(0), ensemble_iterations[j]);
1905  }
1906  }
1907 #endif
1908 
1909  // Check -- Correct answer is:
1910  // [ 0, 0, ..., 0, 0, 1]
1911  // [ 0, 0, ..., 0, 1, 1]
1912  // [ 0, 0, ..., 1, 1, 1]
1913  // [ 0, 0, ..., 1, 1, 1]
1914  // ...
1915  tol = 1000*tol;
1916  auto x_view = x->getLocalViewHost(Tpetra::Access::ReadOnly);
1917  for (size_t i=0; i<num_my_row; ++i) {
1918  const GlobalOrdinal row = myGIDs[i];
1919  Scalar v = x_view(i,0);
1920 
1921  for (LocalOrdinal j=0; j<VectorSize; ++j) {
1922  if (ST::magnitude(v.coeff(j)) < tol)
1923  v.fastAccessCoeff(j) = BaseScalar(0.0);
1924  }
1925 
1926  Scalar val = Scalar(0.0);
1927 
1928  for (LocalOrdinal j=0; j<VectorSize; ++j)
1929  if (j+2+row-VectorSize > 0)
1930  val.fastAccessCoeff(j) = BaseScalar(1.0);
1931 
1932  for (LocalOrdinal j=0; j<VectorSize; ++j)
1933  TEST_FLOATING_EQUALITY(v.coeff(j), val.coeff(j), tol);
1934  }
1935 }
1936 
1937 #else
1938 
1940  Tpetra_CrsMatrix_MP, BelosGMRES, Storage, LocalOrdinal, GlobalOrdinal, Node )
1941 {}
1942 
1944  Tpetra_CrsMatrix_MP, BelosGMRES_DGKS, Storage, LocalOrdinal, GlobalOrdinal, Node )
1945 {}
1946 
1948  Tpetra_CrsMatrix_MP, BelosGMRES_ICGS, Storage, LocalOrdinal, GlobalOrdinal, Node )
1949 {}
1950 
1952  Tpetra_CrsMatrix_MP, BelosGMRES_IMGS, Storage, LocalOrdinal, GlobalOrdinal, Node )
1953 {}
1954 
1955 #endif
1956 
1957 #if defined(HAVE_STOKHOS_BELOS) && defined(HAVE_STOKHOS_IFPACK2)
1958 
1959 //
1960 // Test Belos GMRES solve with Ifpack2 RILUK preconditioning for a
1961 // simple banded upper-triangular matrix
1962 //
1964  Tpetra_CrsMatrix_MP, BelosGMRES_RILUK, Storage, LocalOrdinal, GlobalOrdinal, Node )
1965 {
1966  using Teuchos::RCP;
1967  using Teuchos::rcp;
1968  using Teuchos::ArrayView;
1969  using Teuchos::Array;
1970  using Teuchos::ArrayRCP;
1971  using Teuchos::ParameterList;
1972 
1973  typedef typename Storage::value_type BaseScalar;
1975 
1976  typedef Teuchos::Comm<int> Tpetra_Comm;
1977  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
1978  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
1979  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix;
1980  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph;
1981 
1982  // Ensure device is initialized
1983  if ( !Kokkos::is_initialized() )
1984  Kokkos::initialize();
1985 
1986  // Build banded matrix
1987  GlobalOrdinal nrow = 10;
1988  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
1989  RCP<const Tpetra_Map> map =
1990  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
1991  nrow, comm);
1992  RCP<Tpetra_CrsGraph> graph =
1993  rcp(new Tpetra_CrsGraph(map, size_t(2)));
1994  Array<GlobalOrdinal> columnIndices(2);
1995  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
1996  const size_t num_my_row = myGIDs.size();
1997  for (size_t i=0; i<num_my_row; ++i) {
1998  const GlobalOrdinal row = myGIDs[i];
1999  columnIndices[0] = row;
2000  size_t ncol = 1;
2001  if (row != nrow-1) {
2002  columnIndices[1] = row+1;
2003  ncol = 2;
2004  }
2005  graph->insertGlobalIndices(row, columnIndices(0,ncol));
2006  }
2007  graph->fillComplete();
2008  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph));
2009 
2010  // Set values in matrix
2011  Array<Scalar> vals(2);
2012  Scalar val(VectorSize, BaseScalar(0.0));
2013  for (size_t i=0; i<num_my_row; ++i) {
2014  const GlobalOrdinal row = myGIDs[i];
2015  columnIndices[0] = row;
2016  for (LocalOrdinal j=0; j<VectorSize; ++j)
2017  val.fastAccessCoeff(j) = j+1;
2018  vals[0] = val;
2019  size_t ncol = 1;
2020 
2021  if (row != nrow-1) {
2022  columnIndices[1] = row+1;
2023  for (LocalOrdinal j=0; j<VectorSize; ++j)
2024  val.fastAccessCoeff(j) = j+1;
2025  vals[1] = val;
2026  ncol = 2;
2027  }
2028  matrix->replaceGlobalValues(row, columnIndices(0,ncol), vals(0,ncol));
2029  }
2030  matrix->fillComplete();
2031 
2032  // Fill RHS vector
2033  RCP<Tpetra_Vector> b = Tpetra::createVector<Scalar>(map);
2034  {
2035  auto b_view = b->getLocalViewHost(Tpetra::Access::OverwriteAll);
2036  for (size_t i=0; i<num_my_row; ++i) {
2037  b_view(i,0) = Scalar(1.0);
2038  }
2039  }
2040 
2041  // Create preconditioner
2042  typedef Ifpack2::Preconditioner<Scalar,LocalOrdinal,GlobalOrdinal,Node> Prec;
2043  Ifpack2::Factory factory;
2044  RCP<Prec> M = factory.create<Tpetra_CrsMatrix>("RILUK", matrix);
2045  M->initialize();
2046  M->compute();
2047 
2048  // Solve
2050 #ifdef HAVE_STOKHOS_ENSEMBLE_REDUCT
2051  typedef BaseScalar BelosScalar;
2052 #else
2053  typedef Scalar BelosScalar;
2054 #endif
2055  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
2056  typedef Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> OP;
2057  typedef Belos::LinearProblem<BelosScalar,MV,OP> BLinProb;
2058  RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map);
2059  RCP< BLinProb > problem = rcp(new BLinProb(matrix, x, b));
2060  problem->setRightPrec(M);
2061  problem->setProblem();
2062  RCP<ParameterList> belosParams = rcp(new ParameterList);
2063  typename ST::magnitudeType tol = 1e-9;
2064  belosParams->set("Flexible Gmres", false);
2065  belosParams->set("Num Blocks", 100);
2066  belosParams->set("Convergence Tolerance", BelosScalar(tol));
2067  belosParams->set("Maximum Iterations", 100);
2068  belosParams->set("Verbosity", 33);
2069  belosParams->set("Output Style", 1);
2070  belosParams->set("Output Frequency", 1);
2071  belosParams->set("Output Stream", out.getOStream());
2072  //belosParams->set("Orthogonalization", "TSQR");
2073  RCP<Belos::SolverManager<BelosScalar,MV,OP> > solver =
2074  rcp(new Belos::PseudoBlockGmresSolMgr<BelosScalar,MV,OP>(problem, belosParams));
2075  Belos::ReturnType ret = solver->solve();
2076  TEST_EQUALITY_CONST( ret, Belos::Converged );
2077 
2078  // x->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
2079  // Teuchos::VERB_EXTREME);
2080 
2081  // Check -- Correct answer is:
2082  // [ 0, 0, ..., 0 ]
2083  // [ 1, 1/2, ..., 1/VectorSize ]
2084  // [ 0, 0, ..., 0 ]
2085  // [ 1, 1/2, ..., 1/VectorSize ]
2086  // ....
2087  tol = 1000*tol;
2088  auto x_view = x->getLocalViewHost(Tpetra::Access::ReadOnly);
2089  for (size_t i=0; i<num_my_row; ++i) {
2090  const GlobalOrdinal row = myGIDs[i];
2091  if (row % 2) {
2092  for (LocalOrdinal j=0; j<VectorSize; ++j) {
2093  val.fastAccessCoeff(j) = BaseScalar(1.0) / BaseScalar(j+1);
2094  }
2095  }
2096  else
2097  val = Scalar(VectorSize, BaseScalar(0.0));
2098  TEST_EQUALITY( x_view(i,0).size(), VectorSize );
2099 
2100  // Set small values to zero
2101  Scalar v = x_view(i,0);
2102  for (LocalOrdinal j=0; j<VectorSize; ++j) {
2103  if (ST::magnitude(v.coeff(j)) < tol)
2104  v.fastAccessCoeff(j) = BaseScalar(0.0);
2105  }
2106 
2107  for (LocalOrdinal j=0; j<VectorSize; ++j)
2108  TEST_FLOATING_EQUALITY(v.coeff(j), val.coeff(j), tol);
2109  }
2110 }
2111 
2112 #else
2113 
2115  Tpetra_CrsMatrix_MP, BelosGMRES_RILUK, Storage, LocalOrdinal, GlobalOrdinal, Node )
2116 {}
2117 
2118 #endif
2119 
2120 #if defined(HAVE_STOKHOS_BELOS) && defined(HAVE_STOKHOS_IFPACK2) && defined(HAVE_STOKHOS_MUELU) && defined(HAVE_STOKHOS_AMESOS2)
2121 
2122 //
2123 // Test Belos CG solve with MueLu preconditioning for a 1-D Laplacian matrix
2124 //
2126  Tpetra_CrsMatrix_MP, BelosCG_Muelu, Storage, LocalOrdinal, GlobalOrdinal, Node )
2127 {
2128  using Teuchos::RCP;
2129  using Teuchos::rcp;
2130  using Teuchos::ArrayView;
2131  using Teuchos::Array;
2132  using Teuchos::ArrayRCP;
2133  using Teuchos::ParameterList;
2134  using Teuchos::getParametersFromXmlFile;
2135 
2136  typedef typename Storage::value_type BaseScalar;
2138 
2139  typedef Teuchos::Comm<int> Tpetra_Comm;
2140  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map;
2141  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector;
2142  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix;
2143  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph;
2144 
2145  // Ensure device is initialized
2146  if ( !Kokkos::is_initialized() )
2147  Kokkos::initialize();
2148 
2149  // 1-D Laplacian matrix
2150  GlobalOrdinal nrow = 50;
2151  BaseScalar h = 1.0 / static_cast<BaseScalar>(nrow-1);
2152  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm();
2153  RCP<const Tpetra_Map> map =
2154  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(
2155  nrow, comm);
2156  RCP<Tpetra_CrsGraph> graph =
2157  rcp(new Tpetra_CrsGraph(map, size_t(3)));
2158  Array<GlobalOrdinal> columnIndices(3);
2159  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList();
2160  const size_t num_my_row = myGIDs.size();
2161  for (size_t i=0; i<num_my_row; ++i) {
2162  const GlobalOrdinal row = myGIDs[i];
2163  if (row == 0 || row == nrow-1) { // Boundary nodes
2164  columnIndices[0] = row;
2165  graph->insertGlobalIndices(row, columnIndices(0,1));
2166  }
2167  else { // Interior nodes
2168  columnIndices[0] = row-1;
2169  columnIndices[1] = row;
2170  columnIndices[2] = row+1;
2171  graph->insertGlobalIndices(row, columnIndices(0,3));
2172  }
2173  }
2174  graph->fillComplete();
2175  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph));
2176 
2177  // Set values in matrix
2178  Array<Scalar> vals(3);
2179  Scalar a_val(VectorSize, BaseScalar(0.0));
2180  for (LocalOrdinal j=0; j<VectorSize; ++j) {
2181  a_val.fastAccessCoeff(j) =
2182  BaseScalar(1.0) + BaseScalar(j) / BaseScalar(VectorSize);
2183  }
2184  for (size_t i=0; i<num_my_row; ++i) {
2185  const GlobalOrdinal row = myGIDs[i];
2186  if (row == 0 || row == nrow-1) { // Boundary nodes
2187  columnIndices[0] = row;
2188  vals[0] = Scalar(1.0);
2189  matrix->replaceGlobalValues(row, columnIndices(0,1), vals(0,1));
2190  }
2191  else {
2192  columnIndices[0] = row-1;
2193  columnIndices[1] = row;
2194  columnIndices[2] = row+1;
2195  vals[0] = Scalar(-1.0) * a_val;
2196  vals[1] = Scalar(2.0) * a_val;
2197  vals[2] = Scalar(-1.0) * a_val;
2198  matrix->replaceGlobalValues(row, columnIndices(0,3), vals(0,3));
2199  }
2200  }
2201  matrix->fillComplete();
2202 
2203  // Fill RHS vector
2204  RCP<Tpetra_Vector> b = Tpetra::createVector<Scalar>(map);
2205  Scalar b_val;
2206  {
2207  auto b_view = b->getLocalViewHost(Tpetra::Access::OverwriteAll);
2208  b_val = Scalar(VectorSize, BaseScalar(0.0));
2209  for (LocalOrdinal j=0; j<VectorSize; ++j) {
2210  b_val.fastAccessCoeff(j) =
2211  BaseScalar(-1.0) + BaseScalar(j) / BaseScalar(VectorSize);
2212  }
2213  for (size_t i=0; i<num_my_row; ++i) {
2214  const GlobalOrdinal row = myGIDs[i];
2215  if (row == 0 || row == nrow-1)
2216  b_view(i,0) = Scalar(0.0);
2217  else
2218  b_view(i,0) = -Scalar(b_val * h * h);
2219  }
2220  }
2221 
2222  // Create preconditioner
2223  typedef Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> OP;
2224  RCP<ParameterList> muelu_params =
2225  getParametersFromXmlFile("muelu_cheby.xml");
2226  RCP<OP> matrix_op = matrix;
2227  RCP<OP> M =
2228  MueLu::CreateTpetraPreconditioner<Scalar,LocalOrdinal,GlobalOrdinal,Node>(matrix_op, *muelu_params);
2229 
2230  // Solve
2232 #ifdef HAVE_STOKHOS_ENSEMBLE_REDUCT
2233  typedef BaseScalar BelosScalar;
2234 #else
2235  typedef Scalar BelosScalar;
2236 #endif
2237  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
2238  typedef Belos::LinearProblem<BelosScalar,MV,OP> BLinProb;
2239  RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map);
2240  RCP< BLinProb > problem = rcp(new BLinProb(matrix, x, b));
2241  problem->setRightPrec(M);
2242  problem->setProblem();
2243  RCP<ParameterList> belosParams = rcp(new ParameterList);
2244  typename ST::magnitudeType tol = 1e-9;
2245  belosParams->set("Num Blocks", 100);
2246  belosParams->set("Convergence Tolerance", BelosScalar(tol));
2247  belosParams->set("Maximum Iterations", 100);
2248  belosParams->set("Verbosity", 33);
2249  belosParams->set("Output Style", 1);
2250  belosParams->set("Output Frequency", 1);
2251  belosParams->set("Output Stream", out.getOStream());
2252  // Turn off residual scaling so we can see some variation in the number
2253  // of iterations across the ensemble when not doing ensemble reductions
2254  belosParams->set("Implicit Residual Scaling", "None");
2255 
2256  RCP<Belos::PseudoBlockCGSolMgr<BelosScalar,MV,OP,true> > solver =
2257  rcp(new Belos::PseudoBlockCGSolMgr<BelosScalar,MV,OP,true>(problem, belosParams));
2258  Belos::ReturnType ret = solver->solve();
2259  TEST_EQUALITY_CONST( ret, Belos::Converged );
2260 
2261 #ifndef HAVE_STOKHOS_ENSEMBLE_REDUCT
2262  // Get and print number of ensemble iterations
2263  std::vector<int> ensemble_iterations =
2264  solver->getResidualStatusTest()->getEnsembleIterations();
2265  out << "Ensemble iterations = ";
2266  for (int i=0; i<VectorSize; ++i)
2267  out << ensemble_iterations[i] << " ";
2268  out << std::endl;
2269 #endif
2270 
2271  // x->describe(*(Teuchos::fancyOStream(rcp(&std::cout,false))),
2272  // Teuchos::VERB_EXTREME);
2273 
2274  // Check -- For a*y'' = b, correct answer is y = 0.5 *(b/a) * x * (x-1)
2275  tol = 1000*tol;
2276  auto x_view = x->getLocalViewHost(Tpetra::Access::ReadOnly);
2277  Scalar val(VectorSize, BaseScalar(0.0));
2278  for (size_t i=0; i<num_my_row; ++i) {
2279  const GlobalOrdinal row = myGIDs[i];
2280  BaseScalar xx = row * h;
2281  for (LocalOrdinal j=0; j<VectorSize; ++j) {
2282  val.fastAccessCoeff(j) =
2283  BaseScalar(0.5) * (b_val.coeff(j)/a_val.coeff(j)) * xx * (xx - BaseScalar(1.0));
2284  }
2285  TEST_EQUALITY( x_view(i,0).size(), VectorSize );
2286 
2287  // Set small values to zero
2288  Scalar v = x_view(i,0);
2289  for (LocalOrdinal j=0; j<VectorSize; ++j) {
2290  if (ST::magnitude(v.coeff(j)) < tol)
2291  v.fastAccessCoeff(j) = BaseScalar(0.0);
2292  if (ST::magnitude(val.coeff(j)) < tol)
2293  val.fastAccessCoeff(j) = BaseScalar(0.0);
2294  }
2295 
2296  for (LocalOrdinal j=0; j<VectorSize; ++j)
2297  TEST_FLOATING_EQUALITY(v.coeff(j), val.coeff(j), tol);
2298  }
2299 
2300 }
2301 
2302 #else
2303 
2305  Tpetra_CrsMatrix_MP, BelosCG_Muelu, Storage, LocalOrdinal, GlobalOrdinal, Node )
2306 {}
2307 
2308 #endif
2309 
2310 #if defined(HAVE_STOKHOS_AMESOS2)
2311 
2312 //
2313 // Test Amesos2 solve for a 1-D Laplacian matrix
2314 //
2315 #define TEST_AMESOS2_SOLVER(SolverName) \
2316  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Tpetra_CrsMatrix_MP, Amesos2_##SolverName, Storage, LocalOrdinal, GlobalOrdinal, Node) \
2317 { \
2318  using Teuchos::RCP; \
2319  using Teuchos::rcp; \
2320  using Teuchos::ArrayView; \
2321  using Teuchos::Array; \
2322  using Teuchos::ArrayRCP; \
2323  using Teuchos::ParameterList; \
2324  \
2325  typedef typename Storage::value_type BaseScalar; \
2326  typedef Sacado::MP::Vector<Storage> Scalar; \
2327  \
2328  typedef Teuchos::Comm<int> Tpetra_Comm; \
2329  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> Tpetra_Map; \
2330  typedef Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_Vector; \
2331  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_MultiVector; \
2332  typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsMatrix; \
2333  typedef Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node> Tpetra_CrsGraph; \
2334  \
2335  /* Ensure device is initialized */ \
2336  if ( !Kokkos::is_initialized() ) \
2337  Kokkos::initialize(); \
2338  \
2339  /* 1-D Laplacian matrix */ \
2340  GlobalOrdinal nrow = 50; \
2341  BaseScalar h = 1.0 / static_cast<BaseScalar>(nrow-1); \
2342  RCP<const Tpetra_Comm> comm = Tpetra::getDefaultComm(); \
2343  RCP<const Tpetra_Map> map = \
2344  Tpetra::createUniformContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>( \
2345  nrow, comm); \
2346  RCP<Tpetra_CrsGraph> graph = Tpetra::createCrsGraph(map, size_t(3)); \
2347  Array<GlobalOrdinal> columnIndices(3); \
2348  ArrayView<const GlobalOrdinal> myGIDs = map->getLocalElementList(); \
2349  const size_t num_my_row = myGIDs.size(); \
2350  for (size_t i=0; i<num_my_row; ++i) { \
2351  const GlobalOrdinal row = myGIDs[i]; \
2352  if (row == 0 || row == nrow-1) { /* Boundary nodes */ \
2353  columnIndices[0] = row; \
2354  graph->insertGlobalIndices(row, columnIndices(0,1)); \
2355  } \
2356  else { /* Interior nodes */ \
2357  columnIndices[0] = row-1; \
2358  columnIndices[1] = row; \
2359  columnIndices[2] = row+1; \
2360  graph->insertGlobalIndices(row, columnIndices(0,3)); \
2361  } \
2362  } \
2363  graph->fillComplete(); \
2364  RCP<Tpetra_CrsMatrix> matrix = rcp(new Tpetra_CrsMatrix(graph)); \
2365  \
2366  /* Set values in matrix */ \
2367  Array<Scalar> vals(3); \
2368  Scalar a_val(VectorSize, BaseScalar(0.0)); \
2369  for (LocalOrdinal j=0; j<VectorSize; ++j) { \
2370  a_val.fastAccessCoeff(j) = \
2371  BaseScalar(1.0) + BaseScalar(j) / BaseScalar(VectorSize); \
2372  } \
2373  for (size_t i=0; i<num_my_row; ++i) { \
2374  const GlobalOrdinal row = myGIDs[i]; \
2375  if (row == 0 || row == nrow-1) { /* Boundary nodes */ \
2376  columnIndices[0] = row; \
2377  vals[0] = Scalar(1.0); \
2378  matrix->replaceGlobalValues(row, columnIndices(0,1), vals(0,1)); \
2379  } \
2380  else { \
2381  columnIndices[0] = row-1; \
2382  columnIndices[1] = row; \
2383  columnIndices[2] = row+1; \
2384  vals[0] = Scalar(-1.0) * a_val; \
2385  vals[1] = Scalar(2.0) * a_val; \
2386  vals[2] = Scalar(-1.0) * a_val; \
2387  matrix->replaceGlobalValues(row, columnIndices(0,3), vals(0,3)); \
2388  } \
2389  } \
2390  matrix->fillComplete();\
2391  \
2392  /* Fill RHS vector */ \
2393  RCP<Tpetra_Vector> b = Tpetra::createVector<Scalar>(map); \
2394  Scalar b_val; \
2395  { \
2396  auto b_view = b->getLocalViewHost(Tpetra::Access::OverwriteAll); \
2397  b_val = Scalar(VectorSize, BaseScalar(0.0)); \
2398  for (LocalOrdinal j=0; j<VectorSize; ++j) { \
2399  b_val.fastAccessCoeff(j) = \
2400  BaseScalar(-1.0) + BaseScalar(j) / BaseScalar(VectorSize); \
2401  } \
2402  for (size_t i=0; i<num_my_row; ++i) { \
2403  const GlobalOrdinal row = myGIDs[i]; \
2404  if (row == 0 || row == nrow-1) \
2405  b_view(i,0) = Scalar(0.0); \
2406  else \
2407  b_view(i,0) = -Scalar(b_val * h * h); \
2408  } \
2409  } \
2410  \
2411  /* Solve */ \
2412  typedef Amesos2::Solver<Tpetra_CrsMatrix,Tpetra_MultiVector> Solver; \
2413  RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(map); \
2414  std::string solver_name(#SolverName); \
2415  out << "Solving linear system with " << solver_name << std::endl; \
2416  RCP<Solver> solver = Amesos2::create<Tpetra_CrsMatrix,Tpetra_MultiVector>( \
2417  solver_name, matrix, x, b); \
2418  solver->solve(); \
2419  \
2420  /* Check -- For a*y'' = b, correct answer is y = 0.5 *(b/a) * x * (x-1) */ \
2421  solver = Teuchos::null; /* Delete solver to eliminate live device views of x */ \
2422  typedef Teuchos::ScalarTraits<BaseScalar> ST; \
2423  typename ST::magnitudeType tol = 1e-9; \
2424  auto x_view = x->getLocalViewHost(Tpetra::Access::ReadOnly); \
2425  Scalar val(VectorSize, BaseScalar(0.0)); \
2426  for (size_t i=0; i<num_my_row; ++i) { \
2427  const GlobalOrdinal row = myGIDs[i]; \
2428  BaseScalar xx = row * h; \
2429  for (LocalOrdinal j=0; j<VectorSize; ++j) { \
2430  val.fastAccessCoeff(j) = \
2431  BaseScalar(0.5) * (b_val.coeff(j)/a_val.coeff(j)) * xx * (xx - BaseScalar(1.0)); \
2432  } \
2433  TEST_EQUALITY( x_view(i,0).size(), VectorSize ); \
2434  \
2435  /* Set small values to zero */ \
2436  Scalar v = x_view(i,0); \
2437  for (LocalOrdinal j=0; j<VectorSize; ++j) { \
2438  if (ST::magnitude(v.coeff(j)) < tol) \
2439  v.fastAccessCoeff(j) = BaseScalar(0.0); \
2440  if (ST::magnitude(val.coeff(j)) < tol) \
2441  val.fastAccessCoeff(j) = BaseScalar(0.0); \
2442  } \
2443  \
2444  for (LocalOrdinal j=0; j<VectorSize; ++j) \
2445  TEST_FLOATING_EQUALITY(v.coeff(j), val.coeff(j), tol); \
2446  } \
2447 }
2448 
2449 #endif // defined(HAVE_STOKHOS_AMESOS2)
2450 
2451 #if defined(HAVE_STOKHOS_AMESOS2) && defined(HAVE_AMESOS2_BASKER)
2452  TEST_AMESOS2_SOLVER(basker)
2453 #else
2454  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Tpetra_CrsMatrix_MP, Amesos2_basker, Storage, LocalOrdinal, GlobalOrdinal, Node) {}
2455 #endif
2456 
2457 #if defined(HAVE_STOKHOS_AMESOS2) && defined(HAVE_AMESOS2_KLU2)
2458  TEST_AMESOS2_SOLVER(klu2)
2459 #else
2461 #endif
2462 
2463 #if defined(HAVE_STOKHOS_AMESOS2) && defined(HAVE_AMESOS2_SUPERLUDIST)
2464  TEST_AMESOS2_SOLVER(superlu_dist)
2465 #else
2466  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Tpetra_CrsMatrix_MP, Amesos2_superlu_dist, Storage, LocalOrdinal, GlobalOrdinal, Node) {}
2467 #endif
2468 
2469 #if defined(HAVE_STOKHOS_AMESOS2) && defined(HAVE_AMESOS2_SUPERLUMT)
2470  TEST_AMESOS2_SOLVER(superlu_mt)
2471 #else
2472  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Tpetra_CrsMatrix_MP, Amesos2_superlu_mt, Storage, LocalOrdinal, GlobalOrdinal, Node) {}
2473 #endif
2474 
2475 #if defined(HAVE_STOKHOS_AMESOS2) && defined(HAVE_AMESOS2_SUPERLU)
2476  TEST_AMESOS2_SOLVER(superlu)
2477 #else
2478  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Tpetra_CrsMatrix_MP, Amesos2_superlu, Storage, LocalOrdinal, GlobalOrdinal, Node) {}
2479 #endif
2480 
2481 #if defined(HAVE_STOKHOS_AMESOS2) && defined(HAVE_AMESOS2_PARDISO_MKL)
2482  TEST_AMESOS2_SOLVER(pardisomkl)
2483 #else
2484  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Tpetra_CrsMatrix_MP, Amesos2_pardisomkl, Storage, LocalOrdinal, GlobalOrdinal, Node) {}
2485 #endif
2486 
2487 #if defined(HAVE_STOKHOS_AMESOS2) && defined(HAVE_AMESOS2_LAPACK)
2488  TEST_AMESOS2_SOLVER(lapack)
2489 #else
2490  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Tpetra_CrsMatrix_MP, Amesos2_lapack, Storage, LocalOrdinal, GlobalOrdinal, Node) {}
2491 #endif
2492 
2493 #if defined(HAVE_STOKHOS_AMESOS2) && defined(HAVE_AMESOS2_CHOLMOD) && defined (HAVE_AMESOS2_EXPERIMENTAL)
2494  TEST_AMESOS2_SOLVER(cholmod)
2495 #else
2496  TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Tpetra_CrsMatrix_MP, Amesos2_cholmod, Storage, LocalOrdinal, GlobalOrdinal, Node) {}
2497 #endif
2498 
2499 #define CRSMATRIX_MP_VECTOR_TESTS_SLGN(S, LO, GO, N) \
2500  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, VectorAdd, S, LO, GO, N ) \
2501  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, VectorDot, S, LO, GO, N ) \
2502  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, MultiVectorAdd, S, LO, GO, N ) \
2503  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, MultiVectorDot, S, LO, GO, N ) \
2504  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, MultiVectorDotSub, S, LO, GO, N ) \
2505  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, MatrixVectorMultiply, S, LO, GO, N ) \
2506  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, MatrixMultiVectorMultiply, S, LO, GO, N ) \
2507  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, WrappedDualView, S, LO, GO, N ) \
2508  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, Flatten, S, LO, GO, N ) \
2509  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, SimpleCG, S, LO, GO, N ) \
2510  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, SimplePCG_Muelu, S, LO, GO, N ) \
2511  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, BelosGMRES, S, LO, GO, N ) \
2512  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, BelosGMRES_DGKS, S, LO, GO, N ) \
2513  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, BelosGMRES_ICGS, S, LO, GO, N ) \
2514  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, BelosGMRES_IMGS, S, LO, GO, N ) \
2515  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, BelosGMRES_RILUK, S, LO, GO, N ) \
2516  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, BelosCG_Muelu, S, LO, GO, N ) \
2517  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, Amesos2_basker, S, LO, GO, N ) \
2518  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, Amesos2_klu2, S, LO, GO, N ) \
2519  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, Amesos2_superlu_dist, S, LO, GO, N ) \
2520  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, Amesos2_superlu_mt, S, LO, GO, N ) \
2521  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, Amesos2_superlu, S, LO, GO, N ) \
2522  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, Amesos2_pardisomkl, S, LO, GO, N ) \
2523  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, Amesos2_lapack, S, LO, GO, N ) \
2524  TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Tpetra_CrsMatrix_MP, Amesos2_cholmod, S, LO, GO, N )
2525 
2526 #define CRSMATRIX_MP_VECTOR_TESTS_N_SFS(N) \
2527  typedef Stokhos::DeviceForNode<N>::type Device; \
2528  typedef Stokhos::StaticFixedStorage<int,double,VectorSize,Device::execution_space> SFS; \
2529  using default_global_ordinal_type = ::Tpetra::Map<>::global_ordinal_type; \
2530  using default_local_ordinal_type = ::Tpetra::Map<>::local_ordinal_type; \
2531  CRSMATRIX_MP_VECTOR_TESTS_SLGN(SFS, default_local_ordinal_type, default_global_ordinal_type, N)
2532 
2533 #define CRSMATRIX_MP_VECTOR_TESTS_N(N) \
2534  CRSMATRIX_MP_VECTOR_TESTS_N_SFS(N)
2535 
2536 // Disabling testing of dynamic storage -- we don't really need it
2537  // typedef Stokhos::DynamicStorage<int,double,Device> DS;
2538  // using default_global_ordinal_type = ::Tpetra::Map<>::global_ordinal_type;
2539  // using default_local_ordinal_type = ::Tpetra::Map<>::local_ordinal_type;
2540  // CRSMATRIX_MP_VECTOR_TESTS_SLGN(DS, default_global_ordinal_type, default_local_ordinal_type, N)
Teuchos::RCP< Tpetra::CrsMatrix< typename Storage::value_type, LocalOrdinal, GlobalOrdinal, Node > > create_flat_matrix(const Tpetra::CrsMatrix< Sacado::UQ::PCE< Storage >, LocalOrdinal, GlobalOrdinal, Node > &mat, const Teuchos::RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &flat_graph, const Teuchos::RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &cijk_graph, const CijkType &cijk_dev)
bool CG_Solve(const Matrix &A, Vector &x, const Vector &b, typename Vector::mag_type tol, Ordinal max_its, std::ostream *out=0)
scalar generate_multi_vector_coefficient(const ordinal nFEM, const ordinal nVec, const ordinal nStoch, const ordinal iColFEM, const ordinal iVec, const ordinal iStoch)
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Tpetra_CrsMatrix_MP, VectorAdd, Storage, LocalOrdinal, GlobalOrdinal, Node)
Convergence test using the implicit residual norm(s), with an explicit residual norm(s) check for los...
#define TEST_EQUALITY_CONST(v1, v2)
Teuchos::RCP< const Tpetra::MultiVector< typename Storage::value_type, LocalOrdinal, GlobalOrdinal, Node > > create_flat_vector_view(const Tpetra::MultiVector< Sacado::UQ::PCE< Storage >, LocalOrdinal, GlobalOrdinal, Node > &vec, const Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &flat_map)
scalar generate_vector_coefficient(const ordinal nFEM, const ordinal nStoch, const ordinal iColFEM, const ordinal iStoch)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< XD, XP...> >::value &&Kokkos::is_view_uq_pce< Kokkos::View< YD, YP...> >::value, typename Kokkos::Details::InnerProductSpaceTraits< typename Kokkos::View< XD, XP...>::non_const_value_type >::dot_type >::type dot(const Kokkos::View< XD, XP...> &x, const Kokkos::View< YD, YP...> &y)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
bool PCG_Solve(const Matrix &A, Vector &x, const Vector &b, const Prec &M, typename Vector::mag_type tol, Ordinal max_its, std::ostream *out=0)
Teuchos::RCP< Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > create_flat_mp_graph(const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph, Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &flat_domain_map, Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &flat_range_map, const LocalOrdinal block_size)
KOKKOS_INLINE_FUNCTION PCE< Storage > abs(const PCE< Storage > &a)
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j)-expr2.val(j)
#define TEST_FLOATING_EQUALITY(v1, v2, tol)
expr val()
Tpetra::KokkosClassic::DefaultNode::DefaultNodeType Node
BelosGMRES
#define TEST_EQUALITY(v1, v2)