Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Fad_CommTests.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Sacado Package
5 // Copyright (2006) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // This library is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as
12 // published by the Free Software Foundation; either version 2.1 of the
13 // License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 // USA
24 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25 // (etphipp@sandia.gov).
26 //
27 // ***********************************************************************
28 // @HEADER
30 #include "Teuchos_CommHelpers.hpp"
31 #include "Teuchos_DefaultComm.hpp"
32 #include "Teuchos_Array.hpp"
33 #include "Teuchos_Comm.hpp"
34 
35 #include "Sacado_mpl_apply.hpp"
36 #include "Sacado_Random.hpp"
37 
38 using Teuchos::RCP;
39 using Teuchos::rcp;
41 
42 template <typename ArrayType>
43 bool checkFadArrays(const ArrayType& x,
44  const ArrayType& x2,
45  const std::string& tag,
46  Teuchos::FancyOStream& out) {
47  typedef typename ArrayType::value_type FadType;
48 
49  // Check sizes match
50  bool success = (x.size() == x2.size());
51  out << tag << " Fad array size test";
52  if (success)
53  out << " passed";
54  else
55  out << " failed";
56  out << ": \n\tExpected: " << x.size() << ", \n\tGot: " << x2.size()
57  << "." << std::endl;
58 
59  // Check Fads match
60  const int sz = x.size();
61  for (int i=0; i<sz; i++) {
62  bool success2 = Sacado::IsEqual<FadType>::eval(x[i], x2[i]);
63  out << tag << " Fad array comparison test " << i;
64  if (success2)
65  out << " passed";
66  else
67  out << " failed";
68  out << ": \n\tExpected: " << x[i] << ", \n\tGot: " << x2[i] << "."
69  << std::endl;
70  success = success && success2;
71  }
72 
73  return success;
74 }
75 
76 template<typename Ordinal>
78  const Teuchos::Comm<Ordinal> &comm,
80  const bool result
81  )
82 {
83  out << "\nChecking that the above test passed in all processes ...";
84  int thisResult = ( result ? 1 : 0 );
85  int sumResult = -1;
86  Teuchos::reduceAll(comm,Teuchos::REDUCE_SUM,Ordinal(1),&thisResult,
87  &sumResult);
88  const bool passed = sumResult==Teuchos::size(comm);
89  if(passed)
90  out << " passed\n";
91  else
92  out << " (sumResult="<<sumResult<<"!=numProcs="<<Teuchos::size(comm)<<") failed\n";
93  return passed;
94 }
95 
#define FAD_BASE_COMM_TESTS(FadType, FAD) \
/* Fad_Broadcast: broadcast an array of FadType from rank 0, once with */ \
/* the plain Teuchos::broadcast overload and once with an explicit */ \
/* ValueTypeSerializer, then compare the received arrays against x. */ \
/* NOTE(review): x is filled from rnd on every rank, so the comparison */ \
/* presumably relies on rnd yielding the same sequence on all ranks - */ \
/* confirm against the definition of rnd. */ \
TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_Broadcast ) { \
  const Teuchos::RCP<const Teuchos::Comm<Ordinal> > comm = \
    Teuchos::DefaultComm<Ordinal>::getComm(); \
  const std::string label(#FAD); \
  \
  const int numEntries = 7; \
  const int numDerivs = 5; \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), numDerivs); \
  \
  Teuchos::Array<FadType> x(numEntries), x2(numEntries), x3(numEntries); \
  for (int i = 0; i < numEntries; ++i) { \
    /* Value first, then derivatives: keeps the order of rnd draws. */ \
    x[i] = FadType(numDerivs, rnd.number()); \
    for (int j = 0; j < numDerivs; ++j) \
      x[i].fastAccessDx(j) = rnd.number(); \
    /* Receive buffer for the plain broadcast is pre-sized here. */ \
    x2[i] = FadType(numDerivs, 0.0); \
  } \
  if (comm->getRank() == 0) { \
    x2 = x; \
    x3 = x; \
  } \
  \
  Teuchos::broadcast(*comm, 0, numEntries, &x2[0]); \
  const bool localOk1 = checkFadArrays(x, x2, label + " Broadcast", out); \
  const bool globalOk1 = checkResultOnAllProcs(*comm, out, localOk1); \
  \
  Teuchos::broadcast(*comm, fts, 0, numEntries, &x3[0]); \
  const bool localOk2 = \
    checkFadArrays(x, x3, label + " Broadcast FTS", out); \
  const bool globalOk2 = checkResultOnAllProcs(*comm, out, localOk2); \
  \
  success = globalOk1 && globalOk2; \
} \
132  \
133 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_GatherAll ) { \
134  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
135  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
136  \
137  int n = 7; \
138  int p = 5; \
139  int size = comm->getSize(); \
140  int rank = comm->getRank(); \
141  int N = n*size; \
142  ValueTypeSerializer<int,FadType> fts( \
143  rcp(new ValueTypeSerializer<int,double>), p); \
144  \
145  Teuchos::Array<FadType> x(n), x2(N), x3(N), x4(N); \
146  for (int i=0; i<n; i++) { \
147  x[i] = FadType(p, (rank+1)*(i+1)); \
148  for (int j=0; j<p; j++) \
149  x[i].fastAccessDx(j) = (rank+1)*(i+1)*(j+1); \
150  } \
151  for (int i=0; i<N; i++) { \
152  x2[i] = FadType(p, 0.0); \
153  } \
154  for (int j=0; j<size; j++) { \
155  for (int i=0; i<n; i++) { \
156  x3[n*j+i] = FadType(p, (j+1)*(i+1)); \
157  for (int k=0; k<p; k++) \
158  x3[n*j+i].fastAccessDx(k) = (j+1)*(i+1)*(k+1); \
159  } \
160  } \
161  \
162  Teuchos::gatherAll(*comm, n, &x[0], N, &x2[0]); \
163  bool success1 = checkFadArrays( \
164  x3, x2, std::string(#FAD)+" Gather All", out); \
165  success1 = checkResultOnAllProcs(*comm, out, success1); \
166  \
167  Teuchos::gatherAll(*comm, fts, n, &x[0], N, &x4[0]); \
168  bool success2 = checkFadArrays( \
169  x3, x4, std::string(#FAD)+" Gather All FTS", out); \
170  success2 = checkResultOnAllProcs(*comm, out, success2); \
171  \
172  success = success1 && success2; \
173 } \
174  \
175 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_SumAll ) { \
176  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
177  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
178  \
179  int n = 7; \
180  int p = 5; \
181  int num_proc = comm->getSize(); \
182  ValueTypeSerializer<int,FadType> fts( \
183  rcp(new ValueTypeSerializer<int,double>), p); \
184  \
185  Teuchos::Array<FadType> x(n), sums(n), sums2(n), sums3(n); \
186  for (int i=0; i<n; i++) { \
187  x[i] = FadType(p, 1.0*(i+1)); \
188  for (int j=0; j<p; j++) \
189  x[i].fastAccessDx(j) = 2.0*(i+1); \
190  } \
191  for (int i=0; i<n; i++) { \
192  sums[i] = FadType(p, 1.0*(i+1)*num_proc); \
193  for (int j=0; j<p; j++) \
194  sums[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
195  } \
196  for (int i=0; i<n; i++) { \
197  sums2[i] = FadType(p, 0.0); \
198  } \
199  \
200  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
201  bool success1 = checkFadArrays( \
202  sums, sums2, std::string(#FAD)+" Sum All", out); \
203  success1 = checkResultOnAllProcs(*comm, out, success1); \
204  \
205  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
206  bool success2 = checkFadArrays( \
207  sums, sums3, std::string(#FAD)+" Sum All FTS", out); \
208  success2 = checkResultOnAllProcs(*comm, out, success2); \
209  \
210  success = success1 && success2; \
211 } \
212  \
213 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_MaxAll ) { \
214  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
215  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
216  \
217  int n = 7; \
218  int p = 5; \
219  int rank = comm->getRank(); \
220  int num_proc = comm->getSize(); \
221  ValueTypeSerializer<int,FadType> fts( \
222  rcp(new ValueTypeSerializer<int,double>), p); \
223  \
224  Teuchos::Array<FadType> x(n), maxs(n), maxs2(n), maxs3(n); \
225  for (int i=0; i<n; i++) { \
226  x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
227  for (int j=0; j<p; j++) \
228  x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
229  } \
230  for (int i=0; i<n; i++) { \
231  maxs[i] = FadType(p, 1.0*(i+1)*num_proc); \
232  for (int j=0; j<p; j++) \
233  maxs[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
234  } \
235  for (int i=0; i<n; i++) { \
236  maxs2[i] = FadType(p, 0.0); \
237  } \
238  \
239  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
240  bool success1 = checkFadArrays( \
241  maxs, maxs2, std::string(#FAD)+" Max All", out); \
242  success1 = checkResultOnAllProcs(*comm, out, success1); \
243  \
244  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
245  bool success2 = checkFadArrays( \
246  maxs, maxs3, std::string(#FAD)+" Max All FTS", out); \
247  success2 = checkResultOnAllProcs(*comm, out, success2); \
248  \
249  success = success1 && success2; \
250 } \
251  \
252 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_MinAll ) { \
253  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
254  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
255  \
256  int n = 7; \
257  int p = 5; \
258  int rank = comm->getRank(); \
259  ValueTypeSerializer<int,FadType> fts( \
260  rcp(new ValueTypeSerializer<int,double>), p); \
261  \
262  Teuchos::Array<FadType> x(n), mins(n), mins2(n), mins3(n); \
263  for (int i=0; i<n; i++) { \
264  x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
265  for (int j=0; j<p; j++) \
266  x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
267  } \
268  for (int i=0; i<n; i++) { \
269  mins[i] = FadType(p, 1.0*(i+1)); \
270  for (int j=0; j<p; j++) \
271  mins[i].fastAccessDx(j) = 2.0*(i+1); \
272  } \
273  for (int i=0; i<n; i++) { \
274  mins2[i] = FadType(p, 0.0); \
275  } \
276  \
277  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
278  bool success1 = checkFadArrays( \
279  mins, mins2, std::string(#FAD)+" Min All", out); \
280  success1 = checkResultOnAllProcs(*comm, out, success1); \
281  \
282  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
283  bool success2 = checkFadArrays( \
284  mins, mins3, std::string(#FAD)+" Min All FTS", out); \
285  success2 = checkResultOnAllProcs(*comm, out, success2); \
286  \
287  success = success1 && success2; \
288 } \
289  \
290 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanSum ) { \
291  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
292  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
293  \
294  int n = 7; \
295  int p = 5; \
296  int rank = comm->getRank(); \
297  ValueTypeSerializer<int,FadType> fts( \
298  rcp(new ValueTypeSerializer<int,double>), p); \
299  \
300  Teuchos::Array<FadType> x(n), sums(n), sums2(n), sums3(n); \
301  for (int i=0; i<n; i++) { \
302  x[i] = FadType(p, 1.0*(i+1)); \
303  for (int j=0; j<p; j++) \
304  x[i].fastAccessDx(j) = 2.0*(i+1); \
305  } \
306  for (int i=0; i<n; i++) { \
307  sums[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
308  for (int j=0; j<p; j++) \
309  sums[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
310  } \
311  for (int i=0; i<n; i++) { \
312  sums2[i] = FadType(p, 0.0); \
313  } \
314  \
315  Teuchos::scan(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
316  bool success1 = checkFadArrays( \
317  sums, sums2, std::string(#FAD)+" Scan Sum", out); \
318  success1 = checkResultOnAllProcs(*comm, out, success1); \
319  \
320  Teuchos::scan(*comm, fts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
321  bool success2 = checkFadArrays( \
322  sums, sums3, std::string(#FAD)+" Scan Sum FTS", out); \
323  success2 = checkResultOnAllProcs(*comm, out, success2); \
324  \
325  success = success1 && success2; \
326 } \
327  \
328 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanMax ) { \
329  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
330  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
331  \
332  int n = 7; \
333  int p = 5; \
334  int rank = comm->getRank(); \
335  ValueTypeSerializer<int,FadType> fts( \
336  rcp(new ValueTypeSerializer<int,double>), p); \
337  \
338  Teuchos::Array<FadType> x(n), maxs(n), maxs2(n), maxs3(n); \
339  for (int i=0; i<n; i++) { \
340  x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
341  for (int j=0; j<p; j++) \
342  x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
343  } \
344  for (int i=0; i<n; i++) { \
345  maxs[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
346  for (int j=0; j<p; j++) \
347  maxs[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
348  } \
349  for (int i=0; i<n; i++) { \
350  maxs2[i] = FadType(p, 0.0); \
351  } \
352  \
353  Teuchos::scan(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
354  bool success1 = checkFadArrays( \
355  maxs, maxs2, std::string(#FAD)+" Scan Max", out); \
356  success1 = checkResultOnAllProcs(*comm, out, success1); \
357  \
358  Teuchos::scan(*comm, fts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
359  bool success2 = checkFadArrays( \
360  maxs, maxs3, std::string(#FAD)+" Scan Max FTS", out); \
361  success2 = checkResultOnAllProcs(*comm, out, success2); \
362  \
363  success = success1 && success2; \
364 } \
365  \
366 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanMin ) { \
367  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
368  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
369  \
370  int n = 7; \
371  int p = 5; \
372  int rank = comm->getRank(); \
373  ValueTypeSerializer<int,FadType> fts( \
374  rcp(new ValueTypeSerializer<int,double>), p); \
375  \
376  Teuchos::Array<FadType> x(n), mins(n), mins2(n), mins3(n); \
377  for (int i=0; i<n; i++) { \
378  x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
379  for (int j=0; j<p; j++) \
380  x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
381  } \
382  for (int i=0; i<n; i++) { \
383  mins[i] = FadType(p, 1.0*(i+1)); \
384  for (int j=0; j<p; j++) \
385  mins[i].fastAccessDx(j) = 2.0*(i+1); \
386  } \
387  for (int i=0; i<n; i++) { \
388  mins2[i] = FadType(p, 0.0); \
389  } \
390  \
391  Teuchos::scan(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
392  bool success1 = checkFadArrays( \
393  mins, mins2, std::string(#FAD)+" Scan Min", out); \
394  success1 = checkResultOnAllProcs(*comm, out, success1); \
395  \
396  Teuchos::scan(*comm, fts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
397  bool success2 = checkFadArrays( \
398  mins, mins3, std::string(#FAD)+" Scan Min FTS", out); \
399  success2 = checkResultOnAllProcs(*comm, out, success2); \
400  \
401  success = success1 && success2; \
402 } \
403  \
404 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_SendReceive ) { \
405  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
406  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
407  \
408  int num_proc = comm->getSize(); \
409  if (num_proc > 1) { \
410  int rank = comm->getRank(); \
411  int n = 7; \
412  int p = 5; \
413  ValueTypeSerializer<int,FadType> fts( \
414  rcp(new ValueTypeSerializer<int,double>), p); \
415  \
416  Teuchos::Array<FadType> x(n), x2(n), x3(n); \
417  for (int i=0; i<n; i++) { \
418  x[i] = FadType(p, 1.0*(i+1)); \
419  for (int j=0; j<p; j++) \
420  x[i].fastAccessDx(j) = 2.0*(i+1)*(j+1); \
421  } \
422  for (int i=0; i<n; i++) { \
423  x2[i] = FadType(p, 0.0); \
424  } \
425  if (rank != 1) { \
426  x2 = x; \
427  x3 = x; \
428  } \
429  \
430  if (rank == 0) Teuchos::send(*comm, n, &x[0], 1); \
431  if (rank == 1) Teuchos::receive(*comm, 0, n, &x2[0]); \
432  bool success1 = checkFadArrays( \
433  x, x2, std::string(#FAD)+" Send/Receive", out); \
434  success1 = checkResultOnAllProcs(*comm, out, success1); \
435  \
436  if (rank == 0) Teuchos::send(*comm, fts, n, &x[0], 1); \
437  if (rank == 1) Teuchos::receive(*comm, fts, 0, n, &x3[0]); \
438  bool success2 = checkFadArrays( \
439  x, x3, std::string(#FAD)+" Send/Receive FTS", out); \
440  success2 = checkResultOnAllProcs(*comm, out, success2); \
441  \
442  success = success1 && success2; \
443  } \
444  else \
445  success = true; \
446 } \
447  \
448 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_Broadcast ) { \
449  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
450  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
451  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
452  \
453  int n = 7; \
454  int p1 = 5; \
455  int p2 = 5; \
456  RCP< ValueTypeSerializer<int,FadType> > fts = \
457  rcp(new ValueTypeSerializer<int,FadType>( \
458  rcp(new ValueTypeSerializer<int,double>), p1)); \
459  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
460  \
461  Teuchos::Array<FadFadType> x(n), x2(n), x3(n); \
462  for (int i=0; i<n; i++) { \
463  FadType f(p1, rnd.number()); \
464  for (int k=0; k<p1; k++) \
465  f.fastAccessDx(k) = rnd.number(); \
466  x[i] = FadFadType(p2, f); \
467  for (int j=0; j<p2; j++) { \
468  FadType g(p1, rnd.number()); \
469  for (int k=0; k<p1; k++) \
470  g.fastAccessDx(k) = rnd.number(); \
471  x[i].fastAccessDx(j) = g; \
472  } \
473  } \
474  for (int i=0; i<n; i++) { \
475  x2[i] = FadFadType(p2, FadType(p1, 0.0)); \
476  for (int j=0; j<p2; j++) \
477  x2[i].fastAccessDx(j) = FadType(p1, 0.0); \
478  } \
479  if (comm->getRank() == 0) { \
480  x2 = x; \
481  x3 = x; \
482  } \
483  \
484  Teuchos::broadcast(*comm, 0, n, &x2[0]); \
485  bool success1 = checkFadArrays( \
486  x, x2, std::string(#FAD)+"<"+#FAD+"> Broadcast", out); \
487  success1 = checkResultOnAllProcs(*comm, out, success1); \
488  \
489  Teuchos::broadcast(*comm, ffts, 0, n, &x3[0]); \
490  bool success2 = checkFadArrays( \
491  x, x3, std::string(#FAD)+"<"+#FAD+"> Broadcast FTS", out); \
492  success2 = checkResultOnAllProcs(*comm, out, success2); \
493  \
494  success = success1 && success2; \
495 } \
496  \
497 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_GatherAll ) { \
498  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
499  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
500  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
501  \
502  int n = 7; \
503  int p1 = 5; \
504  int p2 = 5; \
505  int size = comm->getSize(); \
506  int rank = comm->getRank(); \
507  int N = n*size; \
508  RCP< ValueTypeSerializer<int,FadType> > fts = \
509  rcp(new ValueTypeSerializer<int,FadType>( \
510  rcp(new ValueTypeSerializer<int,double>), p1)); \
511  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
512  \
513  Teuchos::Array<FadFadType> x(n), x2(N), x3(N), x4(N); \
514  for (int i=0; i<n; i++) { \
515  FadType f(p1, (rank+1)*(i+1)); \
516  for (int k=0; k<p1; k++) \
517  f.fastAccessDx(k) = (rank+1)*(i+1)*(k+1); \
518  x[i] = FadFadType(p2, f); \
519  for (int j=0; j<p2; j++) { \
520  x[i].fastAccessDx(j) = f; \
521  } \
522  } \
523  for (int i=0; i<N; i++) { \
524  x2[i] = FadFadType(p2, FadType(p1, 0.0)); \
525  for (int j=0; j<p2; j++) \
526  x2[i].fastAccessDx(j) = FadType(p1, 0.0); \
527  } \
528  for (int j=0; j<size; j++) { \
529  for (int i=0; i<n; i++) { \
530  FadType f(p1, (j+1)*(i+1)); \
531  for (int k=0; k<p1; k++) \
532  f.fastAccessDx(k) = (j+1)*(i+1)*(k+1); \
533  x3[n*j+i] = FadFadType(p2, f); \
534  for (int k=0; k<p2; k++) \
535  x3[n*j+i].fastAccessDx(k) = f; \
536  } \
537  } \
538  \
539  Teuchos::gatherAll(*comm, n, &x[0], N, &x2[0]); \
540  bool success1 = checkFadArrays( \
541  x3, x2, std::string(#FAD)+"<"+#FAD+"> Gather All", out); \
542  success1 = checkResultOnAllProcs(*comm, out, success1); \
543  \
544  Teuchos::gatherAll(*comm, ffts, n, &x[0], N, &x4[0]); \
545  bool success2 = checkFadArrays( \
546  x3, x4, std::string(#FAD)+"<"+#FAD+"> Gather All FTS", out); \
547  success2 = checkResultOnAllProcs(*comm, out, success2); \
548  \
549  success = success1 && success2; \
550 } \
551  \
552 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_SumAll ) { \
553  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
554  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
555  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
556  \
557  int n = 7; \
558  int p1 = 5; \
559  int p2 = 5; \
560  int num_proc = comm->getSize(); \
561  RCP< ValueTypeSerializer<int,FadType> > fts = \
562  rcp(new ValueTypeSerializer<int,FadType>( \
563  rcp(new ValueTypeSerializer<int,double>), p1)); \
564  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
565  \
566  Teuchos::Array<FadFadType> x(n), sums(n), sums2(n), sums3(n); \
567  for (int i=0; i<n; i++) { \
568  FadType f(p1, 1.0*(i+1)); \
569  for (int k=0; k<p1; k++) \
570  f.fastAccessDx(k) = 2.0*(i+1); \
571  x[i] = FadFadType(p2, f); \
572  for (int j=0; j<p2; j++) { \
573  x[i].fastAccessDx(j) = f; \
574  } \
575  } \
576  for (int i=0; i<n; i++) { \
577  FadType f(p1, 1.0*(i+1)*num_proc); \
578  for (int k=0; k<p1; k++) \
579  f.fastAccessDx(k) = 2.0*(i+1)*num_proc; \
580  sums[i] = FadFadType(p2, f); \
581  for (int j=0; j<p2; j++) \
582  sums[i].fastAccessDx(j) = f; \
583  } \
584  for (int i=0; i<n; i++) { \
585  sums2[i] = FadFadType(p2, FadType(p1, 0.0)); \
586  for (int j=0; j<p2; j++) \
587  sums2[i].fastAccessDx(j) = FadType(p1, 0.0); \
588  } \
589  \
590  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
591  bool success1 = checkFadArrays( \
592  sums, sums2, std::string(#FAD)+"<"+#FAD+"> Sum All", out); \
593  success1 = checkResultOnAllProcs(*comm, out, success1); \
594  \
595  Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
596  bool success2 = checkFadArrays( \
597  sums, sums3, std::string(#FAD)+"<"+#FAD+"> Sum All", out); \
598  success2 = checkResultOnAllProcs(*comm, out, success2); \
599  \
600  success = success1 && success2; \
601 } \
602  \
603 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_MaxAll ) { \
604  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
605  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
606  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
607  \
608  int n = 7; \
609  int p1 = 5; \
610  int p2 = 5; \
611  int rank = comm->getRank(); \
612  int num_proc = comm->getSize(); \
613  RCP< ValueTypeSerializer<int,FadType> > fts = \
614  rcp(new ValueTypeSerializer<int,FadType>( \
615  rcp(new ValueTypeSerializer<int,double>), p1)); \
616  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
617  \
618  Teuchos::Array<FadFadType> x(n), maxs(n), maxs2(n), maxs3(n); \
619  for (int i=0; i<n; i++) { \
620  FadType f(p1, 1.0*(i+1)*(rank+1)); \
621  for (int k=0; k<p1; k++) \
622  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
623  x[i] = FadFadType(p2, f); \
624  for (int j=0; j<p2; j++) { \
625  x[i].fastAccessDx(j) = f; \
626  } \
627  } \
628  for (int i=0; i<n; i++) { \
629  FadType f(p1, 1.0*(i+1)*num_proc); \
630  for (int k=0; k<p1; k++) \
631  f.fastAccessDx(k) = 2.0*(i+1)*num_proc; \
632  maxs[i] = FadFadType(p2, f); \
633  for (int j=0; j<p2; j++) \
634  maxs[i].fastAccessDx(j) = f; \
635  } \
636  for (int i=0; i<n; i++) { \
637  maxs2[i] = FadFadType(p2, FadType(p1, 0.0)); \
638  for (int j=0; j<p2; j++) \
639  maxs2[i].fastAccessDx(j) = FadType(p1, 0.0); \
640  } \
641  \
642  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
643  bool success1 = checkFadArrays( \
644  maxs, maxs2, std::string(#FAD)+"<"+#FAD+"> Max All", out); \
645  success1 = checkResultOnAllProcs(*comm, out, success1); \
646  \
647  Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
648  bool success2 = checkFadArrays( \
649  maxs, maxs3, std::string(#FAD)+"<"+#FAD+"> Max All FTS", out); \
650  success2 = checkResultOnAllProcs(*comm, out, success2); \
651  \
652  success = success1 && success2; \
653 } \
654  \
655 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_MinAll ) { \
656  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
657  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
658  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
659  \
660  int n = 7; \
661  int p1 = 5; \
662  int p2 = 5; \
663  int rank = comm->getRank(); \
664  RCP< ValueTypeSerializer<int,FadType> > fts = \
665  rcp(new ValueTypeSerializer<int,FadType>( \
666  rcp(new ValueTypeSerializer<int,double>), p1)); \
667  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
668  \
669  Teuchos::Array<FadFadType> x(n), mins(n), mins2(n), mins3(n); \
670  for (int i=0; i<n; i++) { \
671  FadType f(p1, 1.0*(i+1)*(rank+1)); \
672  for (int k=0; k<p1; k++) \
673  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
674  x[i] = FadFadType(p2, f); \
675  for (int j=0; j<p2; j++) { \
676  x[i].fastAccessDx(j) = f; \
677  } \
678  } \
679  for (int i=0; i<n; i++) { \
680  FadType f(p1, 1.0*(i+1)); \
681  for (int k=0; k<p1; k++) \
682  f.fastAccessDx(k) = 2.0*(i+1); \
683  mins[i] = FadFadType(p2, f); \
684  for (int j=0; j<p2; j++) \
685  mins[i].fastAccessDx(j) = f; \
686  } \
687  for (int i=0; i<n; i++) { \
688  mins2[i] = FadFadType(p2, FadType(p1, 0.0)); \
689  for (int j=0; j<p2; j++) \
690  mins2[i].fastAccessDx(j) = FadType(p1, 0.0); \
691  } \
692  \
693  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
694  bool success1 = checkFadArrays( \
695  mins, mins2, std::string(#FAD)+"<"+#FAD+"> Min All", out); \
696  success1 = checkResultOnAllProcs(*comm, out, success1); \
697  \
698  Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
699  bool success2 = checkFadArrays( \
700  mins, mins3, std::string(#FAD)+"<"+#FAD+"> Min All FTS", out); \
701  success2 = checkResultOnAllProcs(*comm, out, success2); \
702  \
703  success = success1 && success2; \
704 } \
705  \
706 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanSum ) { \
707  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
708  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
709  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
710  \
711  int n = 7; \
712  int p1 = 5; \
713  int p2 = 5; \
714  int rank = comm->getRank(); \
715  RCP< ValueTypeSerializer<int,FadType> > fts = \
716  rcp(new ValueTypeSerializer<int,FadType>( \
717  rcp(new ValueTypeSerializer<int,double>), p1)); \
718  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
719  \
720  Teuchos::Array<FadFadType> x(n), sums(n), sums2(n), sums3(n); \
721  for (int i=0; i<n; i++) { \
722  FadType f(p1, 1.0*(i+1)); \
723  for (int k=0; k<p1; k++) \
724  f.fastAccessDx(k) = 2.0*(i+1); \
725  x[i] = FadFadType(p2, f); \
726  for (int j=0; j<p2; j++) { \
727  x[i].fastAccessDx(j) = f; \
728  } \
729  } \
730  for (int i=0; i<n; i++) { \
731  FadType f(p1, 1.0*(i+1)*(rank+1)); \
732  for (int k=0; k<p1; k++) \
733  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
734  sums[i] = FadFadType(p2, f); \
735  for (int j=0; j<p2; j++) \
736  sums[i].fastAccessDx(j) = f; \
737  } \
738  for (int i=0; i<n; i++) { \
739  sums2[i] = FadFadType(p2, FadType(p1, 0.0)); \
740  for (int j=0; j<p2; j++) \
741  sums2[i].fastAccessDx(j) = FadType(p1, 0.0); \
742  } \
743  \
744  Teuchos::scan(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
745  bool success1 = checkFadArrays( \
746  sums, sums2, std::string(#FAD)+"<"+#FAD+"> Scan Sum", out); \
747  success1 = checkResultOnAllProcs(*comm, out, success1); \
748  \
749  Teuchos::scan(*comm, ffts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
750  bool success2 = checkFadArrays( \
751  sums, sums3, std::string(#FAD)+"<"+#FAD+"> Scan Sum FTS", out); \
752  success2 = checkResultOnAllProcs(*comm, out, success2); \
753  \
754  success = success1 && success2; \
755 } \
756  \
757 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanMax ) { \
758  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
759  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
760  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
761  \
762  int n = 7; \
763  int p1 = 5; \
764  int p2 = 5; \
765  int rank = comm->getRank(); \
766  RCP< ValueTypeSerializer<int,FadType> > fts = \
767  rcp(new ValueTypeSerializer<int,FadType>( \
768  rcp(new ValueTypeSerializer<int,double>), p1)); \
769  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
770  \
771  Teuchos::Array<FadFadType> x(n), maxs(n), maxs2(n), maxs3(n); \
772  for (int i=0; i<n; i++) { \
773  FadType f(p1, 1.0*(i+1)*(rank+1)); \
774  for (int k=0; k<p1; k++) \
775  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
776  x[i] = FadFadType(p2, f); \
777  for (int j=0; j<p2; j++) { \
778  x[i].fastAccessDx(j) = f; \
779  } \
780  } \
781  for (int i=0; i<n; i++) { \
782  FadType f(p1, 1.0*(i+1)*(rank+1)); \
783  for (int k=0; k<p1; k++) \
784  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
785  maxs[i] = FadFadType(p2, f); \
786  for (int j=0; j<p2; j++) \
787  maxs[i].fastAccessDx(j) = f; \
788  } \
789  for (int i=0; i<n; i++) { \
790  maxs2[i] = FadFadType(p2, FadType(p1, 0.0)); \
791  for (int j=0; j<p2; j++) \
792  maxs2[i].fastAccessDx(j) = FadType(p1, 0.0); \
793  } \
794  \
795  Teuchos::scan(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
796  bool success1 = checkFadArrays( \
797  maxs, maxs2, std::string(#FAD)+"<"+#FAD+"> Scan Max", out); \
798  success1 = checkResultOnAllProcs(*comm, out, success1); \
799  \
800  Teuchos::scan(*comm, ffts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
801  bool success2 = checkFadArrays( \
802  maxs, maxs3, std::string(#FAD)+"<"+#FAD+"> Scan Max FTS", out); \
803  success2 = checkResultOnAllProcs(*comm, out, success2); \
804  \
805  success = success1 && success2; \
806 } \
807  \
808 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanMin ) { \
809  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
810  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
811  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
812  \
813  int n = 7; \
814  int p1 = 5; \
815  int p2 = 5; \
816  int rank = comm->getRank(); \
817  RCP< ValueTypeSerializer<int,FadType> > fts = \
818  rcp(new ValueTypeSerializer<int,FadType>( \
819  rcp(new ValueTypeSerializer<int,double>), p1)); \
820  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
821  \
822  Teuchos::Array<FadFadType> x(n), mins(n), mins2(n), mins3(n); \
823  for (int i=0; i<n; i++) { \
824  FadType f(p1, 1.0*(i+1)*(rank+1)); \
825  for (int k=0; k<p1; k++) \
826  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
827  x[i] = FadFadType(p2, f); \
828  for (int j=0; j<p2; j++) { \
829  x[i].fastAccessDx(j) = f; \
830  } \
831  } \
832  for (int i=0; i<n; i++) { \
833  FadType f(p1, 1.0*(i+1)); \
834  for (int k=0; k<p1; k++) \
835  f.fastAccessDx(k) = 2.0*(i+1); \
836  mins[i] = FadFadType(p2, f); \
837  for (int j=0; j<p2; j++) \
838  mins[i].fastAccessDx(j) = f; \
839  } \
840  for (int i=0; i<n; i++) { \
841  mins2[i] = FadFadType(p2, FadType(p1, 0.0)); \
842  for (int j=0; j<p2; j++) \
843  mins2[i].fastAccessDx(j) = FadType(p1, 0.0); \
844  } \
845  \
846  Teuchos::scan(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
847  bool success1 = checkFadArrays( \
848  mins, mins2, std::string(#FAD)+"<"+#FAD+"> Scan Min", out); \
849  success1 = checkResultOnAllProcs(*comm, out, success1); \
850  \
851  Teuchos::scan(*comm, ffts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
852  bool success2 = checkFadArrays( \
853  mins, mins3, std::string(#FAD)+"<"+#FAD+"> Scan Min FTS", out); \
854  success2 = checkResultOnAllProcs(*comm, out, success2); \
855  \
856  success = success1 && success2; \
857 } \
858  \
859 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_SendReceive ) { \
860  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
861  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
862  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
863  \
864  int num_proc = comm->getSize(); \
865  if (num_proc > 1) { \
866  int rank = comm->getRank(); \
867  int n = 7; \
868  int p1 = 5; \
869  int p2 = 5; \
870  RCP< ValueTypeSerializer<int,FadType> > fts = \
871  rcp(new ValueTypeSerializer<int,FadType>( \
872  rcp(new ValueTypeSerializer<int,double>), p1)); \
873  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
874  \
875  Teuchos::Array<FadFadType> x(n), x2(n), x3(n); \
876  for (int i=0; i<n; i++) { \
877  FadType f(p1, 1.0*(i+1)); \
878  for (int k=0; k<p1; k++) \
879  f.fastAccessDx(k) = 2.0*(i+1)*(k+1); \
880  x[i] = FadFadType(p2, f); \
881  for (int j=0; j<p2; j++) \
882  x[i].fastAccessDx(j) = f; \
883  } \
884  for (int i=0; i<n; i++) { \
885  x2[i] = FadFadType(p2, FadType(p1, 0.0)); \
886  for (int j=0; j<p2; j++) \
887  x2[i].fastAccessDx(j) = FadType(p1, 0.0); \
888  } \
889  if (rank != 1) { \
890  x2 = x; \
891  x3 = x; \
892  } \
893  \
894  if (rank == 0) Teuchos::send(*comm, n, &x[0], 1); \
895  if (rank == 1) Teuchos::receive(*comm, 0, n, &x2[0]); \
896  bool success1 = checkFadArrays( \
897  x, x2, std::string(#FAD)+"<"+#FAD+"> Send/Receive", out); \
898  success1 = checkResultOnAllProcs(*comm, out, success1); \
899  \
900  if (rank == 0) Teuchos::send(*comm, ffts, n, &x[0], 1); \
901  if (rank == 1) Teuchos::receive(*comm, ffts, 0, n, &x3[0]); \
902  bool success2 = checkFadArrays( \
903  x, x3, std::string(#FAD)+"<"+#FAD+"> Send/Receive FTS", out); \
904  success2 = checkResultOnAllProcs(*comm, out, success2); \
905  \
906  success = success1 && success2; \
907  } \
908  else \
909  success = true; \
910 }
911 
912 #if defined(HAVE_SACADO_KOKKOS) && defined(HAVE_SACADO_TEUCHOSKOKKOSCOMM)
913 
914 #include "Kokkos_Core.hpp"
915 
// FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Device)
//
// Expands to four TEUCHOS_UNIT_TEST cases in the test group
// FAD##_Comm_Kokkos_##Device: Fad_Broadcast, Fad_SumAll, Fad_MaxAll and
// Fad_MinAll.  Each case
//   - allocates Kokkos::View<FadType*,Device> arrays of n = 7 entries with
//     p = 5 derivative components and creates host mirrors of them,
//   - fills the host mirrors with the input and the expected result,
//   - runs the Teuchos collective twice, once without and once with an
//     explicit ValueTypeSerializer (`fts`),
//   - compares expected and computed host arrays with checkFadArrays and
//     combines the verdict across ranks with checkResultOnAllProcs.
// `success` is set to the conjunction of both comparisons.
//
// Parameters:
//   FadType - Fad scalar type stored in the views.
//   FAD     - token pasted into the test-group name and stringized into the
//             tags passed to checkFadArrays.
//   Device  - Kokkos device type.  It is token-pasted into the test-group
//             name, so it must be a single unqualified identifier.
//
// If Kokkos::HostSpace can access Device::memory_space the collective is
// called on the device views and the result is deep-copied to the host
// mirror; otherwise the collective is called on the host mirrors directly.
//
// NOTE(review): the views are constructed with a second extent of p+1 --
// presumably the Fad derivative dimension plus one slot for the value;
// confirm against the Sacado/Kokkos view documentation.
// NOTE(review): `rnd` (used by Fad_Broadcast) and `Ordinal` are not declared
// inside this macro and must be provided by the including file.
916 #define FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Device) \
917 TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_Broadcast ) { \
918  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
919  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
920  /* Fad_Broadcast: broadcast from rank 0, then compare with local h_x. */ \
921  \
922  int n = 7; \
923  int p = 5; \
924  ValueTypeSerializer<int,FadType> fts( \
925  rcp(new ValueTypeSerializer<int,double>), p); \
926  \
927  typedef Kokkos::View<FadType*,Device> ViewType; \
928  typedef ViewType::HostMirror HostViewType; \
929  ViewType x("x",n,p+1), x2("x2",n,p+1), x3("x3",n,p+1); \
930  HostViewType h_x = Kokkos::create_mirror_view(x); \
931  HostViewType h_x2 = Kokkos::create_mirror_view(x2); \
932  HostViewType h_x3 = Kokkos::create_mirror_view(x3); \
933  for (int i=0; i<n; i++) { \
934  h_x[i] = FadType(p, rnd.number()); \
935  for (int j=0; j<p; j++) \
936  h_x[i].fastAccessDx(j) = rnd.number(); \
937  } \
938  for (int i=0; i<n; i++) { \
939  h_x2[i] = FadType(p, 0.0); \
940  } \
941  Kokkos::deep_copy(x, h_x); \
942  Kokkos::deep_copy(x2, h_x2); \
943  if (comm->getRank() == 0) { \
944  x2 = x; \
945  x3 = x; \
946  h_x2 = h_x; \
947  h_x3 = h_x; \
948  } \
949  \
950  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
951  /* so only do the communication on the host. This probably makes */ \
952  /* the deep copy unnecessary. */ \
953  const bool accessible = \
954  Kokkos::Impl::MemorySpaceAccess< \
955  Kokkos::HostSpace, \
956  typename Device::memory_space >::accessible; \
957  if (accessible) { \
958  Teuchos::broadcast(*comm, 0, n, x2); \
959  Kokkos::deep_copy(h_x2, x2); \
960  } \
961  else \
962  Teuchos::broadcast(*comm, 0, n, h_x2); \
963  bool success1 = checkFadArrays( \
964  h_x, h_x2, std::string(#FAD)+" Broadcast", out); \
965  success1 = checkResultOnAllProcs(*comm, out, success1); \
966  /* Same check again, this time passing the serializer fts explicitly. */ \
967  if (accessible) { \
968  Teuchos::broadcast(*comm, fts, 0, n, x3); \
969  Kokkos::deep_copy(h_x3, x3); \
970  } \
971  else \
972  Teuchos::broadcast(*comm, fts, 0, n, h_x3); \
973  bool success2 = checkFadArrays( \
974  h_x, h_x3, std::string(#FAD)+" Broadcast FTS", out); \
975  success2 = checkResultOnAllProcs(*comm, out, success2); \
976  \
977  success = success1 && success2; \
978 } \
979 TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_SumAll ) { \
980  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
981  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
982  /* Fad_SumAll: every rank holds the same x, so expected = num_proc * x. */ \
983  \
984  int n = 7; \
985  int p = 5; \
986  int num_proc = comm->getSize(); \
987  ValueTypeSerializer<int,FadType> fts( \
988  rcp(new ValueTypeSerializer<int,double>), p); \
989  \
990  typedef Kokkos::View<FadType*,Device> ViewType; \
991  typedef ViewType::HostMirror HostViewType; \
992  ViewType x("x",n,p+1), sums("sums",n,p+1), \
993  sums2("sums2",n,p+1), sums3("sums3",n,p+1); \
994  HostViewType h_x = Kokkos::create_mirror_view(x); \
995  HostViewType h_sums = Kokkos::create_mirror_view(sums); \
996  HostViewType h_sums2 = Kokkos::create_mirror_view(sums2); \
997  HostViewType h_sums3 = Kokkos::create_mirror_view(sums3); \
998  for (int i=0; i<n; i++) { \
999  h_x[i] = FadType(p, 1.0*(i+1)); \
1000  for (int j=0; j<p; j++) \
1001  h_x[i].fastAccessDx(j) = 2.0*(i+1); \
1002  } \
1003  for (int i=0; i<n; i++) { \
1004  h_sums[i] = FadType(p, 1.0*(i+1)*num_proc); \
1005  for (int j=0; j<p; j++) \
1006  h_sums[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
1007  } \
1008  for (int i=0; i<n; i++) { \
1009  h_sums2[i] = FadType(p, 0.0); \
1010  } \
1011  Kokkos::deep_copy(x, h_x); \
1012  Kokkos::deep_copy(sums, h_sums); \
1013  Kokkos::deep_copy(sums2, h_sums2); \
1014  \
1015  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
1016  /* so only do the communication on the host. This probably makes */ \
1017  /* the deep copy unnecessary. */ \
1018  const bool accessible = \
1019  Kokkos::Impl::MemorySpaceAccess< \
1020  Kokkos::HostSpace, \
1021  typename Device::memory_space >::accessible; \
1022  if (accessible) { \
1023  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, x, sums2); \
1024  Kokkos::deep_copy(h_sums2, sums2); \
1025  } \
1026  else \
1027  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, h_x, h_sums2); \
1028  bool success1 = checkFadArrays( \
1029  h_sums, h_sums2, std::string(#FAD)+" Sum All", out); \
1030  success1 = checkResultOnAllProcs(*comm, out, success1); \
1031  /* Same check again, this time passing the serializer fts explicitly. */ \
1032  if (accessible) { \
1033  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_SUM, n, x, sums3); \
1034  Kokkos::deep_copy(h_sums3, sums3); \
1035  } \
1036  else \
1037  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_SUM, n, h_x, h_sums3); \
1038  bool success2 = checkFadArrays( \
1039  h_sums, h_sums3, std::string(#FAD)+" Sum All FTS", out); \
1040  success2 = checkResultOnAllProcs(*comm, out, success2); \
1041  success = success1 && success2; \
1042  \
1043 } \
1044 TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_MaxAll ) { \
1045  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
1046  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
1047  /* Fad_MaxAll: x scales with (rank+1), so expected uses factor num_proc. */ \
1048  \
1049  int n = 7; \
1050  int p = 5; \
1051  int rank = comm->getRank(); \
1052  int num_proc = comm->getSize(); \
1053  ValueTypeSerializer<int,FadType> fts( \
1054  rcp(new ValueTypeSerializer<int,double>), p); \
1055  \
1056  typedef Kokkos::View<FadType*,Device> ViewType; \
1057  typedef ViewType::HostMirror HostViewType; \
1058  ViewType x("x",n,p+1), maxs("maxs",n,p+1), \
1059  maxs2("maxs2",n,p+1), maxs3("maxs3",n,p+1); \
1060  HostViewType h_x = Kokkos::create_mirror_view(x); \
1061  HostViewType h_maxs = Kokkos::create_mirror_view(maxs); \
1062  HostViewType h_maxs2 = Kokkos::create_mirror_view(maxs2); \
1063  HostViewType h_maxs3 = Kokkos::create_mirror_view(maxs3); \
1064  for (int i=0; i<n; i++) { \
1065  h_x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
1066  for (int j=0; j<p; j++) \
1067  h_x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
1068  } \
1069  for (int i=0; i<n; i++) { \
1070  h_maxs[i] = FadType(p, 1.0*(i+1)*num_proc); \
1071  for (int j=0; j<p; j++) \
1072  h_maxs[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
1073  } \
1074  for (int i=0; i<n; i++) { \
1075  h_maxs2[i] = FadType(p, 0.0); \
1076  } \
1077  Kokkos::deep_copy(x, h_x); \
1078  Kokkos::deep_copy(maxs, h_maxs); \
1079  Kokkos::deep_copy(maxs2, h_maxs2); \
1080  \
1081  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
1082  /* so only do the communication on the host. This probably makes */ \
1083  /* the deep copy unnecessary. */ \
1084  const bool accessible = \
1085  Kokkos::Impl::MemorySpaceAccess< \
1086  Kokkos::HostSpace, \
1087  typename Device::memory_space >::accessible; \
1088  if (accessible) { \
1089  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, x, maxs2); \
1090  Kokkos::deep_copy(h_maxs2, maxs2); \
1091  } \
1092  else \
1093  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, h_x, h_maxs2); \
1094  bool success1 = checkFadArrays( \
1095  h_maxs, h_maxs2, std::string(#FAD)+" Max All", out); \
1096  success1 = checkResultOnAllProcs(*comm, out, success1); \
1097  /* Same check again, this time passing the serializer fts explicitly. */ \
1098  if (accessible) { \
1099  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MAX, n, x, maxs3); \
1100  Kokkos::deep_copy(h_maxs3, maxs3); \
1101  } \
1102  else \
1103  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MAX, n, h_x, h_maxs3); \
1104  bool success2 = checkFadArrays( \
1105  h_maxs, h_maxs3, std::string(#FAD)+" Max All FTS", out); \
1106  success2 = checkResultOnAllProcs(*comm, out, success2); \
1107  success = success1 && success2; \
1108  \
1109 } \
1110 TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_MinAll ) { \
1111  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
1112  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
1113  /* Fad_MinAll: x scales with (rank+1), so expected uses factor 1. */ \
1114  \
1115  int n = 7; \
1116  int p = 5; \
1117  int rank = comm->getRank(); \
1118  ValueTypeSerializer<int,FadType> fts( \
1119  rcp(new ValueTypeSerializer<int,double>), p); \
1120  \
1121  typedef Kokkos::View<FadType*,Device> ViewType; \
1122  typedef ViewType::HostMirror HostViewType; \
1123  ViewType x("x",n,p+1), mins("mins",n,p+1), \
1124  mins2("mins2",n,p+1), mins3("mins3",n,p+1); \
1125  HostViewType h_x = Kokkos::create_mirror_view(x); \
1126  HostViewType h_mins = Kokkos::create_mirror_view(mins); \
1127  HostViewType h_mins2 = Kokkos::create_mirror_view(mins2); \
1128  HostViewType h_mins3 = Kokkos::create_mirror_view(mins3); \
1129  for (int i=0; i<n; i++) { \
1130  h_x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
1131  for (int j=0; j<p; j++) \
1132  h_x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
1133  } \
1134  for (int i=0; i<n; i++) { \
1135  h_mins[i] = FadType(p, 1.0*(i+1)); \
1136  for (int j=0; j<p; j++) \
1137  h_mins[i].fastAccessDx(j) = 2.0*(i+1); \
1138  } \
1139  for (int i=0; i<n; i++) { \
1140  h_mins2[i] = FadType(p, 0.0); \
1141  } \
1142  Kokkos::deep_copy(x, h_x); \
1143  Kokkos::deep_copy(mins, h_mins); \
1144  Kokkos::deep_copy(mins2, h_mins2); \
1145  \
1146  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
1147  /* so only do the communication on the host. This probably makes */ \
1148  /* the deep copy unnecessary. */ \
1149  const bool accessible = \
1150  Kokkos::Impl::MemorySpaceAccess< \
1151  Kokkos::HostSpace, \
1152  typename Device::memory_space >::accessible; \
1153  if (accessible) { \
1154  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, x, mins2); \
1155  Kokkos::deep_copy(h_mins2, mins2); \
1156  } \
1157  else \
1158  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, h_x, h_mins2); \
1159  bool success1 = checkFadArrays( \
1160  h_mins, h_mins2, std::string(#FAD)+" Min All", out); \
1161  success1 = checkResultOnAllProcs(*comm, out, success1); \
1162  /* Same check again, this time passing the serializer fts explicitly. */ \
1163  if (accessible) { \
1164  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MIN, n, x, mins3); \
1165  Kokkos::deep_copy(h_mins3, mins3); \
1166  } \
1167  else \
1168  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MIN, n, h_x, h_mins3); \
1169  bool success2 = checkFadArrays( \
1170  h_mins, h_mins3, std::string(#FAD)+" Min All FTS", out); \
1171  success2 = checkResultOnAllProcs(*comm, out, success2); \
1172  success = success1 && success2; \
1173  \
1174 }
1175 
// OpenMP instantiation of the Kokkos communication tests.
// With KOKKOS_ENABLE_OPENMP defined this expands FAD_KOKKOS_COMM_TESTS_DEV
// with Device = OpenMP; otherwise it expands to nothing.  The
// using-declaration makes the unqualified name OpenMP visible, because the
// device argument is token-pasted into the test-group name and therefore
// cannot be written as Kokkos::OpenMP.
1176 #ifdef KOKKOS_ENABLE_OPENMP
1177 #define FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD) \
1178  using Kokkos::OpenMP; \
1179  FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, OpenMP)
1180 #else
1181 #define FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD)
1182 #endif
1183 
// Threads instantiation of the Kokkos communication tests.
// With KOKKOS_ENABLE_THREADS defined this expands FAD_KOKKOS_COMM_TESTS_DEV
// with Device = Threads; otherwise it expands to nothing.  The
// using-declaration supplies the unqualified name needed for token pasting.
1184 #ifdef KOKKOS_ENABLE_THREADS
1185 #define FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD) \
1186  using Kokkos::Threads; \
1187  FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Threads)
1188 #else
1189 #define FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD)
1190 #endif
1191 
// CUDA instantiation of the Kokkos communication tests.
// With KOKKOS_ENABLE_CUDA defined this expands FAD_KOKKOS_COMM_TESTS_DEV
// with Device = Cuda; otherwise it expands to nothing.  The
// using-declaration supplies the unqualified name needed for token pasting.
1192 #ifdef KOKKOS_ENABLE_CUDA
1193 #define FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD) \
1194  using Kokkos::Cuda; \
1195  FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Cuda)
1196 #else
1197 #define FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD)
1198 #endif
1199 
// HIP instantiation of the Kokkos communication tests.
// With KOKKOS_ENABLE_HIP defined this expands FAD_KOKKOS_COMM_TESTS_DEV
// with Device = HIP; otherwise it expands to nothing.  The
// using-declaration supplies the unqualified name needed for token pasting.
1200 #ifdef KOKKOS_ENABLE_HIP
1201 #define FAD_KOKKOS_COMM_TESTS_HIP(FadType, FAD) \
1202  using Kokkos::HIP; \
1203  FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, HIP)
1204 #else
1205 #define FAD_KOKKOS_COMM_TESTS_HIP(FadType, FAD)
1206 #endif
1207 
// Serial instantiation of the Kokkos communication tests.
// With KOKKOS_ENABLE_SERIAL defined this expands FAD_KOKKOS_COMM_TESTS_DEV
// with Device = Serial; otherwise it expands to nothing.  The
// using-declaration supplies the unqualified name needed for token pasting.
1208 #ifdef KOKKOS_ENABLE_SERIAL
1209 #define FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD) \
1210  using Kokkos::Serial; \
1211  FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Serial)
1212 #else
1213 #define FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD)
1214 #endif
1215 
// Instantiates the Kokkos communication tests for every backend that has a
// per-backend wrapper in this file: OpenMP, Threads, CUDA, HIP and Serial.
// Each wrapper expands to nothing when its KOKKOS_ENABLE_* macro is not
// defined, so listing all of them here is safe in any configuration.
//
//   FadType - Fad scalar type under test.
//   FAD     - token used to build the test-group names and output tags.
//
// The HIP wrapper is included so that a HIP-enabled build actually
// instantiates the tests for that backend.
1216 #define FAD_KOKKOS_COMM_TESTS(FadType, FAD) \
1217  FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD) \
1218  FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD) \
1219  FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD) \
      FAD_KOKKOS_COMM_TESTS_HIP(FadType, FAD) \
1220  FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD)
1221 
1222 #else
1223 
// Fallback for builds where the enclosing configuration guard
// (HAVE_SACADO_KOKKOS and HAVE_SACADO_TEUCHOSKOKKOSCOMM) is not satisfied:
// the macro is still defined, but expands to nothing, so it can be used
// unconditionally.
1224 #define FAD_KOKKOS_COMM_TESTS(FadType, FAD)
1225 
1226 #endif
1227 
// Instantiates the communication unit tests for one Fad type by expanding
// FAD_BASE_COMM_TESTS with the same arguments.
//
//   FadType - Fad scalar type under test.
//   FAD     - token forwarded to FAD_BASE_COMM_TESTS.
//
// NOTE(review): FAD_KOKKOS_COMM_TESTS is defined in this file (including an
// empty fallback) but is not expanded here -- confirm whether the Kokkos
// tests are meant to be instantiated from this macro or separately by the
// including test source.
1228 #define FAD_COMM_TESTS(FadType, FAD) \
1229  FAD_BASE_COMM_TESTS(FadType, FAD)
Sacado::Fad::DFad< double > FadType
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
bool checkFadArrays(const ArrayType &x, const ArrayType &x2, const std::string &tag, Teuchos::FancyOStream &out)
bool checkResultOnAllProcs(const Teuchos::Comm< Ordinal > &comm, Teuchos::FancyOStream &out, const bool result)
static SACADO_INLINE_FUNCTION bool eval(const T &x, const T &y)
int Ordinal