Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Fad_CommTests.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Sacado Package
4 //
5 // Copyright 2006 NTESS and the Sacado contributors.
6 // SPDX-License-Identifier: LGPL-2.1-or-later
7 // *****************************************************************************
8 // @HEADER
9 
11 #include "Teuchos_CommHelpers.hpp"
12 #include "Teuchos_DefaultComm.hpp"
13 #include "Teuchos_Array.hpp"
14 #include "Teuchos_Comm.hpp"
15 
16 #include "Sacado_mpl_apply.hpp"
17 #include "Sacado_Random.hpp"
18 
19 using Teuchos::RCP;
20 using Teuchos::rcp;
22 
23 template <typename ArrayType>
24 bool checkFadArrays(const ArrayType& x,
25  const ArrayType& x2,
26  const std::string& tag,
27  Teuchos::FancyOStream& out) {
28  typedef typename ArrayType::value_type FadType;
29 
30  // Check sizes match
31  bool success = (x.size() == x2.size());
32  out << tag << " Fad array size test";
33  if (success)
34  out << " passed";
35  else
36  out << " failed";
37  out << ": \n\tExpected: " << x.size() << ", \n\tGot: " << x2.size()
38  << "." << std::endl;
39 
40  // Check Fads match
41  const int sz = x.size();
42  for (int i=0; i<sz; i++) {
43  bool success2 = Sacado::IsEqual<FadType>::eval(x[i], x2[i]);
44  out << tag << " Fad array comparison test " << i;
45  if (success2)
46  out << " passed";
47  else
48  out << " failed";
49  out << ": \n\tExpected: " << x[i] << ", \n\tGot: " << x2[i] << "."
50  << std::endl;
51  success = success && success2;
52  }
53 
54  return success;
55 }
56 
57 template<typename Ordinal>
59  const Teuchos::Comm<Ordinal> &comm,
61  const bool result
62  )
63 {
64  out << "\nChecking that the above test passed in all processes ...";
65  int thisResult = ( result ? 1 : 0 );
66  int sumResult = -1;
67  Teuchos::reduceAll(comm,Teuchos::REDUCE_SUM,Ordinal(1),&thisResult,
68  &sumResult);
69  const bool passed = sumResult==Teuchos::size(comm);
70  if(passed)
71  out << " passed\n";
72  else
73  out << " (sumResult="<<sumResult<<"!=numProcs="<<Teuchos::size(comm)<<") failed\n";
74  return passed;
75 }
76 
77 #define FAD_BASE_COMM_TESTS(FadType, FAD) \
78 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_Broadcast ) { \
79  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
80  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
81  \
82  int n = 7; \
83  int p = 5; \
84  ValueTypeSerializer<int,FadType> fts( \
85  rcp(new ValueTypeSerializer<int,double>), p); \
86  \
87  Teuchos::Array<FadType> x(n), x2(n), x3(n); \
88  for (int i=0; i<n; i++) { \
89  x[i] = FadType(p, rnd.number()); \
90  for (int j=0; j<p; j++) \
91  x[i].fastAccessDx(j) = rnd.number(); \
92  } \
93  for (int i=0; i<n; i++) { \
94  x2[i] = FadType(p, 0.0); \
95  } \
96  if (comm->getRank() == 0) { \
97  x2 = x; \
98  x3 = x; \
99  } \
100  \
101  Teuchos::broadcast(*comm, 0, n, &x2[0]); \
102  bool success1 = checkFadArrays( \
103  x, x2, std::string(#FAD)+" Broadcast", out); \
104  success1 = checkResultOnAllProcs(*comm, out, success1); \
105  \
106  Teuchos::broadcast(*comm, fts, 0, n, &x3[0]); \
107  bool success2 = checkFadArrays( \
108  x, x3, std::string(#FAD)+" Broadcast FTS", out); \
109  success2 = checkResultOnAllProcs(*comm, out, success2); \
110  \
111  success = success1 && success2; \
112 } \
113  \
114 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_GatherAll ) { \
115  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
116  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
117  \
118  int n = 7; \
119  int p = 5; \
120  int size = comm->getSize(); \
121  int rank = comm->getRank(); \
122  int N = n*size; \
123  ValueTypeSerializer<int,FadType> fts( \
124  rcp(new ValueTypeSerializer<int,double>), p); \
125  \
126  Teuchos::Array<FadType> x(n), x2(N), x3(N), x4(N); \
127  for (int i=0; i<n; i++) { \
128  x[i] = FadType(p, (rank+1)*(i+1)); \
129  for (int j=0; j<p; j++) \
130  x[i].fastAccessDx(j) = (rank+1)*(i+1)*(j+1); \
131  } \
132  for (int i=0; i<N; i++) { \
133  x2[i] = FadType(p, 0.0); \
134  } \
135  for (int j=0; j<size; j++) { \
136  for (int i=0; i<n; i++) { \
137  x3[n*j+i] = FadType(p, (j+1)*(i+1)); \
138  for (int k=0; k<p; k++) \
139  x3[n*j+i].fastAccessDx(k) = (j+1)*(i+1)*(k+1); \
140  } \
141  } \
142  \
143  Teuchos::gatherAll(*comm, n, &x[0], N, &x2[0]); \
144  bool success1 = checkFadArrays( \
145  x3, x2, std::string(#FAD)+" Gather All", out); \
146  success1 = checkResultOnAllProcs(*comm, out, success1); \
147  \
148  Teuchos::gatherAll(*comm, fts, n, &x[0], N, &x4[0]); \
149  bool success2 = checkFadArrays( \
150  x3, x4, std::string(#FAD)+" Gather All FTS", out); \
151  success2 = checkResultOnAllProcs(*comm, out, success2); \
152  \
153  success = success1 && success2; \
154 } \
155  \
156 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_SumAll ) { \
157  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
158  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
159  \
160  int n = 7; \
161  int p = 5; \
162  int num_proc = comm->getSize(); \
163  ValueTypeSerializer<int,FadType> fts( \
164  rcp(new ValueTypeSerializer<int,double>), p); \
165  \
166  Teuchos::Array<FadType> x(n), sums(n), sums2(n), sums3(n); \
167  for (int i=0; i<n; i++) { \
168  x[i] = FadType(p, 1.0*(i+1)); \
169  for (int j=0; j<p; j++) \
170  x[i].fastAccessDx(j) = 2.0*(i+1); \
171  } \
172  for (int i=0; i<n; i++) { \
173  sums[i] = FadType(p, 1.0*(i+1)*num_proc); \
174  for (int j=0; j<p; j++) \
175  sums[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
176  } \
177  for (int i=0; i<n; i++) { \
178  sums2[i] = FadType(p, 0.0); \
179  } \
180  \
181  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
182  bool success1 = checkFadArrays( \
183  sums, sums2, std::string(#FAD)+" Sum All", out); \
184  success1 = checkResultOnAllProcs(*comm, out, success1); \
185  \
186  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
187  bool success2 = checkFadArrays( \
188  sums, sums3, std::string(#FAD)+" Sum All FTS", out); \
189  success2 = checkResultOnAllProcs(*comm, out, success2); \
190  \
191  success = success1 && success2; \
192 } \
193  \
194 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_MaxAll ) { \
195  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
196  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
197  \
198  int n = 7; \
199  int p = 5; \
200  int rank = comm->getRank(); \
201  int num_proc = comm->getSize(); \
202  ValueTypeSerializer<int,FadType> fts( \
203  rcp(new ValueTypeSerializer<int,double>), p); \
204  \
205  Teuchos::Array<FadType> x(n), maxs(n), maxs2(n), maxs3(n); \
206  for (int i=0; i<n; i++) { \
207  x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
208  for (int j=0; j<p; j++) \
209  x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
210  } \
211  for (int i=0; i<n; i++) { \
212  maxs[i] = FadType(p, 1.0*(i+1)*num_proc); \
213  for (int j=0; j<p; j++) \
214  maxs[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
215  } \
216  for (int i=0; i<n; i++) { \
217  maxs2[i] = FadType(p, 0.0); \
218  } \
219  \
220  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
221  bool success1 = checkFadArrays( \
222  maxs, maxs2, std::string(#FAD)+" Max All", out); \
223  success1 = checkResultOnAllProcs(*comm, out, success1); \
224  \
225  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
226  bool success2 = checkFadArrays( \
227  maxs, maxs3, std::string(#FAD)+" Max All FTS", out); \
228  success2 = checkResultOnAllProcs(*comm, out, success2); \
229  \
230  success = success1 && success2; \
231 } \
232  \
233 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_MinAll ) { \
234  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
235  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
236  \
237  int n = 7; \
238  int p = 5; \
239  int rank = comm->getRank(); \
240  ValueTypeSerializer<int,FadType> fts( \
241  rcp(new ValueTypeSerializer<int,double>), p); \
242  \
243  Teuchos::Array<FadType> x(n), mins(n), mins2(n), mins3(n); \
244  for (int i=0; i<n; i++) { \
245  x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
246  for (int j=0; j<p; j++) \
247  x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
248  } \
249  for (int i=0; i<n; i++) { \
250  mins[i] = FadType(p, 1.0*(i+1)); \
251  for (int j=0; j<p; j++) \
252  mins[i].fastAccessDx(j) = 2.0*(i+1); \
253  } \
254  for (int i=0; i<n; i++) { \
255  mins2[i] = FadType(p, 0.0); \
256  } \
257  \
258  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
259  bool success1 = checkFadArrays( \
260  mins, mins2, std::string(#FAD)+" Min All", out); \
261  success1 = checkResultOnAllProcs(*comm, out, success1); \
262  \
263  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
264  bool success2 = checkFadArrays( \
265  mins, mins3, std::string(#FAD)+" Min All FTS", out); \
266  success2 = checkResultOnAllProcs(*comm, out, success2); \
267  \
268  success = success1 && success2; \
269 } \
270  \
271 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanSum ) { \
272  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
273  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
274  \
275  int n = 7; \
276  int p = 5; \
277  int rank = comm->getRank(); \
278  ValueTypeSerializer<int,FadType> fts( \
279  rcp(new ValueTypeSerializer<int,double>), p); \
280  \
281  Teuchos::Array<FadType> x(n), sums(n), sums2(n), sums3(n); \
282  for (int i=0; i<n; i++) { \
283  x[i] = FadType(p, 1.0*(i+1)); \
284  for (int j=0; j<p; j++) \
285  x[i].fastAccessDx(j) = 2.0*(i+1); \
286  } \
287  for (int i=0; i<n; i++) { \
288  sums[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
289  for (int j=0; j<p; j++) \
290  sums[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
291  } \
292  for (int i=0; i<n; i++) { \
293  sums2[i] = FadType(p, 0.0); \
294  } \
295  \
296  Teuchos::scan(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
297  bool success1 = checkFadArrays( \
298  sums, sums2, std::string(#FAD)+" Scan Sum", out); \
299  success1 = checkResultOnAllProcs(*comm, out, success1); \
300  \
301  Teuchos::scan(*comm, fts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
302  bool success2 = checkFadArrays( \
303  sums, sums3, std::string(#FAD)+" Scan Sum FTS", out); \
304  success2 = checkResultOnAllProcs(*comm, out, success2); \
305  \
306  success = success1 && success2; \
307 } \
308  \
309 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanMax ) { \
310  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
311  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
312  \
313  int n = 7; \
314  int p = 5; \
315  int rank = comm->getRank(); \
316  ValueTypeSerializer<int,FadType> fts( \
317  rcp(new ValueTypeSerializer<int,double>), p); \
318  \
319  Teuchos::Array<FadType> x(n), maxs(n), maxs2(n), maxs3(n); \
320  for (int i=0; i<n; i++) { \
321  x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
322  for (int j=0; j<p; j++) \
323  x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
324  } \
325  for (int i=0; i<n; i++) { \
326  maxs[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
327  for (int j=0; j<p; j++) \
328  maxs[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
329  } \
330  for (int i=0; i<n; i++) { \
331  maxs2[i] = FadType(p, 0.0); \
332  } \
333  \
334  Teuchos::scan(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
335  bool success1 = checkFadArrays( \
336  maxs, maxs2, std::string(#FAD)+" Scan Max", out); \
337  success1 = checkResultOnAllProcs(*comm, out, success1); \
338  \
339  Teuchos::scan(*comm, fts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
340  bool success2 = checkFadArrays( \
341  maxs, maxs3, std::string(#FAD)+" Scan Max FTS", out); \
342  success2 = checkResultOnAllProcs(*comm, out, success2); \
343  \
344  success = success1 && success2; \
345 } \
346  \
347 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanMin ) { \
348  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
349  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
350  \
351  int n = 7; \
352  int p = 5; \
353  int rank = comm->getRank(); \
354  ValueTypeSerializer<int,FadType> fts( \
355  rcp(new ValueTypeSerializer<int,double>), p); \
356  \
357  Teuchos::Array<FadType> x(n), mins(n), mins2(n), mins3(n); \
358  for (int i=0; i<n; i++) { \
359  x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
360  for (int j=0; j<p; j++) \
361  x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
362  } \
363  for (int i=0; i<n; i++) { \
364  mins[i] = FadType(p, 1.0*(i+1)); \
365  for (int j=0; j<p; j++) \
366  mins[i].fastAccessDx(j) = 2.0*(i+1); \
367  } \
368  for (int i=0; i<n; i++) { \
369  mins2[i] = FadType(p, 0.0); \
370  } \
371  \
372  Teuchos::scan(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
373  bool success1 = checkFadArrays( \
374  mins, mins2, std::string(#FAD)+" Scan Min", out); \
375  success1 = checkResultOnAllProcs(*comm, out, success1); \
376  \
377  Teuchos::scan(*comm, fts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
378  bool success2 = checkFadArrays( \
379  mins, mins3, std::string(#FAD)+" Scan Min FTS", out); \
380  success2 = checkResultOnAllProcs(*comm, out, success2); \
381  \
382  success = success1 && success2; \
383 } \
384  \
385 TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_SendReceive ) { \
386  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
387  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
388  \
389  int num_proc = comm->getSize(); \
390  if (num_proc > 1) { \
391  int rank = comm->getRank(); \
392  int n = 7; \
393  int p = 5; \
394  ValueTypeSerializer<int,FadType> fts( \
395  rcp(new ValueTypeSerializer<int,double>), p); \
396  \
397  Teuchos::Array<FadType> x(n), x2(n), x3(n); \
398  for (int i=0; i<n; i++) { \
399  x[i] = FadType(p, 1.0*(i+1)); \
400  for (int j=0; j<p; j++) \
401  x[i].fastAccessDx(j) = 2.0*(i+1)*(j+1); \
402  } \
403  for (int i=0; i<n; i++) { \
404  x2[i] = FadType(p, 0.0); \
405  } \
406  if (rank != 1) { \
407  x2 = x; \
408  x3 = x; \
409  } \
410  \
411  if (rank == 0) Teuchos::send(*comm, n, &x[0], 1); \
412  if (rank == 1) Teuchos::receive(*comm, 0, n, &x2[0]); \
413  bool success1 = checkFadArrays( \
414  x, x2, std::string(#FAD)+" Send/Receive", out); \
415  success1 = checkResultOnAllProcs(*comm, out, success1); \
416  \
417  if (rank == 0) Teuchos::send(*comm, fts, n, &x[0], 1); \
418  if (rank == 1) Teuchos::receive(*comm, fts, 0, n, &x3[0]); \
419  bool success2 = checkFadArrays( \
420  x, x3, std::string(#FAD)+" Send/Receive FTS", out); \
421  success2 = checkResultOnAllProcs(*comm, out, success2); \
422  \
423  success = success1 && success2; \
424  } \
425  else \
426  success = true; \
427 } \
428  \
429 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_Broadcast ) { \
430  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
431  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
432  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
433  \
434  int n = 7; \
435  int p1 = 5; \
436  int p2 = 5; \
437  RCP< ValueTypeSerializer<int,FadType> > fts = \
438  rcp(new ValueTypeSerializer<int,FadType>( \
439  rcp(new ValueTypeSerializer<int,double>), p1)); \
440  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
441  \
442  Teuchos::Array<FadFadType> x(n), x2(n), x3(n); \
443  for (int i=0; i<n; i++) { \
444  FadType f(p1, rnd.number()); \
445  for (int k=0; k<p1; k++) \
446  f.fastAccessDx(k) = rnd.number(); \
447  x[i] = FadFadType(p2, f); \
448  for (int j=0; j<p2; j++) { \
449  FadType g(p1, rnd.number()); \
450  for (int k=0; k<p1; k++) \
451  g.fastAccessDx(k) = rnd.number(); \
452  x[i].fastAccessDx(j) = g; \
453  } \
454  } \
455  for (int i=0; i<n; i++) { \
456  x2[i] = FadFadType(p2, FadType(p1, 0.0)); \
457  for (int j=0; j<p2; j++) \
458  x2[i].fastAccessDx(j) = FadType(p1, 0.0); \
459  } \
460  if (comm->getRank() == 0) { \
461  x2 = x; \
462  x3 = x; \
463  } \
464  \
465  Teuchos::broadcast(*comm, 0, n, &x2[0]); \
466  bool success1 = checkFadArrays( \
467  x, x2, std::string(#FAD)+"<"+#FAD+"> Broadcast", out); \
468  success1 = checkResultOnAllProcs(*comm, out, success1); \
469  \
470  Teuchos::broadcast(*comm, ffts, 0, n, &x3[0]); \
471  bool success2 = checkFadArrays( \
472  x, x3, std::string(#FAD)+"<"+#FAD+"> Broadcast FTS", out); \
473  success2 = checkResultOnAllProcs(*comm, out, success2); \
474  \
475  success = success1 && success2; \
476 } \
477  \
478 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_GatherAll ) { \
479  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
480  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
481  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
482  \
483  int n = 7; \
484  int p1 = 5; \
485  int p2 = 5; \
486  int size = comm->getSize(); \
487  int rank = comm->getRank(); \
488  int N = n*size; \
489  RCP< ValueTypeSerializer<int,FadType> > fts = \
490  rcp(new ValueTypeSerializer<int,FadType>( \
491  rcp(new ValueTypeSerializer<int,double>), p1)); \
492  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
493  \
494  Teuchos::Array<FadFadType> x(n), x2(N), x3(N), x4(N); \
495  for (int i=0; i<n; i++) { \
496  FadType f(p1, (rank+1)*(i+1)); \
497  for (int k=0; k<p1; k++) \
498  f.fastAccessDx(k) = (rank+1)*(i+1)*(k+1); \
499  x[i] = FadFadType(p2, f); \
500  for (int j=0; j<p2; j++) { \
501  x[i].fastAccessDx(j) = f; \
502  } \
503  } \
504  for (int i=0; i<N; i++) { \
505  x2[i] = FadFadType(p2, FadType(p1, 0.0)); \
506  for (int j=0; j<p2; j++) \
507  x2[i].fastAccessDx(j) = FadType(p1, 0.0); \
508  } \
509  for (int j=0; j<size; j++) { \
510  for (int i=0; i<n; i++) { \
511  FadType f(p1, (j+1)*(i+1)); \
512  for (int k=0; k<p1; k++) \
513  f.fastAccessDx(k) = (j+1)*(i+1)*(k+1); \
514  x3[n*j+i] = FadFadType(p2, f); \
515  for (int k=0; k<p2; k++) \
516  x3[n*j+i].fastAccessDx(k) = f; \
517  } \
518  } \
519  \
520  Teuchos::gatherAll(*comm, n, &x[0], N, &x2[0]); \
521  bool success1 = checkFadArrays( \
522  x3, x2, std::string(#FAD)+"<"+#FAD+"> Gather All", out); \
523  success1 = checkResultOnAllProcs(*comm, out, success1); \
524  \
525  Teuchos::gatherAll(*comm, ffts, n, &x[0], N, &x4[0]); \
526  bool success2 = checkFadArrays( \
527  x3, x4, std::string(#FAD)+"<"+#FAD+"> Gather All FTS", out); \
528  success2 = checkResultOnAllProcs(*comm, out, success2); \
529  \
530  success = success1 && success2; \
531 } \
532  \
533 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_SumAll ) { \
534  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
535  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
536  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
537  \
538  int n = 7; \
539  int p1 = 5; \
540  int p2 = 5; \
541  int num_proc = comm->getSize(); \
542  RCP< ValueTypeSerializer<int,FadType> > fts = \
543  rcp(new ValueTypeSerializer<int,FadType>( \
544  rcp(new ValueTypeSerializer<int,double>), p1)); \
545  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
546  \
547  Teuchos::Array<FadFadType> x(n), sums(n), sums2(n), sums3(n); \
548  for (int i=0; i<n; i++) { \
549  FadType f(p1, 1.0*(i+1)); \
550  for (int k=0; k<p1; k++) \
551  f.fastAccessDx(k) = 2.0*(i+1); \
552  x[i] = FadFadType(p2, f); \
553  for (int j=0; j<p2; j++) { \
554  x[i].fastAccessDx(j) = f; \
555  } \
556  } \
557  for (int i=0; i<n; i++) { \
558  FadType f(p1, 1.0*(i+1)*num_proc); \
559  for (int k=0; k<p1; k++) \
560  f.fastAccessDx(k) = 2.0*(i+1)*num_proc; \
561  sums[i] = FadFadType(p2, f); \
562  for (int j=0; j<p2; j++) \
563  sums[i].fastAccessDx(j) = f; \
564  } \
565  for (int i=0; i<n; i++) { \
566  sums2[i] = FadFadType(p2, FadType(p1, 0.0)); \
567  for (int j=0; j<p2; j++) \
568  sums2[i].fastAccessDx(j) = FadType(p1, 0.0); \
569  } \
570  \
571  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
572  bool success1 = checkFadArrays( \
573  sums, sums2, std::string(#FAD)+"<"+#FAD+"> Sum All", out); \
574  success1 = checkResultOnAllProcs(*comm, out, success1); \
575  \
576  Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
577  bool success2 = checkFadArrays( \
578  sums, sums3, std::string(#FAD)+"<"+#FAD+"> Sum All", out); \
579  success2 = checkResultOnAllProcs(*comm, out, success2); \
580  \
581  success = success1 && success2; \
582 } \
583  \
584 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_MaxAll ) { \
585  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
586  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
587  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
588  \
589  int n = 7; \
590  int p1 = 5; \
591  int p2 = 5; \
592  int rank = comm->getRank(); \
593  int num_proc = comm->getSize(); \
594  RCP< ValueTypeSerializer<int,FadType> > fts = \
595  rcp(new ValueTypeSerializer<int,FadType>( \
596  rcp(new ValueTypeSerializer<int,double>), p1)); \
597  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
598  \
599  Teuchos::Array<FadFadType> x(n), maxs(n), maxs2(n), maxs3(n); \
600  for (int i=0; i<n; i++) { \
601  FadType f(p1, 1.0*(i+1)*(rank+1)); \
602  for (int k=0; k<p1; k++) \
603  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
604  x[i] = FadFadType(p2, f); \
605  for (int j=0; j<p2; j++) { \
606  x[i].fastAccessDx(j) = f; \
607  } \
608  } \
609  for (int i=0; i<n; i++) { \
610  FadType f(p1, 1.0*(i+1)*num_proc); \
611  for (int k=0; k<p1; k++) \
612  f.fastAccessDx(k) = 2.0*(i+1)*num_proc; \
613  maxs[i] = FadFadType(p2, f); \
614  for (int j=0; j<p2; j++) \
615  maxs[i].fastAccessDx(j) = f; \
616  } \
617  for (int i=0; i<n; i++) { \
618  maxs2[i] = FadFadType(p2, FadType(p1, 0.0)); \
619  for (int j=0; j<p2; j++) \
620  maxs2[i].fastAccessDx(j) = FadType(p1, 0.0); \
621  } \
622  \
623  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
624  bool success1 = checkFadArrays( \
625  maxs, maxs2, std::string(#FAD)+"<"+#FAD+"> Max All", out); \
626  success1 = checkResultOnAllProcs(*comm, out, success1); \
627  \
628  Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
629  bool success2 = checkFadArrays( \
630  maxs, maxs3, std::string(#FAD)+"<"+#FAD+"> Max All FTS", out); \
631  success2 = checkResultOnAllProcs(*comm, out, success2); \
632  \
633  success = success1 && success2; \
634 } \
635  \
636 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_MinAll ) { \
637  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
638  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
639  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
640  \
641  int n = 7; \
642  int p1 = 5; \
643  int p2 = 5; \
644  int rank = comm->getRank(); \
645  RCP< ValueTypeSerializer<int,FadType> > fts = \
646  rcp(new ValueTypeSerializer<int,FadType>( \
647  rcp(new ValueTypeSerializer<int,double>), p1)); \
648  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
649  \
650  Teuchos::Array<FadFadType> x(n), mins(n), mins2(n), mins3(n); \
651  for (int i=0; i<n; i++) { \
652  FadType f(p1, 1.0*(i+1)*(rank+1)); \
653  for (int k=0; k<p1; k++) \
654  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
655  x[i] = FadFadType(p2, f); \
656  for (int j=0; j<p2; j++) { \
657  x[i].fastAccessDx(j) = f; \
658  } \
659  } \
660  for (int i=0; i<n; i++) { \
661  FadType f(p1, 1.0*(i+1)); \
662  for (int k=0; k<p1; k++) \
663  f.fastAccessDx(k) = 2.0*(i+1); \
664  mins[i] = FadFadType(p2, f); \
665  for (int j=0; j<p2; j++) \
666  mins[i].fastAccessDx(j) = f; \
667  } \
668  for (int i=0; i<n; i++) { \
669  mins2[i] = FadFadType(p2, FadType(p1, 0.0)); \
670  for (int j=0; j<p2; j++) \
671  mins2[i].fastAccessDx(j) = FadType(p1, 0.0); \
672  } \
673  \
674  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
675  bool success1 = checkFadArrays( \
676  mins, mins2, std::string(#FAD)+"<"+#FAD+"> Min All", out); \
677  success1 = checkResultOnAllProcs(*comm, out, success1); \
678  \
679  Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
680  bool success2 = checkFadArrays( \
681  mins, mins3, std::string(#FAD)+"<"+#FAD+"> Min All FTS", out); \
682  success2 = checkResultOnAllProcs(*comm, out, success2); \
683  \
684  success = success1 && success2; \
685 } \
686  \
687 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanSum ) { \
688  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
689  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
690  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
691  \
692  int n = 7; \
693  int p1 = 5; \
694  int p2 = 5; \
695  int rank = comm->getRank(); \
696  RCP< ValueTypeSerializer<int,FadType> > fts = \
697  rcp(new ValueTypeSerializer<int,FadType>( \
698  rcp(new ValueTypeSerializer<int,double>), p1)); \
699  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
700  \
701  Teuchos::Array<FadFadType> x(n), sums(n), sums2(n), sums3(n); \
702  for (int i=0; i<n; i++) { \
703  FadType f(p1, 1.0*(i+1)); \
704  for (int k=0; k<p1; k++) \
705  f.fastAccessDx(k) = 2.0*(i+1); \
706  x[i] = FadFadType(p2, f); \
707  for (int j=0; j<p2; j++) { \
708  x[i].fastAccessDx(j) = f; \
709  } \
710  } \
711  for (int i=0; i<n; i++) { \
712  FadType f(p1, 1.0*(i+1)*(rank+1)); \
713  for (int k=0; k<p1; k++) \
714  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
715  sums[i] = FadFadType(p2, f); \
716  for (int j=0; j<p2; j++) \
717  sums[i].fastAccessDx(j) = f; \
718  } \
719  for (int i=0; i<n; i++) { \
720  sums2[i] = FadFadType(p2, FadType(p1, 0.0)); \
721  for (int j=0; j<p2; j++) \
722  sums2[i].fastAccessDx(j) = FadType(p1, 0.0); \
723  } \
724  \
725  Teuchos::scan(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
726  bool success1 = checkFadArrays( \
727  sums, sums2, std::string(#FAD)+"<"+#FAD+"> Scan Sum", out); \
728  success1 = checkResultOnAllProcs(*comm, out, success1); \
729  \
730  Teuchos::scan(*comm, ffts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
731  bool success2 = checkFadArrays( \
732  sums, sums3, std::string(#FAD)+"<"+#FAD+"> Scan Sum FTS", out); \
733  success2 = checkResultOnAllProcs(*comm, out, success2); \
734  \
735  success = success1 && success2; \
736 } \
737  \
738 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanMax ) { \
739  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
740  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
741  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
742  \
743  int n = 7; \
744  int p1 = 5; \
745  int p2 = 5; \
746  int rank = comm->getRank(); \
747  RCP< ValueTypeSerializer<int,FadType> > fts = \
748  rcp(new ValueTypeSerializer<int,FadType>( \
749  rcp(new ValueTypeSerializer<int,double>), p1)); \
750  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
751  \
752  Teuchos::Array<FadFadType> x(n), maxs(n), maxs2(n), maxs3(n); \
753  for (int i=0; i<n; i++) { \
754  FadType f(p1, 1.0*(i+1)*(rank+1)); \
755  for (int k=0; k<p1; k++) \
756  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
757  x[i] = FadFadType(p2, f); \
758  for (int j=0; j<p2; j++) { \
759  x[i].fastAccessDx(j) = f; \
760  } \
761  } \
762  for (int i=0; i<n; i++) { \
763  FadType f(p1, 1.0*(i+1)*(rank+1)); \
764  for (int k=0; k<p1; k++) \
765  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
766  maxs[i] = FadFadType(p2, f); \
767  for (int j=0; j<p2; j++) \
768  maxs[i].fastAccessDx(j) = f; \
769  } \
770  for (int i=0; i<n; i++) { \
771  maxs2[i] = FadFadType(p2, FadType(p1, 0.0)); \
772  for (int j=0; j<p2; j++) \
773  maxs2[i].fastAccessDx(j) = FadType(p1, 0.0); \
774  } \
775  \
776  Teuchos::scan(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
777  bool success1 = checkFadArrays( \
778  maxs, maxs2, std::string(#FAD)+"<"+#FAD+"> Scan Max", out); \
779  success1 = checkResultOnAllProcs(*comm, out, success1); \
780  \
781  Teuchos::scan(*comm, ffts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
782  bool success2 = checkFadArrays( \
783  maxs, maxs3, std::string(#FAD)+"<"+#FAD+"> Scan Max FTS", out); \
784  success2 = checkResultOnAllProcs(*comm, out, success2); \
785  \
786  success = success1 && success2; \
787 } \
788  \
789 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanMin ) { \
790  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
791  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
792  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
793  \
794  int n = 7; \
795  int p1 = 5; \
796  int p2 = 5; \
797  int rank = comm->getRank(); \
798  RCP< ValueTypeSerializer<int,FadType> > fts = \
799  rcp(new ValueTypeSerializer<int,FadType>( \
800  rcp(new ValueTypeSerializer<int,double>), p1)); \
801  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
802  \
803  Teuchos::Array<FadFadType> x(n), mins(n), mins2(n), mins3(n); \
804  for (int i=0; i<n; i++) { \
805  FadType f(p1, 1.0*(i+1)*(rank+1)); \
806  for (int k=0; k<p1; k++) \
807  f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
808  x[i] = FadFadType(p2, f); \
809  for (int j=0; j<p2; j++) { \
810  x[i].fastAccessDx(j) = f; \
811  } \
812  } \
813  for (int i=0; i<n; i++) { \
814  FadType f(p1, 1.0*(i+1)); \
815  for (int k=0; k<p1; k++) \
816  f.fastAccessDx(k) = 2.0*(i+1); \
817  mins[i] = FadFadType(p2, f); \
818  for (int j=0; j<p2; j++) \
819  mins[i].fastAccessDx(j) = f; \
820  } \
821  for (int i=0; i<n; i++) { \
822  mins2[i] = FadFadType(p2, FadType(p1, 0.0)); \
823  for (int j=0; j<p2; j++) \
824  mins2[i].fastAccessDx(j) = FadType(p1, 0.0); \
825  } \
826  \
827  Teuchos::scan(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
828  bool success1 = checkFadArrays( \
829  mins, mins2, std::string(#FAD)+"<"+#FAD+"> Scan Min", out); \
830  success1 = checkResultOnAllProcs(*comm, out, success1); \
831  \
832  Teuchos::scan(*comm, ffts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
833  bool success2 = checkFadArrays( \
834  mins, mins3, std::string(#FAD)+"<"+#FAD+"> Scan Min FTS", out); \
835  success2 = checkResultOnAllProcs(*comm, out, success2); \
836  \
837  success = success1 && success2; \
838 } \
839  \
840 TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_SendReceive ) { \
841  typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
842  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
843  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
844  \
845  int num_proc = comm->getSize(); \
846  if (num_proc > 1) { \
847  int rank = comm->getRank(); \
848  int n = 7; \
849  int p1 = 5; \
850  int p2 = 5; \
851  RCP< ValueTypeSerializer<int,FadType> > fts = \
852  rcp(new ValueTypeSerializer<int,FadType>( \
853  rcp(new ValueTypeSerializer<int,double>), p1)); \
854  ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
855  \
856  Teuchos::Array<FadFadType> x(n), x2(n), x3(n); \
857  for (int i=0; i<n; i++) { \
858  FadType f(p1, 1.0*(i+1)); \
859  for (int k=0; k<p1; k++) \
860  f.fastAccessDx(k) = 2.0*(i+1)*(k+1); \
861  x[i] = FadFadType(p2, f); \
862  for (int j=0; j<p2; j++) \
863  x[i].fastAccessDx(j) = f; \
864  } \
865  for (int i=0; i<n; i++) { \
866  x2[i] = FadFadType(p2, FadType(p1, 0.0)); \
867  for (int j=0; j<p2; j++) \
868  x2[i].fastAccessDx(j) = FadType(p1, 0.0); \
869  } \
870  if (rank != 1) { \
871  x2 = x; \
872  x3 = x; \
873  } \
874  \
875  if (rank == 0) Teuchos::send(*comm, n, &x[0], 1); \
876  if (rank == 1) Teuchos::receive(*comm, 0, n, &x2[0]); \
877  bool success1 = checkFadArrays( \
878  x, x2, std::string(#FAD)+"<"+#FAD+"> Send/Receive", out); \
879  success1 = checkResultOnAllProcs(*comm, out, success1); \
880  \
881  if (rank == 0) Teuchos::send(*comm, ffts, n, &x[0], 1); \
882  if (rank == 1) Teuchos::receive(*comm, ffts, 0, n, &x3[0]); \
883  bool success2 = checkFadArrays( \
884  x, x3, std::string(#FAD)+"<"+#FAD+"> Send/Receive FTS", out); \
885  success2 = checkResultOnAllProcs(*comm, out, success2); \
886  \
887  success = success1 && success2; \
888  } \
889  else \
890  success = true; \
891 }
892 
893 #if defined(HAVE_SACADO_KOKKOS) && defined(HAVE_SACADO_TEUCHOSKOKKOSCOMM)
894 
895 #include "Kokkos_Core.hpp"
896 
897 #define FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Device) \
898 TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_Broadcast ) { \
899  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
900  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
901  \
902  \
903  int n = 7; \
904  int p = 5; \
905  ValueTypeSerializer<int,FadType> fts( \
906  rcp(new ValueTypeSerializer<int,double>), p); \
907  \
908  typedef Kokkos::View<FadType*,Device> ViewType; \
909  typedef ViewType::HostMirror HostViewType; \
910  ViewType x("x",n,p+1), x2("x2",n,p+1), x3("x3",n,p+1); \
911  HostViewType h_x = Kokkos::create_mirror_view(x); \
912  HostViewType h_x2 = Kokkos::create_mirror_view(x2); \
913  HostViewType h_x3 = Kokkos::create_mirror_view(x3); \
914  for (int i=0; i<n; i++) { \
915  h_x[i] = FadType(p, rnd.number()); \
916  for (int j=0; j<p; j++) \
917  h_x[i].fastAccessDx(j) = rnd.number(); \
918  } \
919  for (int i=0; i<n; i++) { \
920  h_x2[i] = FadType(p, 0.0); \
921  } \
922  Kokkos::deep_copy(x, h_x); \
923  Kokkos::deep_copy(x2, h_x2); \
924  if (comm->getRank() == 0) { \
925  x2 = x; \
926  x3 = x; \
927  h_x2 = h_x; \
928  h_x3 = h_x; \
929  } \
930  \
931  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
932  /* so only do the communication on the host. This probably makes */ \
933  /* the deep copy unnecessary. */ \
934  const bool accessible = \
935  Kokkos::Impl::MemorySpaceAccess< \
936  Kokkos::HostSpace, \
937  typename Device::memory_space >::accessible; \
938  if (accessible) { \
939  Teuchos::broadcast(*comm, 0, n, x2); \
940  Kokkos::deep_copy(h_x2, x2); \
941  } \
942  else \
943  Teuchos::broadcast(*comm, 0, n, h_x2); \
944  bool success1 = checkFadArrays( \
945  h_x, h_x2, std::string(#FAD)+" Broadcast", out); \
946  success1 = checkResultOnAllProcs(*comm, out, success1); \
947  \
948  if (accessible) { \
949  Teuchos::broadcast(*comm, fts, 0, n, x3); \
950  Kokkos::deep_copy(h_x3, x3); \
951  } \
952  else \
953  Teuchos::broadcast(*comm, fts, 0, n, h_x3); \
954  bool success2 = checkFadArrays( \
955  h_x, h_x3, std::string(#FAD)+" Broadcast FTS", out); \
956  success2 = checkResultOnAllProcs(*comm, out, success2); \
957  \
958  success = success1 && success2; \
959 } \
960 TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_SumAll ) { \
961  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
962  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
963  \
964  \
965  int n = 7; \
966  int p = 5; \
967  int num_proc = comm->getSize(); \
968  ValueTypeSerializer<int,FadType> fts( \
969  rcp(new ValueTypeSerializer<int,double>), p); \
970  \
971  typedef Kokkos::View<FadType*,Device> ViewType; \
972  typedef ViewType::HostMirror HostViewType; \
973  ViewType x("x",n,p+1), sums("sums",n,p+1), \
974  sums2("sums2",n,p+1), sums3("sums3",n,p+1); \
975  HostViewType h_x = Kokkos::create_mirror_view(x); \
976  HostViewType h_sums = Kokkos::create_mirror_view(sums); \
977  HostViewType h_sums2 = Kokkos::create_mirror_view(sums2); \
978  HostViewType h_sums3 = Kokkos::create_mirror_view(sums3); \
979  for (int i=0; i<n; i++) { \
980  h_x[i] = FadType(p, 1.0*(i+1)); \
981  for (int j=0; j<p; j++) \
982  h_x[i].fastAccessDx(j) = 2.0*(i+1); \
983  } \
984  for (int i=0; i<n; i++) { \
985  h_sums[i] = FadType(p, 1.0*(i+1)*num_proc); \
986  for (int j=0; j<p; j++) \
987  h_sums[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
988  } \
989  for (int i=0; i<n; i++) { \
990  h_sums2[i] = FadType(p, 0.0); \
991  } \
992  Kokkos::deep_copy(x, h_x); \
993  Kokkos::deep_copy(sums, h_sums); \
994  Kokkos::deep_copy(sums2, h_sums2); \
995  \
996  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
997  /* so only do the communication on the host. This probably makes */ \
998  /* the deep copy unnecessary. */ \
999  const bool accessible = \
1000  Kokkos::Impl::MemorySpaceAccess< \
1001  Kokkos::HostSpace, \
1002  typename Device::memory_space >::accessible; \
1003  if (accessible) { \
1004  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, x, sums2); \
1005  Kokkos::deep_copy(h_sums2, sums2); \
1006  } \
1007  else \
1008  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, h_x, h_sums2); \
1009  bool success1 = checkFadArrays( \
1010  h_sums, h_sums2, std::string(#FAD)+" Sum All", out); \
1011  success1 = checkResultOnAllProcs(*comm, out, success1); \
1012  \
1013  if (accessible) { \
1014  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_SUM, n, x, sums3); \
1015  Kokkos::deep_copy(h_sums3, sums3); \
1016  } \
1017  else \
1018  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_SUM, n, h_x, h_sums3); \
1019  bool success2 = checkFadArrays( \
1020  h_sums, h_sums3, std::string(#FAD)+" Sum All FTS", out); \
1021  success2 = checkResultOnAllProcs(*comm, out, success2); \
1022  success = success1 && success2; \
1023  \
1024 } \
1025 TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_MaxAll ) { \
1026  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
1027  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
1028  \
1029  \
1030  int n = 7; \
1031  int p = 5; \
1032  int rank = comm->getRank(); \
1033  int num_proc = comm->getSize(); \
1034  ValueTypeSerializer<int,FadType> fts( \
1035  rcp(new ValueTypeSerializer<int,double>), p); \
1036  \
1037  typedef Kokkos::View<FadType*,Device> ViewType; \
1038  typedef ViewType::HostMirror HostViewType; \
1039  ViewType x("x",n,p+1), maxs("maxs",n,p+1), \
1040  maxs2("maxs2",n,p+1), maxs3("maxs3",n,p+1); \
1041  HostViewType h_x = Kokkos::create_mirror_view(x); \
1042  HostViewType h_maxs = Kokkos::create_mirror_view(maxs); \
1043  HostViewType h_maxs2 = Kokkos::create_mirror_view(maxs2); \
1044  HostViewType h_maxs3 = Kokkos::create_mirror_view(maxs3); \
1045  for (int i=0; i<n; i++) { \
1046  h_x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
1047  for (int j=0; j<p; j++) \
1048  h_x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
1049  } \
1050  for (int i=0; i<n; i++) { \
1051  h_maxs[i] = FadType(p, 1.0*(i+1)*num_proc); \
1052  for (int j=0; j<p; j++) \
1053  h_maxs[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
1054  } \
1055  for (int i=0; i<n; i++) { \
1056  h_maxs2[i] = FadType(p, 0.0); \
1057  } \
1058  Kokkos::deep_copy(x, h_x); \
1059  Kokkos::deep_copy(maxs, h_maxs); \
1060  Kokkos::deep_copy(maxs2, h_maxs2); \
1061  \
1062  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
1063  /* so only do the communication on the host. This probably makes */ \
1064  /* the deep copy unnecessary. */ \
1065  const bool accessible = \
1066  Kokkos::Impl::MemorySpaceAccess< \
1067  Kokkos::HostSpace, \
1068  typename Device::memory_space >::accessible; \
1069  if (accessible) { \
1070  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, x, maxs2); \
1071  Kokkos::deep_copy(h_maxs2, maxs2); \
1072  } \
1073  else \
1074  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, h_x, h_maxs2); \
1075  bool success1 = checkFadArrays( \
1076  h_maxs, h_maxs2, std::string(#FAD)+" Max All", out); \
1077  success1 = checkResultOnAllProcs(*comm, out, success1); \
1078  \
1079  if (accessible) { \
1080  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MAX, n, x, maxs3); \
1081  Kokkos::deep_copy(h_maxs3, maxs3); \
1082  } \
1083  else \
1084  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MAX, n, h_x, h_maxs3); \
1085  bool success2 = checkFadArrays( \
1086  h_maxs, h_maxs3, std::string(#FAD)+" Max All FTS", out); \
1087  success2 = checkResultOnAllProcs(*comm, out, success2); \
1088  success = success1 && success2; \
1089  \
1090 } \
1091 TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_MinAll ) { \
1092  Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
1093  comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
1094  \
1095  \
1096  int n = 7; \
1097  int p = 5; \
1098  int rank = comm->getRank(); \
1099  ValueTypeSerializer<int,FadType> fts( \
1100  rcp(new ValueTypeSerializer<int,double>), p); \
1101  \
1102  typedef Kokkos::View<FadType*,Device> ViewType; \
1103  typedef ViewType::HostMirror HostViewType; \
1104  ViewType x("x",n,p+1), mins("mins",n,p+1), \
1105  mins2("mins2",n,p+1), mins3("mins3",n,p+1); \
1106  HostViewType h_x = Kokkos::create_mirror_view(x); \
1107  HostViewType h_mins = Kokkos::create_mirror_view(mins); \
1108  HostViewType h_mins2 = Kokkos::create_mirror_view(mins2); \
1109  HostViewType h_mins3 = Kokkos::create_mirror_view(mins3); \
1110  for (int i=0; i<n; i++) { \
1111  h_x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
1112  for (int j=0; j<p; j++) \
1113  h_x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
1114  } \
1115  for (int i=0; i<n; i++) { \
1116  h_mins[i] = FadType(p, 1.0*(i+1)); \
1117  for (int j=0; j<p; j++) \
1118  h_mins[i].fastAccessDx(j) = 2.0*(i+1); \
1119  } \
1120  for (int i=0; i<n; i++) { \
1121  h_mins2[i] = FadType(p, 0.0); \
1122  } \
1123  Kokkos::deep_copy(x, h_x); \
1124  Kokkos::deep_copy(mins, h_mins); \
1125  Kokkos::deep_copy(mins2, h_mins2); \
1126  \
1127  /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
1128  /* so only do the communication on the host. This probably makes */ \
1129  /* the deep copy unnecessary. */ \
1130  const bool accessible = \
1131  Kokkos::Impl::MemorySpaceAccess< \
1132  Kokkos::HostSpace, \
1133  typename Device::memory_space >::accessible; \
1134  if (accessible) { \
1135  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, x, mins2); \
1136  Kokkos::deep_copy(h_mins2, mins2); \
1137  } \
1138  else \
1139  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, h_x, h_mins2); \
1140  bool success1 = checkFadArrays( \
1141  h_mins, h_mins2, std::string(#FAD)+" Min All", out); \
1142  success1 = checkResultOnAllProcs(*comm, out, success1); \
1143  \
1144  if (accessible) { \
1145  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MIN, n, x, mins3); \
1146  Kokkos::deep_copy(h_mins3, mins3); \
1147  } \
1148  else \
1149  Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MIN, n, h_x, h_mins3); \
1150  bool success2 = checkFadArrays( \
1151  h_mins, h_mins3, std::string(#FAD)+" Min All FTS", out); \
1152  success2 = checkResultOnAllProcs(*comm, out, success2); \
1153  success = success1 && success2; \
1154  \
1155 }
1156 
1157 #ifdef KOKKOS_ENABLE_OPENMP
1158 #define FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD) \
1159  using Kokkos::OpenMP; \
1160  FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, OpenMP)
1161 #else
1162 #define FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD)
1163 #endif
1164 
1165 #ifdef KOKKOS_ENABLE_THREADS
1166 #define FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD) \
1167  using Kokkos::Threads; \
1168  FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Threads)
1169 #else
1170 #define FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD)
1171 #endif
1172 
1173 #ifdef KOKKOS_ENABLE_CUDA
1174 #define FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD) \
1175  using Kokkos::Cuda; \
1176  FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Cuda)
1177 #else
1178 #define FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD)
1179 #endif
1180 
1181 #ifdef KOKKOS_ENABLE_HIP
1182 #define FAD_KOKKOS_COMM_TESTS_HIP(FadType, FAD) \
1183  using Kokkos::HIP; \
1184  FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, HIP)
1185 #else
1186 #define FAD_KOKKOS_COMM_TESTS_HIP(FadType, FAD)
1187 #endif
1188 
1189 #ifdef KOKKOS_ENABLE_SERIAL
1190 #define FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD) \
1191  using Kokkos::Serial; \
1192  FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Serial)
1193 #else
1194 #define FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD)
1195 #endif
1196 
1197 #define FAD_KOKKOS_COMM_TESTS(FadType, FAD) \
1198  FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD) \
1199  FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD) \
1200  FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD) \
1201  FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD)
1202 
1203 #else
1204 
1205 #define FAD_KOKKOS_COMM_TESTS(FadType, FAD)
1206 
1207 #endif
1208 
1209 #define FAD_COMM_TESTS(FadType, FAD) \
1210  FAD_BASE_COMM_TESTS(FadType, FAD)
Sacado::Fad::DFad< double > FadType
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
bool checkFadArrays(const ArrayType &x, const ArrayType &x2, const std::string &tag, Teuchos::FancyOStream &out)
bool checkResultOnAllProcs(const Teuchos::Comm< Ordinal > &comm, Teuchos::FancyOStream &out, const bool result)
static SACADO_INLINE_FUNCTION bool eval(const T &x, const T &y)
int Ordinal