Teuchos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RCP_Performance_UnitTests.cpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Teuchos: Common Tools Package
6 // Copyright (2004) Sandia Corporation
7 //
8 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9 // license for use of this work by or on behalf of the U.S. Government.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 // @HEADER
42 */
43 
45 #include "Teuchos_RCP.hpp"
47 #ifdef HAVE_TEUCHOSCORE_CXX11
48 # include <memory>
49 #endif // HAVE_TEUCHOSCORE_CXX11
50 
51 namespace {
52 
53 
54 using Teuchos::null;
55 using Teuchos::RCP;
56 using Teuchos::rcp;
58 
59 
// Column precisions passed to TabularOutputter::setFieldTypePrecision() by
// every timing table in this file.
const int intPrec = 8;
const int dblPrec = 6;

// Workload sizing.  relCpuSpeed is divided by each test's relTestCost to get
// the base loop count; maxArraySize bounds the array sizes the tests sweep.
// Both are registered as command-line options further down in this file.
double relCpuSpeed = 1e-2;
int maxArraySize = 10000;

// Pass/fail thresholds: each test compares its smallest measured time ratio
// against one of these limits.
double maxRcpRawCreateDestroyRatio = 10.0;
double maxRcpRawAdjustRefCountRatio = 100.0;
double maxRcpRawObjAccessRatio = 13.5;
#ifdef HAVE_TEUCHOSCORE_CXX11
// Only meaningful when std::shared_ptr is available for comparison.
double maxRcpSpAdjustRefCountRatio = 5.0;
#endif
71 
72 
74 {
77  clp.setOption(
78  "rel-cpu-speed", &relCpuSpeed,
79  "The relative speed of the CPU (higher means the machine runs faster)"
80  );
81  clp.setOption(
82  "max-array-size", &maxArraySize,
83  "The maximum size of the arrays created"
84  );
85  clp.setOption(
86  "max-rcp-create-destroy-ratio", &maxRcpRawCreateDestroyRatio,
87  "The ratio of the final CPU time ratio of creating and destroying"
88  "std::vector<char>(size) objects wrapped in an RCP object versus"
89  "using just raw new and delete."
90  );
91  clp.setOption(
92  "max-rcp-raw-adjust-ref-count-ratio", &maxRcpRawAdjustRefCountRatio,
93  "The ratio of the final CPU time ratio for adjusting the reference"
94  "count of RCP objects versus a raw pointer."
95  );
96 #ifdef HAVE_TEUCHOSCORE_CXX11
97  clp.setOption(
98  "max-rcp-sp-adjust-ref-count-ratio", &maxRcpSpAdjustRefCountRatio,
99  "The ratio of the final CPU time ratio for adjusting the reference"
100  "count of RCP objects versus std::shared_ptr objects."
101  );
102 #endif
103  clp.setOption(
104  "max-rcp-raw-obj-access-ratio", &maxRcpRawObjAccessRatio,
105  "The ratio of the final CPU time ratio for accessing the object for RCP"
106  "versus a raw pointer."
107  );
108 
109 }
110 
111 
// Empty class template parameterized on T.  Nothing in the visible part of
// this file instantiates it.
template <class T>
struct DeleteDeleter
{
};
114 
115 
// Sanity test: prints a banner and asserts that sizeof() is non-zero for a
// handful of built-in, standard-library and Teuchos types.
//
// NOTE(review): this listing skips some source lines inside the body (the
// embedded numbering jumps 122 -> 126 -> 128), so the opener of the final
// assertion is not visible here -- confirm against the real file.
116 TEUCHOS_UNIT_TEST( RCP, _sizeofObjects )
117 {
118  out << "\nPrinting the size the RCP and RCPNodeImpl objects ...\n";
// Built-in and standard-library types.
119  TEST_INEQUALITY_CONST(sizeof(bool), 0);
120  TEST_INEQUALITY_CONST(sizeof(double), 0);
121  TEST_INEQUALITY_CONST(sizeof(double*), 0);
122  TEST_INEQUALITY_CONST(sizeof(std::vector<double>), 0);
// Teuchos handle type.
126  TEST_INEQUALITY_CONST(sizeof(Teuchos::RCP<std::vector<double> >), 0);
// Tail of an assertion on the RCPNodeTmpl instantiation; presumably another
// TEST_INEQUALITY_CONST( whose first line is missing from this listing.
128  sizeof(Teuchos::RCPNodeTmpl<std::vector<double>,
129  Teuchos::DeallocDelete<std::vector<double> > >),
130  0);
131 }
132 
133 
134 TEUCHOS_UNIT_TEST( RCP, createDestroyOverhead )
135 {
136 
137  typedef Teuchos::TabularOutputter TO;
138 
139  const int maxLoopIters = 1000;
140  const double relTestCost = 1e-3;
141  const double numInnerLoops = relCpuSpeed / relTestCost;
142 
143  out << "\n"
144  << "Messuring the overhead of creating and destorying objects of different sizes\n"
145  << "using raw C++ pointers,"
146 #ifdef HAVE_TEUCHOSCORE_CXX11
147  << " shared_ptr,"
148 #endif
149  << " and using RCP.\n"
150  << "\n"
151  << "Number of loops = relCpuSpeed/relTestCost = "
152  << relCpuSpeed << "/" << relTestCost << " = " << numInnerLoops << "\n"
153  << "\n";
154 
155  TabularOutputter outputter(out);
156  outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
157  outputter.setFieldTypePrecision(TO::INT, intPrec);
158 
159  outputter.pushFieldSpec("obj size", TO::INT);
160  outputter.pushFieldSpec("num loops", TO::INT);
161  outputter.pushFieldSpec("raw", TO::DOUBLE);
162 #ifdef HAVE_TEUCHOSCORE_CXX11
163  outputter.pushFieldSpec("shared_ptr", TO::DOUBLE);
164 #endif
165  outputter.pushFieldSpec("RCP", TO::DOUBLE);
166 #ifdef HAVE_TEUCHOSCORE_CXX11
167  outputter.pushFieldSpec("shared_ptr/raw", TO::DOUBLE);
168 #endif
169  outputter.pushFieldSpec("RCP/raw", TO::DOUBLE);
170 
171  outputter.outputHeader();
172 
173  double finalRcpRawRatio = 100000.0;
174 
175  int arraySize = 1;
176  for (int test_case_k = 0;
177  test_case_k < maxLoopIters && arraySize <= maxArraySize;
178  ++test_case_k
179  )
180  {
181 
182  // obj size
183  outputter.outputField(arraySize);
184 
185  // num loops
186  const int numActualLoops =
187  TEUCHOS_MAX(
188  static_cast<int>(
189  (numInnerLoops / arraySize)
190  * std::log(static_cast<double>(arraySize+1))
191  ),
192  1
193  );
194  outputter.outputField(numActualLoops);
195 
196  // raw
197  {
198  std::vector<std::vector<char>*> p_raw_vec(numActualLoops);
199  int i = 0;
200  TEUCHOS_START_PERF_OUTPUT_TIMER(outputter, numActualLoops)
201  {
202  p_raw_vec[i] = new std::vector<char>(arraySize, 1);
203  delete p_raw_vec[i];
204  ++i;
205  }
206  }
207  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, rawPtrTime);
208 
209 #ifdef HAVE_TEUCHOSCORE_CXX11
210  // shared_ptr
211  {
212  typedef std::shared_ptr<std::vector<char> > shared_ptr_t;
213  std::vector<shared_ptr_t > sp_vec(numActualLoops);
214  int i = 0;
215  TEUCHOS_START_PERF_OUTPUT_TIMER(outputter, numActualLoops)
216  {
217  sp_vec[i] = shared_ptr_t(new std::vector<char>(arraySize, 1));
218  sp_vec[i].reset();
219  ++i;
220  }
221  }
222  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, spTime);
223 #endif
224 
225  // RCP
226  {
227  std::vector<RCP<std::vector<char> > > p_vec(numActualLoops);
228  int i = 0;
229  TEUCHOS_START_PERF_OUTPUT_TIMER(outputter, numActualLoops)
230  {
231  p_vec[i] = rcp(new std::vector<char>(arraySize, 1));
232  p_vec[i] = null;
233  }
234  }
235  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, rcpTime);
236 
237 #ifdef HAVE_TEUCHOSCORE_CXX11
238  // shared_ptr/rawPtr
239  const double spRatio = spTime / rawPtrTime;
240  outputter.outputField(spRatio);
241 #endif
242 
243  // RCP/rawPtr
244  const double rcpRatio = rcpTime / rawPtrTime;
245  outputter.outputField(rcpRatio);
246 
247  outputter.nextRow();
248 
249  arraySize *= 4;
250  finalRcpRawRatio = TEUCHOS_MIN(rcpRatio, finalRcpRawRatio);
251 
252  }
253 
254  out << "\n";
255  TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawCreateDestroyRatio );
256  out << "\n";
257 
258 }
259 
260 
261 TEUCHOS_UNIT_TEST( RCP, referenceCountManipulationOverhead )
262 {
263 
264  typedef Teuchos::TabularOutputter TO;
265 
266  const double relTestCost = 5e-3;
267  const int maxLoopIters = 1000;
268  const double numInnerLoops = relCpuSpeed / relTestCost;
269 
270  out << "\n"
271  << "Messuring the overhead of incrementing and deincrementing the reference count\n"
272  << "comparing RCP to raw pointer"
273 #ifdef HAVE_TEUCHOSCORE_CXX11
274  << " and std::shared_ptr"
275 #endif
276  << ".\n"
277  << "\n";
278 
279  TabularOutputter outputter(out);
280  outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
281  outputter.setFieldTypePrecision(TO::INT, intPrec);
282 
283  outputter.pushFieldSpec("array dim", TO::INT);
284  outputter.pushFieldSpec("num loops", TO::INT);
285  outputter.pushFieldSpec("raw", TO::DOUBLE);
286 #ifdef HAVE_TEUCHOSCORE_CXX11
287  outputter.pushFieldSpec("shared_ptr", TO::DOUBLE);
288 #endif
289  outputter.pushFieldSpec("RCP", TO::DOUBLE);
290  outputter.pushFieldSpec("RCP/raw", TO::DOUBLE);
291 #ifdef HAVE_TEUCHOSCORE_CXX11
292  outputter.pushFieldSpec("RCP/shared_ptr", TO::DOUBLE);
293 #endif
294 
295  outputter.outputHeader();
296 
297  double finalRcpRawRatio = 100000.0;
298 #ifdef HAVE_TEUCHOSCORE_CXX11
299  double finalRcpSpRatio = 100000.0;
300 #endif
301  int arraySize = 64;
302 
303  for (
304  int test_case_k = 0;
305  test_case_k < maxLoopIters && arraySize <= maxArraySize;
306  ++test_case_k
307  )
308  {
309 
310  // array dim
311  outputter.outputField(arraySize);
312 
313  // num loops
314  const int numActualLoops =
315  TEUCHOS_MAX(
316  static_cast<int>(
317  (numInnerLoops / arraySize)
318  * std::log(static_cast<double>(arraySize+1))
319  ),
320  1
321  );
322  outputter.outputField(numActualLoops);
323 
324  // Note on std::shared_ptr and modification to the test
325  // Originally this test copied a single ptr
326  // Added 1 and 2 types ('n' and 'o') so that each copy would be unique
327  // std::shared_ptr for gcc (but not clang) will handle the case of setting
328  // a = b with b already equal to a in an optimized way and the original
329  // test format spent most of it's time in this case.
330 
331  // raw
332  {
333  char dummy_char1 = 'n';
334  char dummy_char2 = 'o'; // See above note for std::shared_ptr
335  std::vector<char*> p_raw_vec(arraySize);
336  TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(outputter, numActualLoops, arraySize)
337  {
338  for (int i=0; i < arraySize; ++i) {
339  p_raw_vec[i] = &dummy_char1;
340  p_raw_vec[i] = &dummy_char2; // See above note for std::shared_ptr
341  }
342  }
343  }
344  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, rawPtrTime);
345 
346 #ifdef HAVE_TEUCHOSCORE_CXX11
347  // shared_ptr
348  {
349  typedef std::shared_ptr<char> shared_ptr_t;
350  shared_ptr_t sp1(new char('n'));
351  shared_ptr_t sp2(new char('o')); // See above note for std::shared_ptr
352  std::vector<shared_ptr_t> sp_vec(arraySize);
353  TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(outputter, numActualLoops, arraySize)
354  {
355  for (int i=0; i < arraySize; ++i) {
356  sp_vec[i] = sp1;
357  sp_vec[i] = sp2; // See above note for std::shared_ptr
358  }
359  }
360  }
361  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, spTime);
362 #endif
363 
364  // RCP
365  {
366  RCP<char> p1(new char('n'));
367  RCP<char> p2(new char('o')); // See above note for std::shared_ptr
368  std::vector<RCP<char> > p_vec(arraySize);
369  TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(outputter, numActualLoops, arraySize)
370  {
371  for (int i=0; i < arraySize; ++i) {
372  p_vec[i] = p1;
373  p_vec[i] = p2; // See above note for std::shared_ptr
374  // NOTE: This assignment operation tests the copy constructor and
375  // the swap function. This calls both bind() and unbind()
376  // underneath.
377  }
378  }
379  }
380  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, rcpTime);
381 
382  // RCP/raw
383  const double rcpRawRatio = rcpTime / rawPtrTime;
384  finalRcpRawRatio = TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
385  outputter.outputField(rcpRawRatio);
386 
387 #ifdef HAVE_TEUCHOSCORE_CXX11
388  // RCP/shared_ptr
389  const double rcpSpRatio = rcpTime / spTime;
390  finalRcpSpRatio = TEUCHOS_MIN(rcpSpRatio, finalRcpSpRatio);
391  outputter.outputField(rcpSpRatio);
392 #endif
393 
394  outputter.nextRow();
395 
396  arraySize *= 4;
397 
398  }
399 
400  out << "\n";
401  TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawAdjustRefCountRatio );
402  out << "\n";
403 #ifdef HAVE_TEUCHOSCORE_CXX11
404  TEST_COMPARE( finalRcpSpRatio, <=, maxRcpSpAdjustRefCountRatio );
405  out << "\n";
406 #endif
407 
408 }
409 
410 
411 TEUCHOS_UNIT_TEST( RCP, dereferenceOverhead )
412 {
413 
414  typedef Teuchos::TabularOutputter TO;
415 
416  const double relTestCost = 1e-4;
417  const int maxLoopIters = 1000;
418  const double numInnerLoops = relCpuSpeed / relTestCost;
419 
420  out << "\n"
421  << "Measuring the overhead of dereferencing RCP"
422 #ifdef HAVE_TEUCHOSCORE_CXX11
423  << ", shared_ptr"
424 #endif
425  << " and a raw pointer.\n"
426  << "\n";
427 
428  TabularOutputter outputter(out);
429  outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
430  outputter.setFieldTypePrecision(TO::INT, intPrec);
431 
432  outputter.pushFieldSpec("array dim", TO::INT);
433  outputter.pushFieldSpec("num loops", TO::INT);
434  outputter.pushFieldSpec("raw", TO::DOUBLE);
435 #ifdef HAVE_TEUCHOSCORE_CXX11
436  outputter.pushFieldSpec("shared_ptr", TO::DOUBLE);
437 #endif
438  outputter.pushFieldSpec("RCP", TO::DOUBLE);
439  outputter.pushFieldSpec("RCP/raw", TO::DOUBLE);
440 #ifdef HAVE_TEUCHOSCORE_CXX11
441  outputter.pushFieldSpec("RCP/shared_ptr", TO::DOUBLE);
442 #endif
443 
444  outputter.outputHeader();
445 
446  double finalRcpRawRatio = 100000.0;
447  int arraySize = 64;
448  const int dummy_int_val = 1;
449  int overall_dummy_int_out = 0;
450 
451 
452  for (
453  int test_case_k = 0;
454  test_case_k < maxLoopIters && arraySize <= maxArraySize;
455  ++test_case_k
456  )
457  {
458 
459  // array dim
460  outputter.outputField(arraySize);
461 
462  // num loops
463  const int numActualLoops =
464  TEUCHOS_MAX(
465  static_cast<int>(
466  (numInnerLoops / arraySize)
467  * std::log(static_cast<double>(arraySize+1))
468  ),
469  1
470  );
471  outputter.outputField(numActualLoops);
472 
473  int dummy_int_out = 0;
474 
475  // raw
476  {
477  int dummy_int = dummy_int_val;
478  std::vector<int*> p_raw_vec(arraySize);
479  for (int i=0; i < arraySize; ++i) {
480  p_raw_vec[i] = &dummy_int;
481  }
482  dummy_int_out = 0;
483  TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(outputter, numActualLoops, arraySize)
484  {
485  for (int i=0; i < arraySize; ++i) {
486  dummy_int_out += *p_raw_vec[i];
487  }
488  }
489  }
490  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, rawPtrTime);
491  overall_dummy_int_out += dummy_int_out;
492 
493 #ifdef HAVE_TEUCHOSCORE_CXX11
494  // shared_ptr
495  {
496  typedef std::shared_ptr<int> shared_ptr_t;
497  shared_ptr_t sp(new int(dummy_int_val));
498  std::vector<shared_ptr_t> sp_vec(arraySize);
499  for (int i=0; i < arraySize; ++i) {
500  sp_vec[i] = sp;
501  }
502  dummy_int_out = 0;
503  TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(outputter, numActualLoops, arraySize)
504  {
505  for (int i=0; i < arraySize; ++i) {
506  dummy_int_out += *sp_vec[i];
507  }
508  }
509  }
510  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, spTime);
511  overall_dummy_int_out += dummy_int_out;
512 #endif
513 
514  // RCP
515  {
516  RCP<int> p(new int(dummy_int_val));
517  std::vector<RCP<int> > p_vec(arraySize);
518  for (int i=0; i < arraySize; ++i) {
519  p_vec[i] = p;
520  }
521  dummy_int_out = 0;
522  TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(outputter, numActualLoops, arraySize)
523  {
524  for (int i=0; i < arraySize; ++i) {
525  dummy_int_out += *p_vec[i];
526  }
527  }
528  }
529  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, rcpTime);
530  overall_dummy_int_out += dummy_int_out;
531 
532  // RCP/raw
533  const double rcpRawRatio = rcpTime / rawPtrTime;
534  finalRcpRawRatio = TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
535  outputter.outputField(rcpRawRatio);
536 
537 #ifdef HAVE_TEUCHOSCORE_CXX11
538  // RCP/shared_ptr
539  const double rcpSpRatio = rcpTime / spTime;
540  outputter.outputField(rcpSpRatio);
541 #endif
542 
543  outputter.nextRow();
544 
545  arraySize *= 4;
546 
547  }
548 
549  out << "\n";
550  TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawObjAccessRatio );
551  out << "\n";
552 
553  // This silly variable must be accumulated or compilers like MSVC++ will
554  // optimize away the loops!
555  if (overall_dummy_int_out == 0)
556  success = false;
557 
558 }
559 
560 
// Small value type with a single int field; the member-access benchmark in
// this file reads `member` through each kind of pointer.
struct SomeStruct
{
  int member;

  // Stores member_in in `member`.
  SomeStruct(int member_in)
    : member(member_in)
  {
  }
};
565 
566 
567 TEUCHOS_UNIT_TEST( RCP, memberAccessOverhead )
568 {
569 
570  typedef Teuchos::TabularOutputter TO;
571 
572  const double relTestCost = 1e-4;
573  const int maxLoopIters = 1000;
574  const double numInnerLoops = relCpuSpeed / relTestCost;
575 
576  out << "\n"
577  << "Measuring the overhead of dereferencing RCP"
578 #ifdef HAVE_TEUCHOSCORE_CXX11
579  << ", shared_ptr"
580 #endif
581  << " and a raw pointer.\n"
582  << "\n";
583 
584  TabularOutputter outputter(out);
585  outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
586  outputter.setFieldTypePrecision(TO::INT, intPrec);
587 
588  outputter.pushFieldSpec("array dim", TO::INT);
589  outputter.pushFieldSpec("num loops", TO::INT);
590  outputter.pushFieldSpec("raw", TO::DOUBLE);
591 #ifdef HAVE_TEUCHOSCORE_CXX11
592  outputter.pushFieldSpec("shared_ptr", TO::DOUBLE);
593 #endif
594  outputter.pushFieldSpec("RCP", TO::DOUBLE);
595  outputter.pushFieldSpec("RCP/raw", TO::DOUBLE);
596 #ifdef HAVE_TEUCHOSCORE_CXX11
597  outputter.pushFieldSpec("RCP/shared_ptr", TO::DOUBLE);
598 #endif
599 
600  outputter.outputHeader();
601 
602  double finalRcpRawRatio = 100000.0;
603  int arraySize = 64;
604  const int dummy_int_val = 1;
605  int overall_dummy_int_out = 0;
606 
607  for (
608  int test_case_k = 0;
609  test_case_k < maxLoopIters && arraySize <= maxArraySize;
610  ++test_case_k
611  )
612  {
613 
614  // array dim
615  outputter.outputField(arraySize);
616 
617  // num loops
618  const int numActualLoops =
619  TEUCHOS_MAX(
620  static_cast<int>(
621  (numInnerLoops / arraySize)
622  * std::log(static_cast<double>(arraySize+1))
623  ),
624  1
625  );
626  outputter.outputField(numActualLoops);
627 
628  int dummy_int_out = 0;
629 
630  // raw
631  {
632  SomeStruct dummy_SomeStruct(dummy_int_val);
633  std::vector<SomeStruct*> p_raw_vec(arraySize);
634  for (int i=0; i < arraySize; ++i) {
635  p_raw_vec[i] = &dummy_SomeStruct;
636  }
637  dummy_int_out = 0;
638  TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(outputter, numActualLoops, arraySize)
639  {
640  for (int i=0; i < arraySize; ++i) {
641  dummy_int_out += p_raw_vec[i]->member;
642  }
643  }
644  }
645  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, rawPtrTime);
646  overall_dummy_int_out += dummy_int_out;
647 
648 #ifdef HAVE_TEUCHOSCORE_CXX11
649  // shared_ptr
650  {
651  typedef std::shared_ptr<SomeStruct> shared_ptr_t;
652  shared_ptr_t sp(new SomeStruct(dummy_int_val));
653  std::vector<shared_ptr_t> sp_vec(arraySize);
654  for (int i=0; i < arraySize; ++i) {
655  sp_vec[i] = sp;
656  }
657  dummy_int_out = 0;
658  TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(outputter, numActualLoops, arraySize)
659  {
660  for (int i=0; i < arraySize; ++i) {
661  dummy_int_out += sp_vec[i]->member;
662  }
663  }
664  }
665  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, spTime);
666  overall_dummy_int_out += dummy_int_out;
667 #endif
668 
669  // RCP
670  {
671  RCP<SomeStruct> p(new SomeStruct(dummy_int_val));
672  std::vector<RCP<SomeStruct> > p_vec(arraySize);
673  for (int i=0; i < arraySize; ++i) {
674  p_vec[i] = p;
675  }
676  dummy_int_out = 0;
677  TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(outputter, numActualLoops, arraySize)
678  {
679  for (int i=0; i < arraySize; ++i) {
680  dummy_int_out += p_vec[i]->member;
681  }
682  }
683  }
684  TEUCHOS_END_PERF_OUTPUT_TIMER(outputter, rcpTime);
685  overall_dummy_int_out += dummy_int_out;
686 
687  // RCP/raw
688  const double rcpRawRatio = rcpTime / rawPtrTime;
689  finalRcpRawRatio = TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
690  outputter.outputField(rcpRawRatio);
691 
692 #ifdef HAVE_TEUCHOSCORE_CXX11
693  // RCP/shared_ptr
694  const double rcpSpRatio = rcpTime / spTime;
695  outputter.outputField(rcpSpRatio);
696 #endif
697 
698  outputter.nextRow();
699 
700  arraySize *= 4;
701 
702  }
703 
704  out << "\n";
705  TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawObjAccessRatio );
706  out << "\n";
707 
708  // This silly variable must be accumulated or compilers like MSVC++ will
709  // optimize away the loops!
710  if (overall_dummy_int_out == 0)
711  success = false;
712 
713 }
714 
715 
716 
717 
718 
719 
720 
721 } // namespace
RCP< T > rcp(const boost::shared_ptr< T > &sptr)
Conversion function that takes in a boost::shared_ptr object and spits out a Teuchos::RCP object...
ERCPStrength
Used to specify if the pointer is weak or strong.
#define TEST_INEQUALITY_CONST(v1, v2)
Assert the inequality of v1 and constant v2.
static CommandLineProcessor & getCLP()
Return the CLP to add options to.
#define TEST_COMPARE(v1, comp, v2)
Assert that v1 comp v2 (where comp = '==', '>=', '!=', etc).
#define TEUCHOS_UNIT_TEST(TEST_GROUP, TEST_NAME)
Macro for defining a (non-templated) unit test.
Utility class that makes it easy to create formatted tables of output.
Policy class for deallocator that uses delete to delete a pointer which is used by RCP...
Node class to keep track of address and the reference count for a reference-counted utility class and...
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
Templated implementation class of RCPNode that has the responsibility for deleting the reference-coun...
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
Set a boolean option.
Unit testing support.
#define TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(OUTPUTTER, NUMLOOPS, NUMINNERLOOPS)
Start a timer block using a TabularOutputter object .
#define TEUCHOS_END_PERF_OUTPUT_TIMER(OUTPUTTER, VARNAME)
End a timer block, output the time field to a TabularOutputter object, and set a variable with the ti...
#define TEUCHOS_MAX(x, y)
Handle class that manages the RCPNode's reference counting.
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_START_PERF_OUTPUT_TIMER(OUTPUTTER, NUMLOOPS)
Start a timer block using a TabularOutputter object .
#define TEUCHOS_MIN(x, y)
Reference-counted pointer class and non-member templated function implementations.
Class that helps parse command line input arguments from (argc,argv[]) and set options.
TEUCHOS_STATIC_SETUP()