47 #ifdef HAVE_TEUCHOSCORE_CXX11
49 #endif // HAVE_TEUCHOSCORE_CXX11
// File-scope tuning knobs for the performance tests in this file. The
// non-const values are bound to command-line options of the same spelling
// (see the option registrations that follow in this listing).

// "rel-cpu-speed": relative speed of the CPU (higher means a faster machine).
// Each test divides this by its own relTestCost to get its inner-loop count.
60 double relCpuSpeed = 1e-2;
// "max-array-size": maximum size of the arrays created; the test-case loops
// stop once arraySize exceeds this value.
61 int maxArraySize = 10000;
// "max-rcp-create-destroy-ratio": upper bound checked against the final
// RCP/raw CPU-time ratio for creating and destroying objects.
62 double maxRcpRawCreateDestroyRatio = 10.0;
// "max-rcp-raw-adjust-ref-count-ratio": upper bound checked against the final
// RCP/raw CPU-time ratio for adjusting the reference count.
63 double maxRcpRawAdjustRefCountRatio = 100.0;
64 #ifdef HAVE_TEUCHOSCORE_CXX11
// "max-rcp-sp-adjust-ref-count-ratio": upper bound checked against the final
// RCP/std::shared_ptr CPU-time ratio for adjusting the reference count.
65 double maxRcpSpAdjustRefCountRatio = 5.0;
// "max-rcp-raw-obj-access-ratio": upper bound checked against the final
// RCP/raw CPU-time ratio for accessing the pointed-to object.
67 double maxRcpRawObjAccessRatio = 13.5;
// Field precisions handed to TabularOutputter::setFieldTypePrecision for
// TO::INT and TO::DOUBLE columns respectively.
69 const int intPrec = 8;
70 const int dblPrec = 6;
78 "rel-cpu-speed", &relCpuSpeed,
79 "The relative speed of the CPU (higher means the machine runs faster)"
82 "max-array-size", &maxArraySize,
83 "The maximum size of the arrays created"
86 "max-rcp-create-destroy-ratio", &maxRcpRawCreateDestroyRatio,
87 "The ratio of the final CPU time ratio of creating and destroying"
88 "std::vector<char>(size) objects wrapped in an RCP object versus"
89 "using just raw new and delete."
92 "max-rcp-raw-adjust-ref-count-ratio", &maxRcpRawAdjustRefCountRatio,
93 "The ratio of the final CPU time ratio for adjusting the reference"
94 "count of RCP objects versus a raw pointer."
96 #ifdef HAVE_TEUCHOSCORE_CXX11
98 "max-rcp-sp-adjust-ref-count-ratio", &maxRcpSpAdjustRefCountRatio,
99 "The ratio of the final CPU time ratio for adjusting the reference"
100 "count of RCP objects versus std::shared_ptr objects."
104 "max-rcp-raw-obj-access-ratio", &maxRcpRawObjAccessRatio,
105 "The ratio of the final CPU time ratio for accessing the object for RCP"
106 "versus a raw pointer."
// Empty struct: declares no data members or member functions.
113 struct DeleteDeleter {};
118 out <<
"\nPrinting the size the RCP and RCPNodeImpl objects ...\n";
139 const int maxLoopIters = 1000;
140 const double relTestCost = 1e-3;
141 const double numInnerLoops = relCpuSpeed / relTestCost;
144 <<
"Messuring the overhead of creating and destorying objects of different sizes\n"
145 <<
"using raw C++ pointers,"
146 #ifdef HAVE_TEUCHOSCORE_CXX11
149 <<
" and using RCP.\n"
151 <<
"Number of loops = relCpuSpeed/relTestCost = "
152 << relCpuSpeed <<
"/" << relTestCost <<
" = " << numInnerLoops <<
"\n"
155 TabularOutputter outputter(out);
156 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
157 outputter.setFieldTypePrecision(TO::INT, intPrec);
159 outputter.pushFieldSpec(
"obj size", TO::INT);
160 outputter.pushFieldSpec(
"num loops", TO::INT);
161 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
162 #ifdef HAVE_TEUCHOSCORE_CXX11
163 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
165 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
166 #ifdef HAVE_TEUCHOSCORE_CXX11
167 outputter.pushFieldSpec(
"shared_ptr/raw", TO::DOUBLE);
169 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
171 outputter.outputHeader();
173 double finalRcpRawRatio = 100000.0;
176 for (
int test_case_k = 0;
177 test_case_k < maxLoopIters && arraySize <= maxArraySize;
183 outputter.outputField(arraySize);
186 const int numActualLoops =
189 (numInnerLoops / arraySize)
190 * std::log(static_cast<double>(arraySize+1))
194 outputter.outputField(numActualLoops);
198 std::vector<std::vector<char>*> p_raw_vec(numActualLoops);
202 p_raw_vec[i] =
new std::vector<char>(arraySize, 1);
209 #ifdef HAVE_TEUCHOSCORE_CXX11
212 typedef std::shared_ptr<std::vector<char> > shared_ptr_t;
213 std::vector<shared_ptr_t > sp_vec(numActualLoops);
217 sp_vec[i] = shared_ptr_t(
new std::vector<char>(arraySize, 1));
227 std::vector<RCP<std::vector<char> > > p_vec(numActualLoops);
231 p_vec[i] =
rcp(
new std::vector<char>(arraySize, 1));
237 #ifdef HAVE_TEUCHOSCORE_CXX11
239 const double spRatio = spTime / rawPtrTime;
240 outputter.outputField(spRatio);
244 const double rcpRatio = rcpTime / rawPtrTime;
245 outputter.outputField(rcpRatio);
250 finalRcpRawRatio =
TEUCHOS_MIN(rcpRatio, finalRcpRawRatio);
255 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawCreateDestroyRatio );
266 const double relTestCost = 5e-3;
267 const int maxLoopIters = 1000;
268 const double numInnerLoops = relCpuSpeed / relTestCost;
271 <<
"Messuring the overhead of incrementing and deincrementing the reference count\n"
272 <<
"comparing RCP to raw pointer"
273 #ifdef HAVE_TEUCHOSCORE_CXX11
274 <<
" and std::shared_ptr"
279 TabularOutputter outputter(out);
280 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
281 outputter.setFieldTypePrecision(TO::INT, intPrec);
283 outputter.pushFieldSpec(
"array dim", TO::INT);
284 outputter.pushFieldSpec(
"num loops", TO::INT);
285 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
286 #ifdef HAVE_TEUCHOSCORE_CXX11
287 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
289 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
290 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
291 #ifdef HAVE_TEUCHOSCORE_CXX11
292 outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
295 outputter.outputHeader();
297 double finalRcpRawRatio = 100000.0;
298 #ifdef HAVE_TEUCHOSCORE_CXX11
299 double finalRcpSpRatio = 100000.0;
305 test_case_k < maxLoopIters && arraySize <= maxArraySize;
311 outputter.outputField(arraySize);
314 const int numActualLoops =
317 (numInnerLoops / arraySize)
318 * std::log(static_cast<double>(arraySize+1))
322 outputter.outputField(numActualLoops);
333 char dummy_char1 =
'n';
334 char dummy_char2 =
'o';
335 std::vector<char*> p_raw_vec(arraySize);
338 for (
int i=0; i < arraySize; ++i) {
339 p_raw_vec[i] = &dummy_char1;
340 p_raw_vec[i] = &dummy_char2;
346 #ifdef HAVE_TEUCHOSCORE_CXX11
349 typedef std::shared_ptr<char> shared_ptr_t;
350 shared_ptr_t sp1(
new char(
'n'));
351 shared_ptr_t sp2(
new char(
'o'));
352 std::vector<shared_ptr_t> sp_vec(arraySize);
355 for (
int i=0; i < arraySize; ++i) {
366 RCP<char> p1(
new char(
'n'));
367 RCP<char> p2(
new char(
'o'));
368 std::vector<RCP<char> > p_vec(arraySize);
371 for (
int i=0; i < arraySize; ++i) {
383 const double rcpRawRatio = rcpTime / rawPtrTime;
384 finalRcpRawRatio =
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
385 outputter.outputField(rcpRawRatio);
387 #ifdef HAVE_TEUCHOSCORE_CXX11
389 const double rcpSpRatio = rcpTime / spTime;
390 finalRcpSpRatio =
TEUCHOS_MIN(rcpSpRatio, finalRcpSpRatio);
391 outputter.outputField(rcpSpRatio);
401 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawAdjustRefCountRatio );
403 #ifdef HAVE_TEUCHOSCORE_CXX11
404 TEST_COMPARE( finalRcpSpRatio, <=, maxRcpSpAdjustRefCountRatio );
416 const double relTestCost = 1e-4;
417 const int maxLoopIters = 1000;
418 const double numInnerLoops = relCpuSpeed / relTestCost;
421 <<
"Measuring the overhead of dereferencing RCP"
422 #ifdef HAVE_TEUCHOSCORE_CXX11
425 <<
" and a raw pointer.\n"
428 TabularOutputter outputter(out);
429 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
430 outputter.setFieldTypePrecision(TO::INT, intPrec);
432 outputter.pushFieldSpec(
"array dim", TO::INT);
433 outputter.pushFieldSpec(
"num loops", TO::INT);
434 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
435 #ifdef HAVE_TEUCHOSCORE_CXX11
436 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
438 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
439 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
440 #ifdef HAVE_TEUCHOSCORE_CXX11
441 outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
444 outputter.outputHeader();
446 double finalRcpRawRatio = 100000.0;
448 const int dummy_int_val = 1;
449 int overall_dummy_int_out = 0;
454 test_case_k < maxLoopIters && arraySize <= maxArraySize;
460 outputter.outputField(arraySize);
463 const int numActualLoops =
466 (numInnerLoops / arraySize)
467 * std::log(static_cast<double>(arraySize+1))
471 outputter.outputField(numActualLoops);
473 int dummy_int_out = 0;
477 int dummy_int = dummy_int_val;
478 std::vector<int*> p_raw_vec(arraySize);
479 for (
int i=0; i < arraySize; ++i) {
480 p_raw_vec[i] = &dummy_int;
485 for (
int i=0; i < arraySize; ++i) {
486 dummy_int_out += *p_raw_vec[i];
491 overall_dummy_int_out += dummy_int_out;
493 #ifdef HAVE_TEUCHOSCORE_CXX11
496 typedef std::shared_ptr<int> shared_ptr_t;
497 shared_ptr_t sp(
new int(dummy_int_val));
498 std::vector<shared_ptr_t> sp_vec(arraySize);
499 for (
int i=0; i < arraySize; ++i) {
505 for (
int i=0; i < arraySize; ++i) {
506 dummy_int_out += *sp_vec[i];
511 overall_dummy_int_out += dummy_int_out;
516 RCP<int> p(
new int(dummy_int_val));
517 std::vector<RCP<int> > p_vec(arraySize);
518 for (
int i=0; i < arraySize; ++i) {
524 for (
int i=0; i < arraySize; ++i) {
525 dummy_int_out += *p_vec[i];
530 overall_dummy_int_out += dummy_int_out;
533 const double rcpRawRatio = rcpTime / rawPtrTime;
534 finalRcpRawRatio =
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
535 outputter.outputField(rcpRawRatio);
537 #ifdef HAVE_TEUCHOSCORE_CXX11
539 const double rcpSpRatio = rcpTime / spTime;
540 outputter.outputField(rcpSpRatio);
550 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawObjAccessRatio );
555 if (overall_dummy_int_out == 0)
562 SomeStruct(
int member_in) : member(member_in) {}
572 const double relTestCost = 1e-4;
573 const int maxLoopIters = 1000;
574 const double numInnerLoops = relCpuSpeed / relTestCost;
577 <<
"Measuring the overhead of dereferencing RCP"
578 #ifdef HAVE_TEUCHOSCORE_CXX11
581 <<
" and a raw pointer.\n"
584 TabularOutputter outputter(out);
585 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
586 outputter.setFieldTypePrecision(TO::INT, intPrec);
588 outputter.pushFieldSpec(
"array dim", TO::INT);
589 outputter.pushFieldSpec(
"num loops", TO::INT);
590 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
591 #ifdef HAVE_TEUCHOSCORE_CXX11
592 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
594 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
595 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
596 #ifdef HAVE_TEUCHOSCORE_CXX11
597 outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
600 outputter.outputHeader();
602 double finalRcpRawRatio = 100000.0;
604 const int dummy_int_val = 1;
605 int overall_dummy_int_out = 0;
609 test_case_k < maxLoopIters && arraySize <= maxArraySize;
615 outputter.outputField(arraySize);
618 const int numActualLoops =
621 (numInnerLoops / arraySize)
622 * std::log(static_cast<double>(arraySize+1))
626 outputter.outputField(numActualLoops);
628 int dummy_int_out = 0;
632 SomeStruct dummy_SomeStruct(dummy_int_val);
633 std::vector<SomeStruct*> p_raw_vec(arraySize);
634 for (
int i=0; i < arraySize; ++i) {
635 p_raw_vec[i] = &dummy_SomeStruct;
640 for (
int i=0; i < arraySize; ++i) {
641 dummy_int_out += p_raw_vec[i]->member;
646 overall_dummy_int_out += dummy_int_out;
648 #ifdef HAVE_TEUCHOSCORE_CXX11
651 typedef std::shared_ptr<SomeStruct> shared_ptr_t;
652 shared_ptr_t sp(
new SomeStruct(dummy_int_val));
653 std::vector<shared_ptr_t> sp_vec(arraySize);
654 for (
int i=0; i < arraySize; ++i) {
660 for (
int i=0; i < arraySize; ++i) {
661 dummy_int_out += sp_vec[i]->member;
666 overall_dummy_int_out += dummy_int_out;
671 RCP<SomeStruct> p(
new SomeStruct(dummy_int_val));
672 std::vector<RCP<SomeStruct> > p_vec(arraySize);
673 for (
int i=0; i < arraySize; ++i) {
679 for (
int i=0; i < arraySize; ++i) {
680 dummy_int_out += p_vec[i]->member;
685 overall_dummy_int_out += dummy_int_out;
688 const double rcpRawRatio = rcpTime / rawPtrTime;
689 finalRcpRawRatio =
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
690 outputter.outputField(rcpRawRatio);
692 #ifdef HAVE_TEUCHOSCORE_CXX11
694 const double rcpSpRatio = rcpTime / spTime;
695 outputter.outputField(rcpSpRatio);
705 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawObjAccessRatio );
710 if (overall_dummy_int_out == 0)
RCP< T > rcp(const boost::shared_ptr< T > &sptr)
Conversion function that takes in a boost::shared_ptr object and spits out a Teuchos::RCP object...
ERCPStrength
Used to specify if the pointer is weak or strong.
#define TEST_INEQUALITY_CONST(v1, v2)
Assert the inequality of v1 and constant v2.
static CommandLineProcessor & getCLP()
Return the CLP to add options to.
#define TEST_COMPARE(v1, comp, v2)
Assert that v1 comp v2 (where comp = '==', '>=', '!=', etc).
#define TEUCHOS_UNIT_TEST(TEST_GROUP, TEST_NAME)
Macro for defining a (non-templated) unit test.
Utility class that makes it easy to create formatted tables of output.
Policy class for deallocator that uses delete to delete a pointer which is used by RCP...
Node class to keep track of address and the reference count for a reference-counted utility class and...
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
Templated implementation class of RCPNode that has the responsibility for deleting the reference-coun...
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
Set a boolean option.
#define TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(OUTPUTTER, NUMLOOPS, NUMINNERLOOPS)
Start a timer block using a TabularOutputter object.
#define TEUCHOS_END_PERF_OUTPUT_TIMER(OUTPUTTER, VARNAME)
End a timer block, output the time field to a TabularOutputter object, and set a variable with the ti...
#define TEUCHOS_MAX(x, y)
Handle class that manages the RCPNode's reference counting.
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_START_PERF_OUTPUT_TIMER(OUTPUTTER, NUMLOOPS)
Start a timer block using a TabularOutputter object.
#define TEUCHOS_MIN(x, y)
Reference-counted pointer class and non-member templated function implementations.
Class that helps parse command line input arguments from (argc,argv[]) and set options.