13 #ifdef HAVE_TEUCHOSCORE_CXX11
15 #endif // HAVE_TEUCHOSCORE_CXX11
// File-scope tunables for the performance tests. The visible setOption
// arguments pair each of the non-const values with a command-line option name.

// Relative CPU speed ("rel-cpu-speed"); higher means the machine runs faster.
// Each test divides this by its own relTestCost to get numInnerLoops.
26 double relCpuSpeed = 1e-2;
// Largest array size the test loops run up to ("max-array-size").
27 int maxArraySize = 10000;
// Upper bound on the final RCP/raw CPU-time ratio for creating and destroying
// objects ("max-rcp-create-destroy-ratio"); enforced with TEST_COMPARE.
28 double maxRcpRawCreateDestroyRatio = 10.0;
// Upper bound on the final RCP/raw CPU-time ratio for adjusting the reference
// count ("max-rcp-raw-adjust-ref-count-ratio"); enforced with TEST_COMPARE.
29 double maxRcpRawAdjustRefCountRatio = 100.0;
30 #ifdef HAVE_TEUCHOSCORE_CXX11
// Upper bound on the final RCP/std::shared_ptr CPU-time ratio for adjusting
// the reference count ("max-rcp-sp-adjust-ref-count-ratio").
31 double maxRcpSpAdjustRefCountRatio = 5.0;
// Upper bound on the final RCP/raw CPU-time ratio for accessing the pointed-to
// object ("max-rcp-raw-obj-access-ratio"); both object-access tests compare
// against it.
33 double maxRcpRawObjAccessRatio = 13.5;
// Precision passed to TabularOutputter::setFieldTypePrecision for TO::INT fields.
35 const int intPrec = 8;
// Precision passed to TabularOutputter::setFieldTypePrecision for TO::DOUBLE fields.
36 const int dblPrec = 6;
44 "rel-cpu-speed", &relCpuSpeed,
45 "The relative speed of the CPU (higher means the machine runs faster)"
48 "max-array-size", &maxArraySize,
49 "The maximum size of the arrays created"
// The help text below is built from adjacent string literals, which the
// compiler joins with no separator. Every non-final piece therefore ends in a
// space so that words do not run together in the printed help.
52 "max-rcp-create-destroy-ratio", &maxRcpRawCreateDestroyRatio,
53 "The ratio of the final CPU time ratio of creating and destroying "
54 "std::vector<char>(size) objects wrapped in an RCP object versus "
55 "using just raw new and delete."
58 "max-rcp-raw-adjust-ref-count-ratio", &maxRcpRawAdjustRefCountRatio,
59 "The ratio of the final CPU time ratio for adjusting the reference "
60 "count of RCP objects versus a raw pointer."
62 #ifdef HAVE_TEUCHOSCORE_CXX11
64 "max-rcp-sp-adjust-ref-count-ratio", &maxRcpSpAdjustRefCountRatio,
65 "The ratio of the final CPU time ratio for adjusting the reference "
66 "count of RCP objects versus std::shared_ptr objects."
70 "max-rcp-raw-obj-access-ratio", &maxRcpRawObjAccessRatio,
71 "The ratio of the final CPU time ratio for accessing the object for RCP "
72 "versus a raw pointer."
// Empty struct with no members or behavior of its own.
// NOTE(review): its use is not visible in this excerpt; presumably it stands in
// as a deleter type when the sizes of the RCP node classes are printed. Confirm
// against the full file.
79 struct DeleteDeleter {};
84 out <<
"\nPrinting the size the RCP and RCPNodeImpl objects ...\n";
105 const int maxLoopIters = 1000;
106 const double relTestCost = 1e-3;
107 const double numInnerLoops = relCpuSpeed / relTestCost;
110 <<
"Messuring the overhead of creating and destorying objects of different sizes\n"
111 <<
"using raw C++ pointers,"
112 #ifdef HAVE_TEUCHOSCORE_CXX11
115 <<
" and using RCP.\n"
117 <<
"Number of loops = relCpuSpeed/relTestCost = "
118 << relCpuSpeed <<
"/" << relTestCost <<
" = " << numInnerLoops <<
"\n"
121 TabularOutputter outputter(out);
122 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
123 outputter.setFieldTypePrecision(TO::INT, intPrec);
125 outputter.pushFieldSpec(
"obj size", TO::INT);
126 outputter.pushFieldSpec(
"num loops", TO::INT);
127 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
128 #ifdef HAVE_TEUCHOSCORE_CXX11
129 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
131 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
132 #ifdef HAVE_TEUCHOSCORE_CXX11
133 outputter.pushFieldSpec(
"shared_ptr/raw", TO::DOUBLE);
135 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
137 outputter.outputHeader();
139 double finalRcpRawRatio = 100000.0;
142 for (
int test_case_k = 0;
143 test_case_k < maxLoopIters && arraySize <= maxArraySize;
149 outputter.outputField(arraySize);
152 const int numActualLoops =
155 (numInnerLoops / arraySize)
156 * std::log(static_cast<double>(arraySize+1))
160 outputter.outputField(numActualLoops);
164 std::vector<std::vector<char>*> p_raw_vec(numActualLoops);
168 p_raw_vec[i] =
new std::vector<char>(arraySize, 1);
175 #ifdef HAVE_TEUCHOSCORE_CXX11
178 typedef std::shared_ptr<std::vector<char> > shared_ptr_t;
179 std::vector<shared_ptr_t > sp_vec(numActualLoops);
183 sp_vec[i] = shared_ptr_t(
new std::vector<char>(arraySize, 1));
193 std::vector<RCP<std::vector<char> > > p_vec(numActualLoops);
197 p_vec[i] =
rcp(
new std::vector<char>(arraySize, 1));
203 #ifdef HAVE_TEUCHOSCORE_CXX11
205 const double spRatio = spTime / rawPtrTime;
206 outputter.outputField(spRatio);
210 const double rcpRatio = rcpTime / rawPtrTime;
211 outputter.outputField(rcpRatio);
216 finalRcpRawRatio =
TEUCHOS_MIN(rcpRatio, finalRcpRawRatio);
221 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawCreateDestroyRatio );
232 const double relTestCost = 5e-3;
233 const int maxLoopIters = 1000;
234 const double numInnerLoops = relCpuSpeed / relTestCost;
237 <<
"Messuring the overhead of incrementing and deincrementing the reference count\n"
238 <<
"comparing RCP to raw pointer"
239 #ifdef HAVE_TEUCHOSCORE_CXX11
240 <<
" and std::shared_ptr"
245 TabularOutputter outputter(out);
246 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
247 outputter.setFieldTypePrecision(TO::INT, intPrec);
249 outputter.pushFieldSpec(
"array dim", TO::INT);
250 outputter.pushFieldSpec(
"num loops", TO::INT);
251 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
252 #ifdef HAVE_TEUCHOSCORE_CXX11
253 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
255 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
256 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
257 #ifdef HAVE_TEUCHOSCORE_CXX11
258 outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
261 outputter.outputHeader();
263 double finalRcpRawRatio = 100000.0;
264 #ifdef HAVE_TEUCHOSCORE_CXX11
265 double finalRcpSpRatio = 100000.0;
271 test_case_k < maxLoopIters && arraySize <= maxArraySize;
277 outputter.outputField(arraySize);
280 const int numActualLoops =
283 (numInnerLoops / arraySize)
284 * std::log(static_cast<double>(arraySize+1))
288 outputter.outputField(numActualLoops);
299 char dummy_char1 =
'n';
300 char dummy_char2 =
'o';
301 std::vector<char*> p_raw_vec(arraySize);
304 for (
int i=0; i < arraySize; ++i) {
305 p_raw_vec[i] = &dummy_char1;
306 p_raw_vec[i] = &dummy_char2;
312 #ifdef HAVE_TEUCHOSCORE_CXX11
315 typedef std::shared_ptr<char> shared_ptr_t;
316 shared_ptr_t sp1(
new char(
'n'));
317 shared_ptr_t sp2(
new char(
'o'));
318 std::vector<shared_ptr_t> sp_vec(arraySize);
321 for (
int i=0; i < arraySize; ++i) {
332 RCP<char> p1(
new char(
'n'));
333 RCP<char> p2(
new char(
'o'));
334 std::vector<RCP<char> > p_vec(arraySize);
337 for (
int i=0; i < arraySize; ++i) {
349 const double rcpRawRatio = rcpTime / rawPtrTime;
350 finalRcpRawRatio =
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
351 outputter.outputField(rcpRawRatio);
353 #ifdef HAVE_TEUCHOSCORE_CXX11
355 const double rcpSpRatio = rcpTime / spTime;
356 finalRcpSpRatio =
TEUCHOS_MIN(rcpSpRatio, finalRcpSpRatio);
357 outputter.outputField(rcpSpRatio);
367 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawAdjustRefCountRatio );
369 #ifdef HAVE_TEUCHOSCORE_CXX11
370 TEST_COMPARE( finalRcpSpRatio, <=, maxRcpSpAdjustRefCountRatio );
382 const double relTestCost = 1e-4;
383 const int maxLoopIters = 1000;
384 const double numInnerLoops = relCpuSpeed / relTestCost;
387 <<
"Measuring the overhead of dereferencing RCP"
388 #ifdef HAVE_TEUCHOSCORE_CXX11
391 <<
" and a raw pointer.\n"
394 TabularOutputter outputter(out);
395 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
396 outputter.setFieldTypePrecision(TO::INT, intPrec);
398 outputter.pushFieldSpec(
"array dim", TO::INT);
399 outputter.pushFieldSpec(
"num loops", TO::INT);
400 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
401 #ifdef HAVE_TEUCHOSCORE_CXX11
402 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
404 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
405 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
406 #ifdef HAVE_TEUCHOSCORE_CXX11
407 outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
410 outputter.outputHeader();
412 double finalRcpRawRatio = 100000.0;
414 const int dummy_int_val = 1;
415 int overall_dummy_int_out = 0;
420 test_case_k < maxLoopIters && arraySize <= maxArraySize;
426 outputter.outputField(arraySize);
429 const int numActualLoops =
432 (numInnerLoops / arraySize)
433 * std::log(static_cast<double>(arraySize+1))
437 outputter.outputField(numActualLoops);
439 int dummy_int_out = 0;
443 int dummy_int = dummy_int_val;
444 std::vector<int*> p_raw_vec(arraySize);
445 for (
int i=0; i < arraySize; ++i) {
446 p_raw_vec[i] = &dummy_int;
451 for (
int i=0; i < arraySize; ++i) {
452 dummy_int_out += *p_raw_vec[i];
457 overall_dummy_int_out += dummy_int_out;
459 #ifdef HAVE_TEUCHOSCORE_CXX11
462 typedef std::shared_ptr<int> shared_ptr_t;
463 shared_ptr_t sp(
new int(dummy_int_val));
464 std::vector<shared_ptr_t> sp_vec(arraySize);
465 for (
int i=0; i < arraySize; ++i) {
471 for (
int i=0; i < arraySize; ++i) {
472 dummy_int_out += *sp_vec[i];
477 overall_dummy_int_out += dummy_int_out;
482 RCP<int> p(
new int(dummy_int_val));
483 std::vector<RCP<int> > p_vec(arraySize);
484 for (
int i=0; i < arraySize; ++i) {
490 for (
int i=0; i < arraySize; ++i) {
491 dummy_int_out += *p_vec[i];
496 overall_dummy_int_out += dummy_int_out;
499 const double rcpRawRatio = rcpTime / rawPtrTime;
500 finalRcpRawRatio =
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
501 outputter.outputField(rcpRawRatio);
503 #ifdef HAVE_TEUCHOSCORE_CXX11
505 const double rcpSpRatio = rcpTime / spTime;
506 outputter.outputField(rcpSpRatio);
516 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawObjAccessRatio );
521 if (overall_dummy_int_out == 0)
// Constructor: initializes the `member` field from member_in.
528 SomeStruct(
int member_in) : member(member_in) {}
538 const double relTestCost = 1e-4;
539 const int maxLoopIters = 1000;
540 const double numInnerLoops = relCpuSpeed / relTestCost;
543 <<
"Measuring the overhead of dereferencing RCP"
544 #ifdef HAVE_TEUCHOSCORE_CXX11
547 <<
" and a raw pointer.\n"
550 TabularOutputter outputter(out);
551 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
552 outputter.setFieldTypePrecision(TO::INT, intPrec);
554 outputter.pushFieldSpec(
"array dim", TO::INT);
555 outputter.pushFieldSpec(
"num loops", TO::INT);
556 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
557 #ifdef HAVE_TEUCHOSCORE_CXX11
558 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
560 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
561 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
562 #ifdef HAVE_TEUCHOSCORE_CXX11
563 outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
566 outputter.outputHeader();
568 double finalRcpRawRatio = 100000.0;
570 const int dummy_int_val = 1;
571 int overall_dummy_int_out = 0;
575 test_case_k < maxLoopIters && arraySize <= maxArraySize;
581 outputter.outputField(arraySize);
584 const int numActualLoops =
587 (numInnerLoops / arraySize)
588 * std::log(static_cast<double>(arraySize+1))
592 outputter.outputField(numActualLoops);
594 int dummy_int_out = 0;
598 SomeStruct dummy_SomeStruct(dummy_int_val);
599 std::vector<SomeStruct*> p_raw_vec(arraySize);
600 for (
int i=0; i < arraySize; ++i) {
601 p_raw_vec[i] = &dummy_SomeStruct;
606 for (
int i=0; i < arraySize; ++i) {
607 dummy_int_out += p_raw_vec[i]->member;
612 overall_dummy_int_out += dummy_int_out;
614 #ifdef HAVE_TEUCHOSCORE_CXX11
617 typedef std::shared_ptr<SomeStruct> shared_ptr_t;
618 shared_ptr_t sp(
new SomeStruct(dummy_int_val));
619 std::vector<shared_ptr_t> sp_vec(arraySize);
620 for (
int i=0; i < arraySize; ++i) {
626 for (
int i=0; i < arraySize; ++i) {
627 dummy_int_out += sp_vec[i]->member;
632 overall_dummy_int_out += dummy_int_out;
637 RCP<SomeStruct> p(
new SomeStruct(dummy_int_val));
638 std::vector<RCP<SomeStruct> > p_vec(arraySize);
639 for (
int i=0; i < arraySize; ++i) {
645 for (
int i=0; i < arraySize; ++i) {
646 dummy_int_out += p_vec[i]->member;
651 overall_dummy_int_out += dummy_int_out;
654 const double rcpRawRatio = rcpTime / rawPtrTime;
655 finalRcpRawRatio =
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
656 outputter.outputField(rcpRawRatio);
658 #ifdef HAVE_TEUCHOSCORE_CXX11
660 const double rcpSpRatio = rcpTime / spTime;
661 outputter.outputField(rcpSpRatio);
671 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawObjAccessRatio );
676 if (overall_dummy_int_out == 0)
RCP< T > rcp(const boost::shared_ptr< T > &sptr)
Conversion function that takes in a boost::shared_ptr object and spits out a Teuchos::RCP object...
ERCPStrength
Used to specify if the pointer is weak or strong.
#define TEST_INEQUALITY_CONST(v1, v2)
Assert the inequality of v1 and constant v2.
static CommandLineProcessor & getCLP()
Return the CLP to add options to.
#define TEST_COMPARE(v1, comp, v2)
Assert that v1 comp v2 (where comp = '==', '>=', '!=', etc.).
#define TEUCHOS_UNIT_TEST(TEST_GROUP, TEST_NAME)
Macro for defining a (non-templated) unit test.
Utility class that makes it easy to create formatted tables of output.
Policy class for deallocator that uses delete to delete a pointer which is used by RCP...
Node class to keep track of address and the reference count for a reference-counted utility class and...
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
Templated implementation class of RCPNode that has the responsibility for deleting the reference-coun...
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
Set a boolean option.
#define TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(OUTPUTTER, NUMLOOPS, NUMINNERLOOPS)
Start a timer block using a TabularOutputter object.
#define TEUCHOS_END_PERF_OUTPUT_TIMER(OUTPUTTER, VARNAME)
End a timer block, output the time field to a TabularOutputter object, and set a variable with the ti...
#define TEUCHOS_MAX(x, y)
Handle class that manages the RCPNode's reference counting.
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_START_PERF_OUTPUT_TIMER(OUTPUTTER, NUMLOOPS)
Start a timer block using a TabularOutputter object.
#define TEUCHOS_MIN(x, y)
Reference-counted pointer class and non-member templated function implementations.
Class that helps parse command line input arguments from (argc,argv[]) and set options.