47 #ifdef HAVE_TEUCHOSCORE_CXX11 
   49 #endif // HAVE_TEUCHOSCORE_CXX11 
   // File-scope tuning parameters for the performance tests below. Each of the
   // non-const values is paired with a command-line option string further down.

   // Relative speed of the CPU (higher means the machine runs faster); option
   // "rel-cpu-speed". Divided by each test's relTestCost to get numInnerLoops.
   60 double relCpuSpeed = 1e-2;
 
   // Maximum size of the arrays created; option "max-array-size". Bounds the
   // arraySize loop in every test.
   61 int maxArraySize = 10000;
 
   // Upper bound checked against the final RCP/raw CPU-time ratio for creating
   // and destroying std::vector<char>(size) objects; option
   // "max-rcp-create-destroy-ratio".
   62 double maxRcpRawCreateDestroyRatio = 10.0;
 
   // Upper bound checked against the final RCP/raw CPU-time ratio for adjusting
   // the reference count; option "max-rcp-raw-adjust-ref-count-ratio".
   63 double maxRcpRawAdjustRefCountRatio = 100.0;
 
   64 #ifdef HAVE_TEUCHOSCORE_CXX11 
   // Upper bound checked against the final RCP/std::shared_ptr CPU-time ratio
   // for adjusting the reference count; option
   // "max-rcp-sp-adjust-ref-count-ratio".
   65 double maxRcpSpAdjustRefCountRatio = 5.0;
 
   // Upper bound checked against the final RCP/raw CPU-time ratio for accessing
   // the pointed-to object; option "max-rcp-raw-obj-access-ratio".
   67 double maxRcpRawObjAccessRatio = 13.5;
 
   // Precision passed to TabularOutputter::setFieldTypePrecision for TO::INT fields.
   69 const int intPrec = 8;
 
   // Precision passed to TabularOutputter::setFieldTypePrecision for TO::DOUBLE fields.
   70 const int dblPrec = 6;
 
   78     "rel-cpu-speed", &relCpuSpeed,
 
   79     "The relative speed of the CPU (higher means the machine runs faster)" 
   82     "max-array-size", &maxArraySize,
 
   83     "The maximum size of the arrays created" 
   86     "max-rcp-create-destroy-ratio", &maxRcpRawCreateDestroyRatio,
 
   87     "The ratio of the final CPU time ratio of creating and destroying" 
   88     "std::vector<char>(size) objects wrapped in an RCP object versus" 
   89     "using just raw new and delete." 
   92     "max-rcp-raw-adjust-ref-count-ratio", &maxRcpRawAdjustRefCountRatio,
 
   93     "The ratio of the final CPU time ratio for adjusting the reference" 
   94     "count of RCP objects versus a raw pointer." 
   96 #ifdef HAVE_TEUCHOSCORE_CXX11 
   98     "max-rcp-sp-adjust-ref-count-ratio", &maxRcpSpAdjustRefCountRatio,
 
   99     "The ratio of the final CPU time ratio for adjusting the reference" 
  100     "count of RCP objects versus std::shared_ptr objects." 
  104     "max-rcp-raw-obj-access-ratio", &maxRcpRawObjAccessRatio,
 
  105     "The ratio of the final CPU time ratio for accessing the object for RCP" 
  106     "versus a raw pointer." 
  // Empty struct with no members.
  // NOTE(review): its use site is not visible in this excerpt -- presumably a
  // stand-in deleter type for the object-size printout that follows; confirm
  // against the full file.
  113 struct DeleteDeleter {};
 
  118   out << 
"\nPrinting the size the RCP and RCPNodeImpl objects ...\n";
 
  139   const int maxLoopIters = 1000;
 
  140   const double relTestCost = 1e-3;
 
  141   const double numInnerLoops = relCpuSpeed / relTestCost;
 
  144       << 
"Messuring the overhead of creating and destorying objects of different sizes\n" 
  145       << 
"using raw C++ pointers," 
  146 #ifdef HAVE_TEUCHOSCORE_CXX11 
  149       << 
" and using RCP.\n" 
  151       << 
"Number of loops = relCpuSpeed/relTestCost = " 
  152       << relCpuSpeed << 
"/" << relTestCost << 
" = " << numInnerLoops << 
"\n" 
  155   TabularOutputter outputter(out);
 
  156   outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
 
  157   outputter.setFieldTypePrecision(TO::INT, intPrec);
 
  159   outputter.pushFieldSpec(
"obj size", TO::INT);
 
  160   outputter.pushFieldSpec(
"num loops", TO::INT);
 
  161   outputter.pushFieldSpec(
"raw", TO::DOUBLE);
 
  162 #ifdef HAVE_TEUCHOSCORE_CXX11 
  163   outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
 
  165   outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
 
  166 #ifdef HAVE_TEUCHOSCORE_CXX11 
  167   outputter.pushFieldSpec(
"shared_ptr/raw", TO::DOUBLE);
 
  169   outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
 
  171   outputter.outputHeader();
 
  173   double finalRcpRawRatio = 100000.0;
 
  176   for (
int test_case_k = 0;
 
  177     test_case_k < maxLoopIters && arraySize <= maxArraySize;
 
  183     outputter.outputField(arraySize);
 
  186     const int numActualLoops =
 
  189           (numInnerLoops / arraySize)
 
  190           * std::log(static_cast<double>(arraySize+1))
 
  194     outputter.outputField(numActualLoops);
 
  198       std::vector<std::vector<char>*> p_raw_vec(numActualLoops);
 
  202         p_raw_vec[i] = 
new std::vector<char>(arraySize, 1);
 
  209 #ifdef HAVE_TEUCHOSCORE_CXX11 
  212       typedef std::shared_ptr<std::vector<char> > shared_ptr_t;
 
  213       std::vector<shared_ptr_t > sp_vec(numActualLoops);
 
  217         sp_vec[i] = shared_ptr_t(
new std::vector<char>(arraySize, 1));
 
  227       std::vector<RCP<std::vector<char> > > p_vec(numActualLoops);
 
  231         p_vec[i] = 
rcp(
new std::vector<char>(arraySize, 1));
 
  237 #ifdef HAVE_TEUCHOSCORE_CXX11 
  239     const double spRatio = spTime / rawPtrTime;
 
  240     outputter.outputField(spRatio);
 
  244     const double rcpRatio = rcpTime / rawPtrTime;
 
  245     outputter.outputField(rcpRatio);
 
  250     finalRcpRawRatio = 
TEUCHOS_MIN(rcpRatio, finalRcpRawRatio);
 
  255   TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawCreateDestroyRatio );
 
  266   const double relTestCost = 5e-3;
 
  267   const int maxLoopIters = 1000;
 
  268   const double numInnerLoops = relCpuSpeed / relTestCost;
 
  271       << 
"Messuring the overhead of incrementing and deincrementing the reference count\n" 
  272       << 
"comparing RCP to raw pointer" 
  273 #ifdef HAVE_TEUCHOSCORE_CXX11 
  274       << 
" and std::shared_ptr" 
  279   TabularOutputter outputter(out);
 
  280   outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
 
  281   outputter.setFieldTypePrecision(TO::INT, intPrec);
 
  283   outputter.pushFieldSpec(
"array dim", TO::INT);
 
  284   outputter.pushFieldSpec(
"num loops", TO::INT);
 
  285   outputter.pushFieldSpec(
"raw", TO::DOUBLE);
 
  286 #ifdef HAVE_TEUCHOSCORE_CXX11 
  287   outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
 
  289   outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
 
  290   outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
 
  291 #ifdef HAVE_TEUCHOSCORE_CXX11 
  292   outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
 
  295   outputter.outputHeader();
 
  297   double finalRcpRawRatio = 100000.0;
 
  298 #ifdef HAVE_TEUCHOSCORE_CXX11 
  299   double finalRcpSpRatio = 100000.0;
 
  305     test_case_k < maxLoopIters && arraySize <= maxArraySize;
 
  311     outputter.outputField(arraySize);
 
  314     const int numActualLoops =
 
  317           (numInnerLoops / arraySize)
 
  318           * std::log(static_cast<double>(arraySize+1))
 
  322     outputter.outputField(numActualLoops);
 
  333       char dummy_char1 = 
'n';
 
  334       char dummy_char2 = 
'o'; 
 
  335       std::vector<char*> p_raw_vec(arraySize);
 
  338         for (
int i=0; i < arraySize; ++i) {
 
  339           p_raw_vec[i] = &dummy_char1;
 
  340           p_raw_vec[i] = &dummy_char2; 
 
  346 #ifdef HAVE_TEUCHOSCORE_CXX11 
  349       typedef std::shared_ptr<char> shared_ptr_t;
 
  350       shared_ptr_t sp1(
new char(
'n'));
 
  351       shared_ptr_t sp2(
new char(
'o')); 
 
  352       std::vector<shared_ptr_t> sp_vec(arraySize);
 
  355         for (
int i=0; i < arraySize; ++i) {
 
  366       RCP<char> p1(
new char(
'n'));
 
  367       RCP<char> p2(
new char(
'o')); 
 
  368       std::vector<RCP<char> > p_vec(arraySize);
 
  371         for (
int i=0; i < arraySize; ++i) {
 
  383     const double rcpRawRatio = rcpTime / rawPtrTime;
 
  384     finalRcpRawRatio = 
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
 
  385     outputter.outputField(rcpRawRatio);
 
  387 #ifdef HAVE_TEUCHOSCORE_CXX11 
  389     const double rcpSpRatio = rcpTime / spTime;
 
  390     finalRcpSpRatio = 
TEUCHOS_MIN(rcpSpRatio, finalRcpSpRatio);
 
  391     outputter.outputField(rcpSpRatio);
 
  401   TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawAdjustRefCountRatio );
 
  403 #ifdef HAVE_TEUCHOSCORE_CXX11 
  404   TEST_COMPARE( finalRcpSpRatio, <=, maxRcpSpAdjustRefCountRatio );
 
  416   const double relTestCost = 1e-4;
 
  417   const int maxLoopIters = 1000;
 
  418   const double numInnerLoops = relCpuSpeed / relTestCost;
 
  421       << 
"Measuring the overhead of dereferencing RCP" 
  422 #ifdef HAVE_TEUCHOSCORE_CXX11 
  425       << 
" and a raw pointer.\n" 
  428   TabularOutputter outputter(out);
 
  429   outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
 
  430   outputter.setFieldTypePrecision(TO::INT, intPrec);
 
  432   outputter.pushFieldSpec(
"array dim", TO::INT);
 
  433   outputter.pushFieldSpec(
"num loops", TO::INT);
 
  434   outputter.pushFieldSpec(
"raw", TO::DOUBLE);
 
  435 #ifdef HAVE_TEUCHOSCORE_CXX11 
  436   outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
 
  438   outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
 
  439   outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
 
  440 #ifdef HAVE_TEUCHOSCORE_CXX11 
  441   outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
 
  444   outputter.outputHeader();
 
  446   double finalRcpRawRatio = 100000.0;
 
  448   const int dummy_int_val = 1;
 
  449   int overall_dummy_int_out = 0;
 
  454     test_case_k < maxLoopIters && arraySize <= maxArraySize;
 
  460     outputter.outputField(arraySize);
 
  463     const int numActualLoops =
 
  466           (numInnerLoops / arraySize)
 
  467           * std::log(static_cast<double>(arraySize+1))
 
  471     outputter.outputField(numActualLoops);
 
  473     int dummy_int_out = 0;
 
  477       int dummy_int = dummy_int_val;
 
  478       std::vector<int*> p_raw_vec(arraySize);
 
  479       for (
int i=0; i < arraySize; ++i) {
 
  480         p_raw_vec[i] = &dummy_int;
 
  485         for (
int i=0; i < arraySize; ++i) {
 
  486           dummy_int_out += *p_raw_vec[i];
 
  491     overall_dummy_int_out += dummy_int_out;
 
  493 #ifdef HAVE_TEUCHOSCORE_CXX11 
  496       typedef std::shared_ptr<int> shared_ptr_t;
 
  497       shared_ptr_t sp(
new int(dummy_int_val));
 
  498       std::vector<shared_ptr_t> sp_vec(arraySize);
 
  499       for (
int i=0; i < arraySize; ++i) {
 
  505         for (
int i=0; i < arraySize; ++i) {
 
  506           dummy_int_out += *sp_vec[i];
 
  511     overall_dummy_int_out += dummy_int_out;
 
  516       RCP<int> p(
new int(dummy_int_val));
 
  517       std::vector<RCP<int> > p_vec(arraySize);
 
  518       for (
int i=0; i < arraySize; ++i) {
 
  524         for (
int i=0; i < arraySize; ++i) {
 
  525           dummy_int_out += *p_vec[i];
 
  530     overall_dummy_int_out += dummy_int_out;
 
  533     const double rcpRawRatio = rcpTime / rawPtrTime;
 
  534     finalRcpRawRatio = 
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
 
  535     outputter.outputField(rcpRawRatio);
 
  537 #ifdef HAVE_TEUCHOSCORE_CXX11 
  539     const double rcpSpRatio = rcpTime / spTime;
 
  540     outputter.outputField(rcpSpRatio);
 
  550   TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawObjAccessRatio );
 
  555   if (overall_dummy_int_out == 0)
 
  // Constructs a SomeStruct, initializing `member` from member_in.
  562   SomeStruct(
int member_in) : member(member_in) {}
 
  572   const double relTestCost = 1e-4;
 
  573   const int maxLoopIters = 1000;
 
  574   const double numInnerLoops = relCpuSpeed / relTestCost;
 
  577       << 
"Measuring the overhead of dereferencing RCP" 
  578 #ifdef HAVE_TEUCHOSCORE_CXX11 
  581       << 
" and a raw pointer.\n" 
  584   TabularOutputter outputter(out);
 
  585   outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
 
  586   outputter.setFieldTypePrecision(TO::INT, intPrec);
 
  588   outputter.pushFieldSpec(
"array dim", TO::INT);
 
  589   outputter.pushFieldSpec(
"num loops", TO::INT);
 
  590   outputter.pushFieldSpec(
"raw", TO::DOUBLE);
 
  591 #ifdef HAVE_TEUCHOSCORE_CXX11 
  592   outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
 
  594   outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
 
  595   outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
 
  596 #ifdef HAVE_TEUCHOSCORE_CXX11 
  597   outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
 
  600   outputter.outputHeader();
 
  602   double finalRcpRawRatio = 100000.0;
 
  604   const int dummy_int_val = 1;
 
  605   int overall_dummy_int_out = 0;
 
  609     test_case_k < maxLoopIters && arraySize <= maxArraySize;
 
  615     outputter.outputField(arraySize);
 
  618     const int numActualLoops =
 
  621           (numInnerLoops / arraySize)
 
  622           * std::log(static_cast<double>(arraySize+1))
 
  626     outputter.outputField(numActualLoops);
 
  628     int dummy_int_out = 0;
 
  632       SomeStruct dummy_SomeStruct(dummy_int_val);
 
  633       std::vector<SomeStruct*> p_raw_vec(arraySize);
 
  634       for (
int i=0; i < arraySize; ++i) {
 
  635         p_raw_vec[i] = &dummy_SomeStruct;
 
  640         for (
int i=0; i < arraySize; ++i) {
 
  641           dummy_int_out += p_raw_vec[i]->member;
 
  646     overall_dummy_int_out += dummy_int_out;
 
  648 #ifdef HAVE_TEUCHOSCORE_CXX11 
  651       typedef std::shared_ptr<SomeStruct> shared_ptr_t;
 
  652       shared_ptr_t sp(
new SomeStruct(dummy_int_val));
 
  653       std::vector<shared_ptr_t> sp_vec(arraySize);
 
  654       for (
int i=0; i < arraySize; ++i) {
 
  660         for (
int i=0; i < arraySize; ++i) {
 
  661           dummy_int_out += sp_vec[i]->member;
 
  666     overall_dummy_int_out += dummy_int_out;
 
  671       RCP<SomeStruct> p(
new SomeStruct(dummy_int_val));
 
  672       std::vector<RCP<SomeStruct> > p_vec(arraySize);
 
  673       for (
int i=0; i < arraySize; ++i) {
 
  679         for (
int i=0; i < arraySize; ++i) {
 
  680           dummy_int_out += p_vec[i]->member;
 
  685     overall_dummy_int_out += dummy_int_out;
 
  688     const double rcpRawRatio = rcpTime / rawPtrTime;
 
  689     finalRcpRawRatio = 
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
 
  690     outputter.outputField(rcpRawRatio);
 
  692 #ifdef HAVE_TEUCHOSCORE_CXX11 
  694     const double rcpSpRatio = rcpTime / spTime;
 
  695     outputter.outputField(rcpSpRatio);
 
  705   TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawObjAccessRatio );
 
  710   if (overall_dummy_int_out == 0)
 
RCP< T > rcp(const boost::shared_ptr< T > &sptr)
Conversion function that takes in a boost::shared_ptr object and spits out a Teuchos::RCP object...
 
ERCPStrength
Used to specify if the pointer is weak or strong. 
 
#define TEST_INEQUALITY_CONST(v1, v2)
Assert the inequality of v1 and constant v2. 
 
static CommandLineProcessor & getCLP()
Return the CLP to add options to. 
 
#define TEST_COMPARE(v1, comp, v2)
Assert that v1 comp v2 (where comp is '==', '>=', '!=', etc.).
 
#define TEUCHOS_UNIT_TEST(TEST_GROUP, TEST_NAME)
Macro for defining a (non-templated) unit test. 
 
Utility class that makes it easy to create formatted tables of output. 
 
Policy class for deallocator that uses delete to delete a pointer which is used by RCP...
 
Node class to keep track of address and the reference count for a reference-counted utility class and...
 
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated. 
 
Templated implementation class of RCPNode that has the responsibility for deleting the reference-coun...
 
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
Set a boolean option. 
 
#define TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(OUTPUTTER, NUMLOOPS, NUMINNERLOOPS)
Start a timer block using a TabularOutputter object.
 
#define TEUCHOS_END_PERF_OUTPUT_TIMER(OUTPUTTER, VARNAME)
End a timer block, output the time field to a TabularOutputter object, and set a variable with the ti...
 
#define TEUCHOS_MAX(x, y)
 
Handle class that manages the RCPNode's reference counting. 
 
Smart reference counting pointer class for automatic garbage collection. 
 
#define TEUCHOS_START_PERF_OUTPUT_TIMER(OUTPUTTER, NUMLOOPS)
Start a timer block using a TabularOutputter object.
 
#define TEUCHOS_MIN(x, y)
 
Reference-counted pointer class and non-member templated function implementations. 
 
Class that helps parse command line input arguments from (argc,argv[]) and set options.