Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_crsUtils.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_CRSUTILS_HPP
11 #define TPETRA_DETAILS_CRSUTILS_HPP
12 #include <numeric>
13 #include <type_traits>
14 
15 #include "TpetraCore_config.h"
16 #include "Kokkos_Core.hpp"
18 #include "Tpetra_Details_CrsPadding.hpp"
19 #include "Tpetra_Details_WrappedDualView.hpp"
20 #include <iostream>
21 #include <memory>
22 #include <unordered_map>
23 
29 
30 namespace Tpetra {
31 namespace Details {
32 
33 namespace impl {
34 
35 template<class ViewType>
36 ViewType
37 make_uninitialized_view(
38  const std::string& name,
39  const size_t size,
40  const bool verbose,
41  const std::string* const prefix)
42 {
43  if (verbose) {
44  std::ostringstream os;
45  os << *prefix << "Allocate Kokkos::View " << name
46  << ": " << size << std::endl;
47  std::cerr << os.str();
48  }
49  using Kokkos::view_alloc;
50  using Kokkos::WithoutInitializing;
51  return ViewType(view_alloc(name, WithoutInitializing), size);
52 }
53 
54 template<class ViewType>
55 ViewType
56 make_initialized_view(
57  const std::string& name,
58  const size_t size,
59  const bool verbose,
60  const std::string* const prefix)
61 {
62  if (verbose) {
63  std::ostringstream os;
64  os << *prefix << "Allocate & initialize Kokkos::View "
65  << name << ": " << size << std::endl;
66  std::cerr << os.str();
67  }
68  return ViewType(name, size);
69 }
70 
71 template<class OutViewType, class InViewType>
72 void
73 assign_to_view(OutViewType& out,
74  const InViewType& in,
75  const char viewName[],
76  const bool verbose,
77  const std::string* const prefix)
78 {
79  if (verbose) {
80  std::ostringstream os;
81  os << *prefix << "Assign to Kokkos::View " << viewName
82  << ": Old size: " << out.extent(0)
83  << ", New size: " << in.extent(0) << std::endl;
84  std::cerr << os.str();
85  }
86  out = in;
87 }
88 
89 template<class MemorySpace, class ViewType>
90 auto create_mirror_view(
91  const MemorySpace& memSpace,
92  const ViewType& view,
93  const bool verbose,
94  const std::string* const prefix) ->
95  decltype(Kokkos::create_mirror_view(memSpace, view))
96 {
97  if (verbose) {
98  std::ostringstream os;
99  os << *prefix << "Create mirror view: "
100  << "view.extent(0): " << view.extent(0) << std::endl;
101  std::cerr << os.str();
102  }
103  return Kokkos::create_mirror_view(memSpace, view);
104 }
105 
106 enum class PadCrsAction {
107  INDICES_ONLY,
108  INDICES_AND_VALUES
109 };
110 
119 template<class RowPtr, class Indices, class Values, class Padding>
120 void
121 pad_crs_arrays(
122  const PadCrsAction action,
123  const RowPtr& row_ptr_beg,
124  const RowPtr& row_ptr_end,
125  Indices& indices_wdv,
126  Values& values_wdv,
127  const Padding& padding,
128  const int my_rank,
129  const bool verbose)
130 {
131  using execution_space = typename Indices::t_dev::execution_space;
132  using Kokkos::view_alloc;
133  using Kokkos::WithoutInitializing;
134  using std::endl;
135  std::unique_ptr<std::string> prefix;
136 
137  const size_t maxNumToPrint = verbose ?
139  if (verbose) {
140  std::ostringstream os;
141  os << "Proc " << my_rank << ": Tpetra::...::pad_crs_arrays: ";
142  prefix = std::unique_ptr<std::string>(new std::string(os.str()));
143  os << "Start" << endl;
144  std::cerr << os.str();
145  }
146  Kokkos::HostSpace hostSpace;
147 
148  if (verbose) {
149  std::ostringstream os;
150  os << *prefix << "On input: ";
151  auto row_ptr_beg_h =
152  Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
153  // DEEP_COPY REVIEW - NOT TESTED
154  Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg);
155  verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg before scan",
156  maxNumToPrint);
157  os << ", ";
158  auto row_ptr_end_h =
159  Kokkos::create_mirror_view(hostSpace, row_ptr_end);
160  // DEEP_COPY REVIEW - NOT TESTED
161  Kokkos::deep_copy(row_ptr_end_h, row_ptr_end);
162  verbosePrintArray(os, row_ptr_end_h, "row_ptr_end before scan",
163  maxNumToPrint);
164  os << ", indices.extent(0): " << indices_wdv.extent(0)
165  << ", values.extent(0): " << values_wdv.extent(0)
166  << ", padding: ";
167  padding.print(os);
168  os << endl;
169  std::cerr << os.str();
170  }
171 
172  if (row_ptr_beg.size() == 0) {
173  if (verbose) {
174  std::ostringstream os;
175  os << *prefix << "Done; local matrix has no rows" << endl;
176  std::cerr << os.str();
177  }
178  return; // nothing to do
179  }
180 
181  const size_t lclNumRows(row_ptr_beg.size() - 1);
182  RowPtr newAllocPerRow =
183  make_uninitialized_view<RowPtr>("newAllocPerRow", lclNumRows,
184  verbose, prefix.get());
185  if (verbose) {
186  std::ostringstream os;
187  os << *prefix << "Fill newAllocPerRow & compute increase" << endl;
188  std::cerr << os.str();
189  }
190  size_t increase = 0;
191  {
192  // Must do on host because padding uses std::map
193  execution_space exec_space_instance = execution_space();
194  auto row_ptr_end_h = create_mirror_view(
195  hostSpace, row_ptr_end, verbose, prefix.get());
196  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
197  Kokkos::deep_copy(exec_space_instance, row_ptr_end_h, row_ptr_end);
198  auto row_ptr_beg_h = create_mirror_view(
199  hostSpace, row_ptr_beg, verbose, prefix.get());
200  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
201  Kokkos::deep_copy(exec_space_instance, row_ptr_beg_h, row_ptr_beg);
202 
203  // lbv 03/15/23: The execution space deep_copy does an asynchronous
204  // copy so we really want to fence that space before touching the
205  // host data as it is not guarenteed to have arrived by the time we
206  // start the parallel_reduce below which might use a different
207  // execution space, see:
208  // https://kokkos.github.io/kokkos-core-wiki/API/core/view/deep_copy.html#semantics
209  exec_space_instance.fence();
210 
211  auto newAllocPerRow_h = create_mirror_view(
212  hostSpace, newAllocPerRow, verbose, prefix.get());
213  using host_range_type = Kokkos::RangePolicy<
214  Kokkos::DefaultHostExecutionSpace, size_t>;
215  Kokkos::parallel_reduce
216  ("Tpetra::CrsGraph: Compute new allocation size per row",
217  host_range_type(0, lclNumRows),
218  [&] (const size_t lclRowInd, size_t& lclIncrease) {
219  const size_t start = row_ptr_beg_h[lclRowInd];
220  const size_t end = row_ptr_beg_h[lclRowInd+1];
221  TEUCHOS_ASSERT( end >= start );
222  const size_t oldAllocSize = end - start;
223  const size_t oldNumEnt = row_ptr_end_h[lclRowInd] - start;
224  TEUCHOS_ASSERT( oldNumEnt <= oldAllocSize );
225 
226  // This is not a pack routine. Do not shrink! Shrinking now
227  // to fit the number of entries would ignore users' hint for
228  // the max number of entries in each row. Also, CrsPadding
229  // only counts entries and ignores any available free space.
230 
231  auto result = padding.get_result(lclRowInd);
232  const size_t newNumEnt = oldNumEnt + result.numInSrcNotInTgt;
233  if (newNumEnt > oldAllocSize) {
234  lclIncrease += (newNumEnt - oldAllocSize);
235  newAllocPerRow_h[lclRowInd] = newNumEnt;
236  }
237  else {
238  newAllocPerRow_h[lclRowInd] = oldAllocSize;
239  }
240  }, increase);
241 
242  if (verbose) {
243  std::ostringstream os;
244  os << *prefix << "increase: " << increase << ", ";
245  verbosePrintArray(os, newAllocPerRow_h, "newAllocPerRow",
246  maxNumToPrint);
247  os << endl;
248  std::cerr << os.str();
249  }
250 
251  if (increase == 0) {
252  return;
253  }
254  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
255  Kokkos::deep_copy(execution_space(), newAllocPerRow, newAllocPerRow_h);
256  }
257 
258  using inds_value_type =
259  typename Indices::t_dev::non_const_value_type;
260  using vals_value_type = typename Values::t_dev::non_const_value_type;
261 
262  {
263  auto indices_old = indices_wdv.getDeviceView(Access::ReadOnly);
264  const size_t newIndsSize = size_t(indices_old.size()) + increase;
265  auto indices_new = make_uninitialized_view<typename Indices::t_dev>(
266  "Tpetra::CrsGraph column indices", newIndsSize, verbose,
267  prefix.get());
268 
269  typename Values::t_dev values_new;
270  auto values_old = values_wdv.getDeviceView(Access::ReadOnly);
271  if (action == PadCrsAction::INDICES_AND_VALUES) {
272  const size_t newValsSize = newIndsSize;
273  // NOTE (mfh 10 Feb 2020) If we don't initialize values_new here,
274  // then the CrsMatrix tests fail.
275  values_new = make_initialized_view<typename Values::t_dev>(
276  "Tpetra::CrsMatrix values", newValsSize, verbose, prefix.get());
277  }
278 
279  if (verbose) {
280  std::ostringstream os;
281  os << *prefix << "Repack" << endl;
282  std::cerr << os.str();
283  }
284 
285  using range_type = Kokkos::RangePolicy<execution_space, size_t>;
286  Kokkos::parallel_scan(
287  "Tpetra::CrsGraph or CrsMatrix repack",
288  range_type(size_t(0), size_t(lclNumRows+1)),
289  KOKKOS_LAMBDA (const size_t lclRow, size_t& newRowBeg,
290  const bool finalPass)
291  {
292  // row_ptr_beg has lclNumRows + 1 entries.
293  // row_ptr_end has lclNumRows entries.
294  // newAllocPerRow has lclNumRows entries.
295  const size_t row_beg = row_ptr_beg[lclRow];
296  const size_t row_end =
297  lclRow < lclNumRows ? row_ptr_end[lclRow] : row_beg;
298  const size_t numEnt = row_end - row_beg;
299  const size_t newRowAllocSize =
300  lclRow < lclNumRows ? newAllocPerRow[lclRow] : size_t(0);
301  if (finalPass) {
302  if (lclRow < lclNumRows) {
303  const Kokkos::pair<size_t, size_t> oldRange(
304  row_beg, row_beg + numEnt);
305  const Kokkos::pair<size_t, size_t> newRange(
306  newRowBeg, newRowBeg + numEnt);
307  auto oldColInds = Kokkos::subview(indices_old, oldRange);
308  auto newColInds = Kokkos::subview(indices_new, newRange);
309  // memcpy works fine on device; the next step is to
310  // introduce two-level parallelism and use team copy.
311  memcpy(newColInds.data(), oldColInds.data(),
312  numEnt * sizeof(inds_value_type));
313  if (action == PadCrsAction::INDICES_AND_VALUES) {
314  auto oldVals =
315  Kokkos::subview(values_old, oldRange);
316  auto newVals = Kokkos::subview(values_new, newRange);
317  memcpy((void*) newVals.data(), oldVals.data(),
318  numEnt * sizeof(vals_value_type));
319  }
320  }
321  // It's the final pass, so we can modify these arrays.
322  row_ptr_beg[lclRow] = newRowBeg;
323  if (lclRow < lclNumRows) {
324  row_ptr_end[lclRow] = newRowBeg + numEnt;
325  }
326  }
327  newRowBeg += newRowAllocSize;
328  });
329 
330  if (verbose)
331  {
332  std::ostringstream os;
333 
334  os << *prefix;
335  auto row_ptr_beg_h =
336  Kokkos::create_mirror_view(hostSpace, row_ptr_beg);
337  // DEEP_COPY REVIEW - NOT TESTED
338  Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg);
339  verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg after scan",
340  maxNumToPrint);
341  os << endl;
342 
343  os << *prefix;
344  auto row_ptr_end_h =
345  Kokkos::create_mirror_view(hostSpace, row_ptr_end);
346  // DEEP_COPY REVIEW - NOT TESTED
347  Kokkos::deep_copy(row_ptr_end_h, row_ptr_end);
348  verbosePrintArray(os, row_ptr_end_h, "row_ptr_end after scan",
349  maxNumToPrint);
350  os << endl;
351 
352  std::cout << os.str();
353  }
354 
355  indices_wdv = Indices(indices_new);
356  values_wdv = Values(values_new);
357  }
358 
359  if (verbose) {
360  auto indices_h = indices_wdv.getHostView(Access::ReadOnly);
361  auto values_h = values_wdv.getHostView(Access::ReadOnly);
362  std::ostringstream os;
363  os << "On output: ";
364  verbosePrintArray(os, indices_h, "indices", maxNumToPrint);
365  os << ", ";
366  verbosePrintArray(os, values_h, "values", maxNumToPrint);
367  os << ", padding: ";
368  padding.print(os);
369  os << endl;
370  }
371 
372  if (verbose) {
373  std::ostringstream os;
374  os << *prefix << "Done" << endl;
375  std::cerr << os.str();
376  }
377 }
378 
380 template <class Pointers, class InOutIndices, class InIndices, class IndexMap>
381 size_t
382 insert_crs_indices(
383  typename Pointers::value_type const row,
384  Pointers const& row_ptrs,
385  InOutIndices& cur_indices,
386  size_t& num_assigned,
387  InIndices const& new_indices,
388  IndexMap&& map,
389  std::function<void(size_t const, size_t const, size_t const)> cb)
390 {
391  if (new_indices.size() == 0) {
392  return 0;
393  }
394 
395  if (cur_indices.size() == 0) {
396  // No room to insert new indices
397  return Teuchos::OrdinalTraits<size_t>::invalid();
398  }
399 
400  using offset_type = typename std::decay<decltype (row_ptrs[0])>::type;
401  using ordinal_type = typename std::decay<decltype (cur_indices[0])>::type;
402 
403  const offset_type start = row_ptrs[row];
404  offset_type end = start + static_cast<offset_type> (num_assigned);
405  const size_t num_avail = (row_ptrs[row + 1] < end) ? size_t (0) :
406  row_ptrs[row + 1] - end;
407  const size_t num_new_indices = static_cast<size_t> (new_indices.size ());
408  size_t num_inserted = 0;
409 
410  size_t numIndicesLookup = num_assigned + num_new_indices;
411 
412  // Threshold determined from test/Utils/insertCrsIndicesThreshold.cpp
413  const size_t useLookUpTableThreshold = 400;
414 
415  if (numIndicesLookup <= useLookUpTableThreshold || num_new_indices == 1) {
416  // For rows with few nonzeros, can use a serial search to find duplicates
417  // Or if inserting only one index, serial search is as fast as anything else
418  for (size_t k = 0; k < num_new_indices; ++k) {
419  const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
420  offset_type row_offset = start;
421  for (; row_offset < end; ++row_offset) {
422  if (idx == cur_indices[row_offset]) {
423  break;
424  }
425  }
426 
427  if (row_offset == end) {
428  if (num_inserted >= num_avail) { // not enough room
429  return Teuchos::OrdinalTraits<size_t>::invalid();
430  }
431  // This index is not yet in indices
432  cur_indices[end++] = idx;
433  num_inserted++;
434  }
435  if (cb) {
436  cb(k, start, row_offset - start);
437  }
438  }
439  }
440  else {
441  // For rows with many nonzeros, use a lookup table to find duplicates
442  std::unordered_map<ordinal_type, offset_type> idxLookup(numIndicesLookup);
443 
444  // Put existing indices into the lookup table
445  for (size_t k = 0; k < num_assigned; k++) {
446  idxLookup[cur_indices[start+k]] = start+k;
447  }
448 
449  // Check for new indices in table; insert if not there yet
450  for (size_t k = 0; k < num_new_indices; k++) {
451  const ordinal_type idx = std::forward<IndexMap>(map)(new_indices[k]);
452  offset_type row_offset;
453 
454  auto it = idxLookup.find(idx);
455  if (it == idxLookup.end()) {
456  if (num_inserted >= num_avail) { // not enough room
457  return Teuchos::OrdinalTraits<size_t>::invalid();
458  }
459  // index not found; insert it
460  row_offset = end;
461  cur_indices[end++] = idx;
462  idxLookup[idx] = row_offset;
463  num_inserted++;
464  }
465  else {
466  // index found; note its position
467  row_offset = it->second;
468  }
469  if (cb) {
470  cb(k, start, row_offset - start);
471  }
472  }
473  }
474  num_assigned += num_inserted;
475  return num_inserted;
476 }
477 
479 template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
480 size_t
481 find_crs_indices(
482  typename Pointers::value_type const row,
483  Pointers const& row_ptrs,
484  const size_t curNumEntries,
485  Indices1 const& cur_indices,
486  Indices2 const& new_indices,
487  IndexMap&& map,
488  Callback&& cb)
489 {
490  if (new_indices.size() == 0)
491  return 0;
492 
493  using ordinal =
494  typename std::remove_const<typename Indices1::value_type>::type;
495  auto invalid_ordinal = Teuchos::OrdinalTraits<ordinal>::invalid();
496 
497  const size_t start = static_cast<size_t> (row_ptrs[row]);
498  const size_t end = start + curNumEntries;
499  size_t num_found = 0;
500  for (size_t k = 0; k < new_indices.size(); k++)
501  {
502  auto row_offset = start;
503  auto idx = std::forward<IndexMap>(map)(new_indices[k]);
504  if (idx == invalid_ordinal)
505  continue;
506  for (; row_offset < end; row_offset++)
507  {
508  if (idx == cur_indices[row_offset])
509  {
510  std::forward<Callback>(cb)(k, start, row_offset - start);
511  num_found++;
512  }
513  }
514  }
515  return num_found;
516 }
517 
518 } // namespace impl
519 
520 
538 template<class RowPtr, class Indices, class Padding>
539 void
541  const RowPtr& rowPtrBeg,
542  const RowPtr& rowPtrEnd,
543  Indices& indices_wdv,
544  const Padding& padding,
545  const int my_rank,
546  const bool verbose)
547 {
548  using impl::pad_crs_arrays;
549  // send empty values array
550  Indices values_null;
551  pad_crs_arrays<RowPtr, Indices, Indices, Padding>(
552  impl::PadCrsAction::INDICES_ONLY, rowPtrBeg, rowPtrEnd,
553  indices_wdv, values_null, padding, my_rank, verbose);
554 }
555 
556 template<class RowPtr, class Indices, class Values, class Padding>
557 void
559  const RowPtr& rowPtrBeg,
560  const RowPtr& rowPtrEnd,
561  Indices& indices_wdv,
562  Values& values_wdv,
563  const Padding& padding,
564  const int my_rank,
565  const bool verbose)
566 {
567  using impl::pad_crs_arrays;
568  pad_crs_arrays<RowPtr, Indices, Values, Padding>(
569  impl::PadCrsAction::INDICES_AND_VALUES, rowPtrBeg, rowPtrEnd,
570  indices_wdv, values_wdv, padding, my_rank, verbose);
571 }
572 
618 template <class Pointers, class InOutIndices, class InIndices>
619 size_t
621  typename Pointers::value_type const row,
622  Pointers const& rowPtrs,
623  InOutIndices& curIndices,
624  size_t& numAssigned,
625  InIndices const& newIndices,
626  std::function<void(const size_t, const size_t, const size_t)> cb =
627  std::function<void(const size_t, const size_t, const size_t)>())
628 {
629  static_assert(std::is_same<typename std::remove_const<typename InOutIndices::value_type>::type,
630  typename std::remove_const<typename InIndices::value_type>::type>::value,
631  "Expected views to have same value type");
632 
633  // Provide a unit map for the more general insert_indices
634  using ordinal = typename InOutIndices::value_type;
635  auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
636  numAssigned, newIndices, [](ordinal const idx) { return idx; }, cb);
637  return numInserted;
638 }
639 
640 template <class Pointers, class InOutIndices, class InIndices>
641 size_t
643  typename Pointers::value_type const row,
644  Pointers const& rowPtrs,
645  InOutIndices& curIndices,
646  size_t& numAssigned,
647  InIndices const& newIndices,
648  std::function<typename InOutIndices::value_type(const typename InIndices::value_type)> map,
649  std::function<void(const size_t, const size_t, const size_t)> cb =
650  std::function<void(const size_t, const size_t, const size_t)>())
651 {
652  auto numInserted = impl::insert_crs_indices(row, rowPtrs, curIndices,
653  numAssigned, newIndices, map, cb);
654  return numInserted;
655 }
656 
657 
687 template <class Pointers, class Indices1, class Indices2, class Callback>
688 size_t
690  typename Pointers::value_type const row,
691  Pointers const& rowPtrs,
692  const size_t curNumEntries,
693  Indices1 const& curIndices,
694  Indices2 const& newIndices,
695  Callback&& cb)
696 {
697  static_assert(std::is_same<typename std::remove_const<typename Indices1::value_type>::type,
698  typename std::remove_const<typename Indices2::value_type>::type>::value,
699  "Expected views to have same value type");
700  // Provide a unit map for the more general find_crs_indices
701  using ordinal = typename Indices2::value_type;
702  auto numFound = impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices,
703  [=](ordinal ind){ return ind; }, cb);
704  return numFound;
705 }
706 
707 template <class Pointers, class Indices1, class Indices2, class IndexMap, class Callback>
708 size_t
710  typename Pointers::value_type const row,
711  Pointers const& rowPtrs,
712  const size_t curNumEntries,
713  Indices1 const& curIndices,
714  Indices2 const& newIndices,
715  IndexMap&& map,
716  Callback&& cb)
717 {
718  return impl::find_crs_indices(row, rowPtrs, curNumEntries, curIndices, newIndices, map, cb);
719 }
720 
721 } // namespace Details
722 } // namespace Tpetra
723 
724 #endif // TPETRA_DETAILS_CRSUTILS_HPP
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
void start()
Start the deep_copy counter.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.