Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_Merge.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_MERGE_HPP
11 #define TPETRA_DETAILS_MERGE_HPP
12 
13 #include "TpetraCore_config.h"
14 #include "Teuchos_TestForException.hpp"
15 #include <algorithm> // std::sort
16 #include <utility> // std::pair, std::make_pair
17 #include <stdexcept>
18 
19 namespace Tpetra {
20 namespace Details {
21 
40 template <class OrdinalType, class IndexType>
41 IndexType
42 countMergeUnsortedIndices(const OrdinalType curInds[],
43  const IndexType numCurInds,
44  const OrdinalType inputInds[],
45  const IndexType numInputInds) {
46  IndexType mergeCount = 0;
47 
48  if (numCurInds <= numInputInds) {
49  // More input than current entries, so iterate linearly over
50  // input and scan current entries repeatedly.
51  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
52  const OrdinalType inVal = inputInds[inPos];
53  for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
54  if (curInds[curPos] == inVal) {
55  ++mergeCount;
56  }
57  }
58  }
59  } else { // numCurInds > numInputInds
60  // More current entries than input, so iterate linearly over
61  // current entries and scan input repeatedly.
62  for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
63  const OrdinalType curVal = curInds[curPos];
64  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
65  if (inputInds[inPos] == curVal) {
66  ++mergeCount;
67  }
68  }
69  }
70  }
71 
72 #ifdef HAVE_TPETRA_DEBUG
73  TEUCHOS_TEST_FOR_EXCEPTION(mergeCount > numInputInds, std::logic_error, "mergeCount = " << mergeCount << " > numInputInds = " << numInputInds << ".");
74 #endif // HAVE_TPETRA_DEBUG
75  return mergeCount;
76 }
77 
95 template <class OrdinalType, class IndexType>
96 IndexType
97 countMergeSortedIndices(const OrdinalType curInds[],
98  const IndexType numCurInds,
99  const OrdinalType inputInds[],
100  const IndexType numInputInds) {
101  // Only count possible merges; don't merge yet. If the row
102  // doesn't have enough space, we want to return without side
103  // effects.
104  IndexType curPos = 0;
105  IndexType inPos = 0;
106  IndexType mergeCount = 0;
107  while (inPos < numInputInds && curPos < numCurInds) {
108  const OrdinalType inVal = inputInds[inPos];
109  const OrdinalType curVal = curInds[curPos];
110 
111  if (curVal == inVal) { // can merge
112  ++mergeCount;
113  ++inPos; // go on to next input
114  } else if (curVal < inVal) {
115  ++curPos; // go on to next row entry
116  } else { // curVal > inVal
117  ++inPos; // can't merge it ever, since row entries sorted
118  }
119  }
120 
121 #ifdef HAVE_TPETRA_DEBUG
122  TEUCHOS_TEST_FOR_EXCEPTION(inPos > numInputInds, std::logic_error, "inPos = " << inPos << " > numInputInds = " << numInputInds << ".");
123  TEUCHOS_TEST_FOR_EXCEPTION(curPos > numCurInds, std::logic_error, "curPos = " << curPos << " > numCurInds = " << numCurInds << ".");
124  TEUCHOS_TEST_FOR_EXCEPTION(mergeCount > numInputInds, std::logic_error, "mergeCount = " << mergeCount << " > numInputInds = " << numInputInds << ".");
125 #endif // HAVE_TPETRA_DEBUG
126 
127  // At this point, 2 situations are possible:
128  //
129  // 1. inPos == numInputInds: We looked at all inputs. Some
130  // (mergeCount of them) could have been merged.
131  // 2. inPos < numInputInds: We didn't get to look at all inputs.
132  // Since the inputs are sorted, we know that those inputs we
133  // didn't examine weren't mergeable.
134  //
135  // Either way, mergeCount gives the number of mergeable inputs.
136  return mergeCount;
137 }
138 
159 template <class OrdinalType, class IndexType>
160 std::pair<bool, IndexType>
161 mergeSortedIndices(OrdinalType curInds[],
162  const IndexType midPos,
163  const IndexType endPos,
164  const OrdinalType inputInds[],
165  const IndexType numInputInds) {
166  // Optimize for the following cases, in decreasing order of
167  // optimization concern:
168  //
169  // a. Current row has allocated space but no entries
170  // b. All input indices already in the graph
171  //
172  // If the row has insufficient space for a merge, don't do
173  // anything! Just return an upper bound on the number of extra
174  // entries required to fit everything. This imposes extra cost,
175  // but correctly supports the count, allocate, fill, compute
176  // pattern. (If some entries were merged in and others weren't,
177  // how would you know which got merged in? CrsGraph insert is
178  // idempotent, but CrsMatrix insert does a += on the value and
179  // is therefore not idempotent.)
180  if (midPos == 0) {
181  // Current row has no entries, but may have preallocated space.
182  if (endPos >= numInputInds) {
183  // Sufficient space for new entries; copy directly.
184  for (IndexType k = 0; k < numInputInds; ++k) {
185  curInds[k] = inputInds[k];
186  }
187  std::sort(curInds, curInds + numInputInds);
188  return std::make_pair(true, numInputInds);
189  } else { // not enough space
190  return std::make_pair(false, numInputInds);
191  }
192  } else { // current row contains indices, requiring merge
193  // Only count possible merges; don't merge yet. If the row
194  // doesn't have enough space, we want to return without side
195  // effects.
196  const IndexType mergeCount =
197  countMergeSortedIndices<OrdinalType, IndexType>(curInds, midPos,
198  inputInds,
199  numInputInds);
200  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
201  const IndexType newRowLen = midPos + extraSpaceNeeded;
202  if (newRowLen > endPos) {
203  return std::make_pair(false, newRowLen);
204  } else { // we have enough space; merge in
205  IndexType curPos = 0;
206  IndexType inPos = 0;
207  IndexType newPos = midPos;
208  while (inPos < numInputInds && curPos < midPos) {
209  const OrdinalType inVal = inputInds[inPos];
210  const OrdinalType curVal = curInds[curPos];
211 
212  if (curVal == inVal) { // can merge
213  ++inPos; // merge and go on to next input
214  } else if (curVal < inVal) {
215  ++curPos; // go on to next row entry
216  } else { // curVal > inVal
217  // The input doesn't exist in the row.
218  // Copy it to the end; we'll sort it in later.
219  curInds[newPos] = inVal;
220  ++newPos;
221  ++inPos; // move on to next input
222  }
223  }
224 
225  // If any inputs remain, and the current row has space for them,
226  // then copy them in. We'll sort them later.
227  for (; inPos < numInputInds && newPos < newRowLen; ++inPos, ++newPos) {
228  curInds[newPos] = inputInds[inPos];
229  }
230 
231 #ifdef HAVE_TPETRA_DEBUG
232  TEUCHOS_TEST_FOR_EXCEPTION(newPos != newRowLen, std::logic_error, "mergeSortedIndices: newPos = " << newPos << " != newRowLen = " << newRowLen << " = " << midPos << " + " << extraSpaceNeeded << ". Please report this bug to the Tpetra "
233  "developers.");
234 #endif // HAVE_TPETRA_DEBUG
235 
236  if (newPos != midPos) { // new entries at end; sort them in
237  // FIXME (mfh 03 Jan 2016) Rather than sorting, it would
238  // be faster (linear time) just to iterate backwards
239  // through the current entries, pushing them over to make
240  // room for unmerged input. However, I'm not so worried
241  // about the asymptotics here, because dense rows in a
242  // sparse matrix are ungood anyway.
243  std::sort(curInds, curInds + newPos);
244  }
245  return std::make_pair(true, newPos);
246  }
247  }
248 }
249 
271 template <class OrdinalType, class IndexType>
272 std::pair<bool, IndexType>
273 mergeUnsortedIndices(OrdinalType curInds[],
274  const IndexType midPos,
275  const IndexType endPos,
276  const OrdinalType inputInds[],
277  const IndexType numInputInds) {
278  // Optimize for the following cases, in decreasing order of
279  // optimization concern:
280  //
281  // a. Current row has allocated space but no entries
282  // b. All input indices already in the graph
283  //
284  // If the row has insufficient space for a merge, don't do
285  // anything! Just return an upper bound on the number of extra
286  // entries required to fit everything. This imposes extra cost,
287  // but correctly supports the count, allocate, fill, compute
288  // pattern. (If some entries were merged in and others weren't,
289  // how would you know which got merged in? CrsGraph insert is
290  // idempotent, but CrsMatrix insert does a += on the value and
291  // is therefore not idempotent.)
292  if (midPos == 0) {
293  // Current row has no entries, but may have preallocated space.
294  if (endPos >= numInputInds) {
295  // Sufficient space for new entries; copy directly.
296  for (IndexType k = 0; k < numInputInds; ++k) {
297  curInds[k] = inputInds[k];
298  }
299  return std::make_pair(true, numInputInds);
300  } else { // not enough space
301  return std::make_pair(false, numInputInds);
302  }
303  } else { // current row contains indices, requiring merge
304  // Only count possible merges; don't merge yet. If the row
305  // doesn't have enough space, we want to return without side
306  // effects.
307  const IndexType mergeCount =
308  countMergeUnsortedIndices<OrdinalType, IndexType>(curInds, midPos,
309  inputInds,
310  numInputInds);
311  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
312  const IndexType newRowLen = midPos + extraSpaceNeeded;
313  if (newRowLen > endPos) {
314  return std::make_pair(false, newRowLen);
315  } else { // we have enough space; merge in
316  // Iterate linearly over input. Scan current entries
317  // repeatedly. Add new entries at end.
318  IndexType newPos = midPos;
319  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
320  const OrdinalType inVal = inputInds[inPos];
321  bool merged = false;
322  for (IndexType curPos = 0; curPos < midPos; ++curPos) {
323  if (curInds[curPos] == inVal) {
324  merged = true;
325  }
326  }
327  if (!merged) {
328  curInds[newPos] = inVal;
329  ++newPos;
330  }
331  }
332  return std::make_pair(true, newPos);
333  }
334  }
335 }
336 
361 template <class OrdinalType, class ValueType, class IndexType>
362 std::pair<bool, IndexType>
363 mergeUnsortedIndicesAndValues(OrdinalType curInds[],
364  ValueType curVals[],
365  const IndexType midPos,
366  const IndexType endPos,
367  const OrdinalType inputInds[],
368  const ValueType inputVals[],
369  const IndexType numInputInds) {
370  // Optimize for the following cases, in decreasing order of
371  // optimization concern:
372  //
373  // a. Current row has allocated space but no entries
374  // b. All input indices already in the graph
375  //
376  // If the row has insufficient space for a merge, don't do
377  // anything! Just return an upper bound on the number of extra
378  // entries required to fit everything. This imposes extra cost,
379  // but correctly supports the count, allocate, fill, compute
380  // pattern. (If some entries were merged in and others weren't,
381  // how would you know which got merged in? CrsGraph insert is
382  // idempotent, but CrsMatrix insert does a += on the value and
383  // is therefore not idempotent.)
384  if (midPos == 0) {
385  // Current row has no entries, but may have preallocated space.
386  if (endPos >= numInputInds) {
387  // Sufficient space for new entries; copy directly.
388  for (IndexType k = 0; k < numInputInds; ++k) {
389  curInds[k] = inputInds[k];
390  curVals[k] = inputVals[k];
391  }
392  return std::make_pair(true, numInputInds);
393  } else { // not enough space
394  return std::make_pair(false, numInputInds);
395  }
396  } else { // current row contains indices, requiring merge
397  // Only count possible merges; don't merge yet. If the row
398  // doesn't have enough space, we want to return without side
399  // effects.
400  const IndexType mergeCount =
401  countMergeUnsortedIndices<OrdinalType, IndexType>(curInds, midPos,
402  inputInds,
403  numInputInds);
404  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
405  const IndexType newRowLen = midPos + extraSpaceNeeded;
406  if (newRowLen > endPos) {
407  return std::make_pair(false, newRowLen);
408  } else { // we have enough space; merge in
409  // Iterate linearly over input. Scan current entries
410  // repeatedly. Add new entries at end.
411  IndexType newPos = midPos;
412  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
413  const OrdinalType inInd = inputInds[inPos];
414  bool merged = false;
415  for (IndexType curPos = 0; curPos < midPos; ++curPos) {
416  if (curInds[curPos] == inInd) {
417  merged = true;
418  curVals[curPos] += inputVals[inPos];
419  }
420  }
421  if (!merged) {
422  curInds[newPos] = inInd;
423  curVals[newPos] = inputVals[inPos];
424  ++newPos;
425  }
426  }
427  return std::make_pair(true, newPos);
428  }
429  }
430 }
431 
432 } // namespace Details
433 } // namespace Tpetra
434 
435 #endif // TPETRA_DETAILS_MERGE_HPP
std::pair< bool, IndexType > mergeUnsortedIndicesAndValues(OrdinalType curInds[], ValueType curVals[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const ValueType inputVals[], const IndexType numInputInds)
Attempt to merge the input indices and values into the current row&#39;s column indices and corresponding...
std::pair< bool, IndexType > mergeSortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row&#39;s column indices, assuming that both the curr...
IndexType countMergeUnsortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
IndexType countMergeSortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
std::pair< bool, IndexType > mergeUnsortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row&#39;s column indices, assuming that both the curr...