Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_Merge.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_MERGE_HPP
11 #define TPETRA_DETAILS_MERGE_HPP
12 
13 #include "TpetraCore_config.h"
14 #include "Teuchos_TestForException.hpp"
15 #include <algorithm> // std::sort
16 #include <utility> // std::pair, std::make_pair
17 #include <stdexcept>
18 
19 namespace Tpetra {
20 namespace Details {
21 
40 template<class OrdinalType, class IndexType>
41 IndexType
42 countMergeUnsortedIndices (const OrdinalType curInds[],
43  const IndexType numCurInds,
44  const OrdinalType inputInds[],
45  const IndexType numInputInds)
46 {
47  IndexType mergeCount = 0;
48 
49  if (numCurInds <= numInputInds) {
50  // More input than current entries, so iterate linearly over
51  // input and scan current entries repeatedly.
52  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
53  const OrdinalType inVal = inputInds[inPos];
54  for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
55  if (curInds[curPos] == inVal) {
56  ++mergeCount;
57  }
58  }
59  }
60  }
61  else { // numCurInds > numInputInds
62  // More current entries than input, so iterate linearly over
63  // current entries and scan input repeatedly.
64  for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
65  const OrdinalType curVal = curInds[curPos];
66  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
67  if (inputInds[inPos] == curVal) {
68  ++mergeCount;
69  }
70  }
71  }
72  }
73 
74 #ifdef HAVE_TPETRA_DEBUG
75  TEUCHOS_TEST_FOR_EXCEPTION
76  (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
77  mergeCount << " > numInputInds = " << numInputInds << ".");
78 #endif // HAVE_TPETRA_DEBUG
79  return mergeCount;
80 }
81 
99 template<class OrdinalType, class IndexType>
100 IndexType
101 countMergeSortedIndices (const OrdinalType curInds[],
102  const IndexType numCurInds,
103  const OrdinalType inputInds[],
104  const IndexType numInputInds)
105 {
106  // Only count possible merges; don't merge yet. If the row
107  // doesn't have enough space, we want to return without side
108  // effects.
109  IndexType curPos = 0;
110  IndexType inPos = 0;
111  IndexType mergeCount = 0;
112  while (inPos < numInputInds && curPos < numCurInds) {
113  const OrdinalType inVal = inputInds[inPos];
114  const OrdinalType curVal = curInds[curPos];
115 
116  if (curVal == inVal) { // can merge
117  ++mergeCount;
118  ++inPos; // go on to next input
119  } else if (curVal < inVal) {
120  ++curPos; // go on to next row entry
121  } else { // curVal > inVal
122  ++inPos; // can't merge it ever, since row entries sorted
123  }
124  }
125 
126 #ifdef HAVE_TPETRA_DEBUG
127  TEUCHOS_TEST_FOR_EXCEPTION
128  (inPos > numInputInds, std::logic_error, "inPos = " << inPos <<
129  " > numInputInds = " << numInputInds << ".");
130  TEUCHOS_TEST_FOR_EXCEPTION
131  (curPos > numCurInds, std::logic_error, "curPos = " << curPos <<
132  " > numCurInds = " << numCurInds << ".");
133  TEUCHOS_TEST_FOR_EXCEPTION
134  (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
135  mergeCount << " > numInputInds = " << numInputInds << ".");
136 #endif // HAVE_TPETRA_DEBUG
137 
138  // At this point, 2 situations are possible:
139  //
140  // 1. inPos == numInputInds: We looked at all inputs. Some
141  // (mergeCount of them) could have been merged.
142  // 2. inPos < numInputInds: We didn't get to look at all inputs.
143  // Since the inputs are sorted, we know that those inputs we
144  // didn't examine weren't mergeable.
145  //
146  // Either way, mergeCount gives the number of mergeable inputs.
147  return mergeCount;
148 }
149 
150 
171 template<class OrdinalType, class IndexType>
172 std::pair<bool, IndexType>
173 mergeSortedIndices (OrdinalType curInds[],
174  const IndexType midPos,
175  const IndexType endPos,
176  const OrdinalType inputInds[],
177  const IndexType numInputInds)
178 {
179  // Optimize for the following cases, in decreasing order of
180  // optimization concern:
181  //
182  // a. Current row has allocated space but no entries
183  // b. All input indices already in the graph
184  //
185  // If the row has insufficient space for a merge, don't do
186  // anything! Just return an upper bound on the number of extra
187  // entries required to fit everything. This imposes extra cost,
188  // but correctly supports the count, allocate, fill, compute
189  // pattern. (If some entries were merged in and others weren't,
190  // how would you know which got merged in? CrsGraph insert is
191  // idempotent, but CrsMatrix insert does a += on the value and
192  // is therefore not idempotent.)
193  if (midPos == 0) {
194  // Current row has no entries, but may have preallocated space.
195  if (endPos >= numInputInds) {
196  // Sufficient space for new entries; copy directly.
197  for (IndexType k = 0; k < numInputInds; ++k) {
198  curInds[k] = inputInds[k];
199  }
200  std::sort (curInds, curInds + numInputInds);
201  return std::make_pair (true, numInputInds);
202  }
203  else { // not enough space
204  return std::make_pair (false, numInputInds);
205  }
206  }
207  else { // current row contains indices, requiring merge
208  // Only count possible merges; don't merge yet. If the row
209  // doesn't have enough space, we want to return without side
210  // effects.
211  const IndexType mergeCount =
212  countMergeSortedIndices<OrdinalType, IndexType> (curInds, midPos,
213  inputInds,
214  numInputInds);
215  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
216  const IndexType newRowLen = midPos + extraSpaceNeeded;
217  if (newRowLen > endPos) {
218  return std::make_pair (false, newRowLen);
219  }
220  else { // we have enough space; merge in
221  IndexType curPos = 0;
222  IndexType inPos = 0;
223  IndexType newPos = midPos;
224  while (inPos < numInputInds && curPos < midPos) {
225  const OrdinalType inVal = inputInds[inPos];
226  const OrdinalType curVal = curInds[curPos];
227 
228  if (curVal == inVal) { // can merge
229  ++inPos; // merge and go on to next input
230  } else if (curVal < inVal) {
231  ++curPos; // go on to next row entry
232  } else { // curVal > inVal
233  // The input doesn't exist in the row.
234  // Copy it to the end; we'll sort it in later.
235  curInds[newPos] = inVal;
236  ++newPos;
237  ++inPos; // move on to next input
238  }
239  }
240 
241  // If any inputs remain, and the current row has space for them,
242  // then copy them in. We'll sort them later.
243  for (; inPos < numInputInds && newPos < newRowLen; ++inPos, ++newPos) {
244  curInds[newPos] = inputInds[inPos];
245  }
246 
247 #ifdef HAVE_TPETRA_DEBUG
248  TEUCHOS_TEST_FOR_EXCEPTION
249  (newPos != newRowLen, std::logic_error, "mergeSortedIndices: newPos = "
250  << newPos << " != newRowLen = " << newRowLen << " = " << midPos <<
251  " + " << extraSpaceNeeded << ". Please report this bug to the Tpetra "
252  "developers.");
253 #endif // HAVE_TPETRA_DEBUG
254 
255  if (newPos != midPos) { // new entries at end; sort them in
256  // FIXME (mfh 03 Jan 2016) Rather than sorting, it would
257  // be faster (linear time) just to iterate backwards
258  // through the current entries, pushing them over to make
259  // room for unmerged input. However, I'm not so worried
260  // about the asymptotics here, because dense rows in a
261  // sparse matrix are ungood anyway.
262  std::sort (curInds, curInds + newPos);
263  }
264  return std::make_pair (true, newPos);
265  }
266  }
267 }
268 
269 
291 template<class OrdinalType, class IndexType>
292 std::pair<bool, IndexType>
293 mergeUnsortedIndices (OrdinalType curInds[],
294  const IndexType midPos,
295  const IndexType endPos,
296  const OrdinalType inputInds[],
297  const IndexType numInputInds)
298 {
299  // Optimize for the following cases, in decreasing order of
300  // optimization concern:
301  //
302  // a. Current row has allocated space but no entries
303  // b. All input indices already in the graph
304  //
305  // If the row has insufficient space for a merge, don't do
306  // anything! Just return an upper bound on the number of extra
307  // entries required to fit everything. This imposes extra cost,
308  // but correctly supports the count, allocate, fill, compute
309  // pattern. (If some entries were merged in and others weren't,
310  // how would you know which got merged in? CrsGraph insert is
311  // idempotent, but CrsMatrix insert does a += on the value and
312  // is therefore not idempotent.)
313  if (midPos == 0) {
314  // Current row has no entries, but may have preallocated space.
315  if (endPos >= numInputInds) {
316  // Sufficient space for new entries; copy directly.
317  for (IndexType k = 0; k < numInputInds; ++k) {
318  curInds[k] = inputInds[k];
319  }
320  return std::make_pair (true, numInputInds);
321  }
322  else { // not enough space
323  return std::make_pair (false, numInputInds);
324  }
325  }
326  else { // current row contains indices, requiring merge
327  // Only count possible merges; don't merge yet. If the row
328  // doesn't have enough space, we want to return without side
329  // effects.
330  const IndexType mergeCount =
331  countMergeUnsortedIndices<OrdinalType, IndexType> (curInds, midPos,
332  inputInds,
333  numInputInds);
334  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
335  const IndexType newRowLen = midPos + extraSpaceNeeded;
336  if (newRowLen > endPos) {
337  return std::make_pair (false, newRowLen);
338  }
339  else { // we have enough space; merge in
340  // Iterate linearly over input. Scan current entries
341  // repeatedly. Add new entries at end.
342  IndexType newPos = midPos;
343  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
344  const OrdinalType inVal = inputInds[inPos];
345  bool merged = false;
346  for (IndexType curPos = 0; curPos < midPos; ++curPos) {
347  if (curInds[curPos] == inVal) {
348  merged = true;
349  }
350  }
351  if (! merged) {
352  curInds[newPos] = inVal;
353  ++newPos;
354  }
355  }
356  return std::make_pair (true, newPos);
357  }
358  }
359 }
360 
385 template<class OrdinalType, class ValueType, class IndexType>
386 std::pair<bool, IndexType>
387 mergeUnsortedIndicesAndValues (OrdinalType curInds[],
388  ValueType curVals[],
389  const IndexType midPos,
390  const IndexType endPos,
391  const OrdinalType inputInds[],
392  const ValueType inputVals[],
393  const IndexType numInputInds)
394 {
395  // Optimize for the following cases, in decreasing order of
396  // optimization concern:
397  //
398  // a. Current row has allocated space but no entries
399  // b. All input indices already in the graph
400  //
401  // If the row has insufficient space for a merge, don't do
402  // anything! Just return an upper bound on the number of extra
403  // entries required to fit everything. This imposes extra cost,
404  // but correctly supports the count, allocate, fill, compute
405  // pattern. (If some entries were merged in and others weren't,
406  // how would you know which got merged in? CrsGraph insert is
407  // idempotent, but CrsMatrix insert does a += on the value and
408  // is therefore not idempotent.)
409  if (midPos == 0) {
410  // Current row has no entries, but may have preallocated space.
411  if (endPos >= numInputInds) {
412  // Sufficient space for new entries; copy directly.
413  for (IndexType k = 0; k < numInputInds; ++k) {
414  curInds[k] = inputInds[k];
415  curVals[k] = inputVals[k];
416  }
417  return std::make_pair (true, numInputInds);
418  }
419  else { // not enough space
420  return std::make_pair (false, numInputInds);
421  }
422  }
423  else { // current row contains indices, requiring merge
424  // Only count possible merges; don't merge yet. If the row
425  // doesn't have enough space, we want to return without side
426  // effects.
427  const IndexType mergeCount =
428  countMergeUnsortedIndices<OrdinalType, IndexType> (curInds, midPos,
429  inputInds,
430  numInputInds);
431  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
432  const IndexType newRowLen = midPos + extraSpaceNeeded;
433  if (newRowLen > endPos) {
434  return std::make_pair (false, newRowLen);
435  }
436  else { // we have enough space; merge in
437  // Iterate linearly over input. Scan current entries
438  // repeatedly. Add new entries at end.
439  IndexType newPos = midPos;
440  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
441  const OrdinalType inInd = inputInds[inPos];
442  bool merged = false;
443  for (IndexType curPos = 0; curPos < midPos; ++curPos) {
444  if (curInds[curPos] == inInd) {
445  merged = true;
446  curVals[curPos] += inputVals[inPos];
447  }
448  }
449  if (! merged) {
450  curInds[newPos] = inInd;
451  curVals[newPos] = inputVals[inPos];
452  ++newPos;
453  }
454  }
455  return std::make_pair (true, newPos);
456  }
457  }
458 }
459 
460 } // namespace Details
461 } // namespace Tpetra
462 
463 #endif // TPETRA_DETAILS_MERGE_HPP
std::pair< bool, IndexType > mergeUnsortedIndicesAndValues(OrdinalType curInds[], ValueType curVals[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const ValueType inputVals[], const IndexType numInputInds)
Attempt to merge the input indices and values into the current row&#39;s column indices and corresponding...
std::pair< bool, IndexType > mergeSortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row&#39;s column indices, assuming that both the curr...
IndexType countMergeUnsortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
IndexType countMergeSortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
std::pair< bool, IndexType > mergeUnsortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row&#39;s column indices, assuming that both the curr...