Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_Details_Merge.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_MERGE_HPP
43 #define TPETRA_DETAILS_MERGE_HPP
44 
45 #include "TpetraCore_config.h"
46 #include "Teuchos_TestForException.hpp"
47 #include <algorithm> // std::sort
48 #include <utility> // std::pair, std::make_pair
49 #include <stdexcept>
50 
51 namespace Tpetra {
52 namespace Details {
53 
72 template<class OrdinalType, class IndexType>
73 IndexType
74 countMergeUnsortedIndices (const OrdinalType curInds[],
75  const IndexType numCurInds,
76  const OrdinalType inputInds[],
77  const IndexType numInputInds)
78 {
79  IndexType mergeCount = 0;
80 
81  if (numCurInds <= numInputInds) {
82  // More input than current entries, so iterate linearly over
83  // input and scan current entries repeatedly.
84  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
85  const OrdinalType inVal = inputInds[inPos];
86  for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
87  if (curInds[curPos] == inVal) {
88  ++mergeCount;
89  }
90  }
91  }
92  }
93  else { // numCurInds > numInputInds
94  // More current entries than input, so iterate linearly over
95  // current entries and scan input repeatedly.
96  for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
97  const OrdinalType curVal = curInds[curPos];
98  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
99  if (inputInds[inPos] == curVal) {
100  ++mergeCount;
101  }
102  }
103  }
104  }
105 
106 #ifdef HAVE_TPETRA_DEBUG
107  TEUCHOS_TEST_FOR_EXCEPTION
108  (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
109  mergeCount << " > numInputInds = " << numInputInds << ".");
110 #endif // HAVE_TPETRA_DEBUG
111  return mergeCount;
112 }
113 
131 template<class OrdinalType, class IndexType>
132 IndexType
133 countMergeSortedIndices (const OrdinalType curInds[],
134  const IndexType numCurInds,
135  const OrdinalType inputInds[],
136  const IndexType numInputInds)
137 {
138  // Only count possible merges; don't merge yet. If the row
139  // doesn't have enough space, we want to return without side
140  // effects.
141  IndexType curPos = 0;
142  IndexType inPos = 0;
143  IndexType mergeCount = 0;
144  while (inPos < numInputInds && curPos < numCurInds) {
145  const OrdinalType inVal = inputInds[inPos];
146  const OrdinalType curVal = curInds[curPos];
147 
148  if (curVal == inVal) { // can merge
149  ++mergeCount;
150  ++inPos; // go on to next input
151  } else if (curVal < inVal) {
152  ++curPos; // go on to next row entry
153  } else { // curVal > inVal
154  ++inPos; // can't merge it ever, since row entries sorted
155  }
156  }
157 
158 #ifdef HAVE_TPETRA_DEBUG
159  TEUCHOS_TEST_FOR_EXCEPTION
160  (inPos > numInputInds, std::logic_error, "inPos = " << inPos <<
161  " > numInputInds = " << numInputInds << ".");
162  TEUCHOS_TEST_FOR_EXCEPTION
163  (curPos > numCurInds, std::logic_error, "curPos = " << curPos <<
164  " > numCurInds = " << numCurInds << ".");
165  TEUCHOS_TEST_FOR_EXCEPTION
166  (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
167  mergeCount << " > numInputInds = " << numInputInds << ".");
168 #endif // HAVE_TPETRA_DEBUG
169 
170  // At this point, 2 situations are possible:
171  //
172  // 1. inPos == numInputInds: We looked at all inputs. Some
173  // (mergeCount of them) could have been merged.
174  // 2. inPos < numInputInds: We didn't get to look at all inputs.
175  // Since the inputs are sorted, we know that those inputs we
176  // didn't examine weren't mergeable.
177  //
178  // Either way, mergeCount gives the number of mergeable inputs.
179  return mergeCount;
180 }
181 
182 
203 template<class OrdinalType, class IndexType>
204 std::pair<bool, IndexType>
205 mergeSortedIndices (OrdinalType curInds[],
206  const IndexType midPos,
207  const IndexType endPos,
208  const OrdinalType inputInds[],
209  const IndexType numInputInds)
210 {
211  // Optimize for the following cases, in decreasing order of
212  // optimization concern:
213  //
214  // a. Current row has allocated space but no entries
215  // b. All input indices already in the graph
216  //
217  // If the row has insufficient space for a merge, don't do
218  // anything! Just return an upper bound on the number of extra
219  // entries required to fit everything. This imposes extra cost,
220  // but correctly supports the count, allocate, fill, compute
221  // pattern. (If some entries were merged in and others weren't,
222  // how would you know which got merged in? CrsGraph insert is
223  // idempotent, but CrsMatrix insert does a += on the value and
224  // is therefore not idempotent.)
225  if (midPos == 0) {
226  // Current row has no entries, but may have preallocated space.
227  if (endPos >= numInputInds) {
228  // Sufficient space for new entries; copy directly.
229  for (IndexType k = 0; k < numInputInds; ++k) {
230  curInds[k] = inputInds[k];
231  }
232  std::sort (curInds, curInds + numInputInds);
233  return std::make_pair (true, numInputInds);
234  }
235  else { // not enough space
236  return std::make_pair (false, numInputInds);
237  }
238  }
239  else { // current row contains indices, requiring merge
240  // Only count possible merges; don't merge yet. If the row
241  // doesn't have enough space, we want to return without side
242  // effects.
243  const IndexType mergeCount =
244  countMergeSortedIndices<OrdinalType, IndexType> (curInds, midPos,
245  inputInds,
246  numInputInds);
247  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
248  const IndexType newRowLen = midPos + extraSpaceNeeded;
249  if (newRowLen > endPos) {
250  return std::make_pair (false, newRowLen);
251  }
252  else { // we have enough space; merge in
253  IndexType curPos = 0;
254  IndexType inPos = 0;
255  IndexType newPos = midPos;
256  while (inPos < numInputInds && curPos < midPos) {
257  const OrdinalType inVal = inputInds[inPos];
258  const OrdinalType curVal = curInds[curPos];
259 
260  if (curVal == inVal) { // can merge
261  ++inPos; // merge and go on to next input
262  } else if (curVal < inVal) {
263  ++curPos; // go on to next row entry
264  } else { // curVal > inVal
265  // The input doesn't exist in the row.
266  // Copy it to the end; we'll sort it in later.
267  curInds[newPos] = inVal;
268  ++newPos;
269  ++inPos; // move on to next input
270  }
271  }
272 
273  // If any inputs remain, and the current row has space for them,
274  // then copy them in. We'll sort them later.
275  for (; inPos < numInputInds && newPos < newRowLen; ++inPos, ++newPos) {
276  curInds[newPos] = inputInds[inPos];
277  }
278 
279 #ifdef HAVE_TPETRA_DEBUG
280  TEUCHOS_TEST_FOR_EXCEPTION
281  (newPos != newRowLen, std::logic_error, "mergeSortedIndices: newPos = "
282  << newPos << " != newRowLen = " << newRowLen << " = " << midPos <<
283  " + " << extraSpaceNeeded << ". Please report this bug to the Tpetra "
284  "developers.");
285 #endif // HAVE_TPETRA_DEBUG
286 
287  if (newPos != midPos) { // new entries at end; sort them in
288  // FIXME (mfh 03 Jan 2016) Rather than sorting, it would
289  // be faster (linear time) just to iterate backwards
290  // through the current entries, pushing them over to make
291  // room for unmerged input. However, I'm not so worried
292  // about the asymptotics here, because dense rows in a
293  // sparse matrix are ungood anyway.
294  std::sort (curInds, curInds + newPos);
295  }
296  return std::make_pair (true, newPos);
297  }
298  }
299 }
300 
301 
323 template<class OrdinalType, class IndexType>
324 std::pair<bool, IndexType>
325 mergeUnsortedIndices (OrdinalType curInds[],
326  const IndexType midPos,
327  const IndexType endPos,
328  const OrdinalType inputInds[],
329  const IndexType numInputInds)
330 {
331  // Optimize for the following cases, in decreasing order of
332  // optimization concern:
333  //
334  // a. Current row has allocated space but no entries
335  // b. All input indices already in the graph
336  //
337  // If the row has insufficient space for a merge, don't do
338  // anything! Just return an upper bound on the number of extra
339  // entries required to fit everything. This imposes extra cost,
340  // but correctly supports the count, allocate, fill, compute
341  // pattern. (If some entries were merged in and others weren't,
342  // how would you know which got merged in? CrsGraph insert is
343  // idempotent, but CrsMatrix insert does a += on the value and
344  // is therefore not idempotent.)
345  if (midPos == 0) {
346  // Current row has no entries, but may have preallocated space.
347  if (endPos >= numInputInds) {
348  // Sufficient space for new entries; copy directly.
349  for (IndexType k = 0; k < numInputInds; ++k) {
350  curInds[k] = inputInds[k];
351  }
352  return std::make_pair (true, numInputInds);
353  }
354  else { // not enough space
355  return std::make_pair (false, numInputInds);
356  }
357  }
358  else { // current row contains indices, requiring merge
359  // Only count possible merges; don't merge yet. If the row
360  // doesn't have enough space, we want to return without side
361  // effects.
362  const IndexType mergeCount =
363  countMergeUnsortedIndices<OrdinalType, IndexType> (curInds, midPos,
364  inputInds,
365  numInputInds);
366  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
367  const IndexType newRowLen = midPos + extraSpaceNeeded;
368  if (newRowLen > endPos) {
369  return std::make_pair (false, newRowLen);
370  }
371  else { // we have enough space; merge in
372  // Iterate linearly over input. Scan current entries
373  // repeatedly. Add new entries at end.
374  IndexType newPos = midPos;
375  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
376  const OrdinalType inVal = inputInds[inPos];
377  bool merged = false;
378  for (IndexType curPos = 0; curPos < midPos; ++curPos) {
379  if (curInds[curPos] == inVal) {
380  merged = true;
381  }
382  }
383  if (! merged) {
384  curInds[newPos] = inVal;
385  ++newPos;
386  }
387  }
388  return std::make_pair (true, newPos);
389  }
390  }
391 }
392 
417 template<class OrdinalType, class ValueType, class IndexType>
418 std::pair<bool, IndexType>
419 mergeUnsortedIndicesAndValues (OrdinalType curInds[],
420  ValueType curVals[],
421  const IndexType midPos,
422  const IndexType endPos,
423  const OrdinalType inputInds[],
424  const ValueType inputVals[],
425  const IndexType numInputInds)
426 {
427  // Optimize for the following cases, in decreasing order of
428  // optimization concern:
429  //
430  // a. Current row has allocated space but no entries
431  // b. All input indices already in the graph
432  //
433  // If the row has insufficient space for a merge, don't do
434  // anything! Just return an upper bound on the number of extra
435  // entries required to fit everything. This imposes extra cost,
436  // but correctly supports the count, allocate, fill, compute
437  // pattern. (If some entries were merged in and others weren't,
438  // how would you know which got merged in? CrsGraph insert is
439  // idempotent, but CrsMatrix insert does a += on the value and
440  // is therefore not idempotent.)
441  if (midPos == 0) {
442  // Current row has no entries, but may have preallocated space.
443  if (endPos >= numInputInds) {
444  // Sufficient space for new entries; copy directly.
445  for (IndexType k = 0; k < numInputInds; ++k) {
446  curInds[k] = inputInds[k];
447  curVals[k] = inputVals[k];
448  }
449  return std::make_pair (true, numInputInds);
450  }
451  else { // not enough space
452  return std::make_pair (false, numInputInds);
453  }
454  }
455  else { // current row contains indices, requiring merge
456  // Only count possible merges; don't merge yet. If the row
457  // doesn't have enough space, we want to return without side
458  // effects.
459  const IndexType mergeCount =
460  countMergeUnsortedIndices<OrdinalType, IndexType> (curInds, midPos,
461  inputInds,
462  numInputInds);
463  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
464  const IndexType newRowLen = midPos + extraSpaceNeeded;
465  if (newRowLen > endPos) {
466  return std::make_pair (false, newRowLen);
467  }
468  else { // we have enough space; merge in
469  // Iterate linearly over input. Scan current entries
470  // repeatedly. Add new entries at end.
471  IndexType newPos = midPos;
472  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
473  const OrdinalType inInd = inputInds[inPos];
474  bool merged = false;
475  for (IndexType curPos = 0; curPos < midPos; ++curPos) {
476  if (curInds[curPos] == inInd) {
477  merged = true;
478  curVals[curPos] += inputVals[inPos];
479  }
480  }
481  if (! merged) {
482  curInds[newPos] = inInd;
483  curVals[newPos] = inputVals[inPos];
484  ++newPos;
485  }
486  }
487  return std::make_pair (true, newPos);
488  }
489  }
490 }
491 
492 } // namespace Details
493 } // namespace Tpetra
494 
495 #endif // TPETRA_DETAILS_MERGE_HPP
std::pair< bool, IndexType > mergeUnsortedIndicesAndValues(OrdinalType curInds[], ValueType curVals[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const ValueType inputVals[], const IndexType numInputInds)
Attempt to merge the input indices and values into the current row&#39;s column indices and corresponding...
std::pair< bool, IndexType > mergeSortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row&#39;s column indices, assuming that both the curr...
IndexType countMergeUnsortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
IndexType countMergeSortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
std::pair< bool, IndexType > mergeUnsortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row&#39;s column indices, assuming that both the curr...