Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_Sequential_AddSparseMatrices.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos: Node API and Parallel Node Kernels
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 //@HEADER
41 
42 #ifndef KOKKOS_SEQUENTIAL_ADDSPARSEMATRICES_HPP
43 #define KOKKOS_SEQUENTIAL_ADDSPARSEMATRICES_HPP
44 
45 #include <Kokkos_ConfigDefs.hpp>
46 #include <functional> // std::plus
47 
48 namespace Kokkos {
49 namespace Sequential {
50 
70 template<class OffsetType, class OrdinalType>
71 OrdinalType
72 countMergedRowEntries (const OffsetType ptr1[],
73  const OrdinalType ind1[],
74  const OffsetType ptr2[],
75  const OrdinalType ind2[],
76  const OrdinalType i);
77 
104 template<class OffsetType,
105  class OrdinalType,
106  class ScalarType,
107  class BinaryFunctionType>
108 OrdinalType
109 mergeRows (const OffsetType ptrOut[],
110  OrdinalType indOut[],
111  ScalarType valOut[],
112  const OffsetType ptr1[],
113  const OrdinalType ind1[],
114  const ScalarType val1[],
115  const OffsetType ptr2[],
116  const OrdinalType ind2[],
117  const ScalarType val2[],
118  const OrdinalType i,
119  BinaryFunctionType f);
120 
126 template<class ScalarType>
128 public:
129  AddMatrixEntries (const ScalarType& alpha, const ScalarType& beta) :
130  alpha_ (alpha), beta_ (beta) {}
131 
132  inline ScalarType
133  operator () (const ScalarType& A_ij, const ScalarType& B_ij) const {
134  return alpha_ * A_ij + beta_ * B_ij;
135  }
136 private:
137  const ScalarType alpha_;
138  const ScalarType beta_;
139 };
140 
160 template<class OffsetType, class OrdinalType, class ScalarType>
161 void
162 addSparseMatrices (OffsetType*& ptrResult,
163  OrdinalType*& indResult,
164  ScalarType*& valResult,
165  const ScalarType alpha,
166  const OffsetType ptr1[],
167  const OrdinalType ind1[],
168  const ScalarType val1[],
169  const ScalarType beta,
170  const OffsetType ptr2[],
171  const OrdinalType ind2[],
172  const ScalarType val2[],
173  const OrdinalType numRows);
174 
175 template<class OffsetType, class OrdinalType>
176 OrdinalType
177 countMergedRowEntries (const OffsetType ptr1[],
178  const OrdinalType ind1[],
179  const OffsetType ptr2[],
180  const OrdinalType ind2[],
181  const OrdinalType i)
182 {
183  const OffsetType start1 = ptr1[i];
184  const OffsetType end1 = ptr1[i+1];
185  const OffsetType start2 = ptr2[i];
186  const OffsetType end2 = ptr2[i+1];
187 
188  OffsetType mark1 = start1, mark2 = start2;
189  OrdinalType count = 0;
190  while (mark1 < end1 && mark2 < end2) {
191  if (ind1[mark1] == ind2[mark2]) {
192  ++mark1;
193  ++mark2;
194  } else if (ind1[mark1] < ind2[mark2]) {
195  ++mark1;
196  } else { // ind1[mark1] > ind2[mark2]
197  ++mark2;
198  }
199  ++count;
200  }
201  // Include any remaining entries.
202  count += end1 - mark1;
203  count += end2 - mark2;
204  return count;
205 }
206 
207 template<class OffsetType,
208  class OrdinalType,
209  class ScalarType,
210  class BinaryFunctionType>
211 OrdinalType
212 mergeRows (const OffsetType ptrOut[],
213  OrdinalType indOut[],
214  ScalarType valOut[],
215  const OffsetType ptr1[],
216  const OrdinalType ind1[],
217  const ScalarType val1[],
218  const OffsetType ptr2[],
219  const OrdinalType ind2[],
220  const ScalarType val2[],
221  const OrdinalType i,
222  BinaryFunctionType f)
223 {
224  const OffsetType startOut = ptrOut[i];
225  const OffsetType endOut = ptrOut[i+1];
226  const OffsetType start1 = ptr1[i];
227  const OffsetType end1 = ptr1[i+1];
228  const OffsetType start2 = ptr2[i];
229  const OffsetType end2 = ptr2[i+1];
230  const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero ();
231 
232  OffsetType mark1 = start1, mark2 = start2, markOut = startOut;
233  while (mark1 < end1 && mark2 < end2 && markOut < endOut) {
234  if (ind1[mark1] == ind2[mark2]) {
235  indOut[markOut] = ind1[mark1];
236  valOut[markOut] = f (val1[mark1], val2[mark2]);
237  ++mark1;
238  ++mark2;
239  } else if (ind1[mark1] < ind2[mark2]) {
240  indOut[markOut] = ind1[mark1];
241  // This makes sense if f(x,y) is alpha*x + beta*y.
242  valOut[markOut] = f (val1[mark1], zero);
243  ++mark1;
244  } else { // ind1[mark1] > ind2[mark2]
245  indOut[markOut] = ind2[mark2];
246  // This makes sense if f(x,y) is alpha*x + beta*y.
247  valOut[markOut] = f (zero, val2[mark2]);
248  ++mark2;
249  }
250  ++markOut;
251  }
252  // Include any remaining entries.
253  while (mark1 < end1 && markOut < endOut) {
254  indOut[markOut] = ind1[mark1];
255  // This makes sense if f(x,y) is alpha*x + beta*y.
256  valOut[markOut] = f (val1[mark1], zero);
257  ++mark1;
258  ++markOut;
259  }
260  while (mark2 < end2 && markOut < endOut) {
261  indOut[markOut] = ind2[mark2];
262  // This makes sense if f(x,y) is alpha*x + beta*y.
263  valOut[markOut] = f (zero, val2[mark2]);
264  ++mark2;
265  ++markOut;
266  }
267  // This is a logic error, because it means either that
268  // countMergedRowEntries didn't work, or that it was called
269  // incorrectly for this row.
271  markOut >= endOut && (mark1 < end1 || mark2 < end2),
272  std::logic_error,
273  "Kokkos::Sequential::mergeRows: Row " << i << " of the output array has "
274  << (end1 - mark1) << " + " << (end2 - mark2) << " too few entries.");
275  return markOut;
276 }
277 
278 template<class OffsetType, class OrdinalType, class ScalarType>
279 void
280 addSparseMatrices (OffsetType*& ptrResult,
281  OrdinalType*& indResult,
282  ScalarType*& valResult,
283  const ScalarType alpha,
284  const OffsetType ptr1[],
285  const OrdinalType ind1[],
286  const ScalarType val1[],
287  const ScalarType beta,
288  const OffsetType ptr2[],
289  const OrdinalType ind2[],
290  const ScalarType val2[],
291  const OrdinalType numRows)
292 {
294 
295  // We don't allocate using ArrayRCP's constructor (that takes the
296  // array size), because it initializes the arrays itself. We want
297  // to initialize ourselves, to ensure first-touch allocation.
298 
299  OffsetType* ptrOut = NULL;
300  OrdinalType* indOut = NULL;
301  ScalarType* valOut = NULL;
302 
303  // Allocate and fill ptrOut.
304  try {
305  // We should be careful not to initialize the array here, so that
306  // the parallelizable loop that assigns to it below will first-touch
307  // initialize it.
308  ptrOut = new OffsetType [numRows + 1];
309 
310  // Expect that the column indices are sorted and merged. Compute
311  // the number of entries in each row. We'll do this in place in the
312  // row offsets array. This may be done safely in parallel. Doing
313  // so would first-touch initialize ptrOut.
314 
315  if (alpha == STS::zero ()) {
316  if (beta == STS::zero ()) {
317  // Special case: alpha == 0, beta == 0.
318  // The resulting sum has zero entries.
319  std::fill (ptrOut, ptrOut + numRows + 1, Teuchos::as<OffsetType> (0));
320  ptrResult = ptrOut;
321  indResult = NULL;
322  valResult = NULL;
323  return;
324  } else { // alpha == 0, beta != 0
325  // Just copy the second matrix, suitably scaled, into the output.
326  memcpy (ptrOut, ptr2, (numRows+1) * sizeof (OffsetType));
327  const OffsetType numEntries = ptr2[numRows];
328  indOut = new OrdinalType [numEntries];
329  memcpy (indOut, ind2, numEntries * sizeof (OrdinalType));
330  valOut = new ScalarType [numEntries];
331  if (beta == STS::one ()) {
332  memcpy (valOut, val2, numEntries * sizeof (ScalarType));
333  } else { // beta != 1
334  for (OrdinalType k = 0; k < numEntries; ++k) {
335  valOut[k] = beta * val2[k];
336  }
337  }
338  ptrResult = ptrOut;
339  indResult = indOut;
340  valResult = valOut;
341  return;
342  }
343  }
344  else if (beta == STS::zero ()) { // alpha != 0, beta == 0
345  // Just copy the first matrix into the output.
346  memcpy (ptrOut, ptr1, (numRows+1) * sizeof (OffsetType));
347  const OffsetType numEntries = ptr1[numRows];
348  indOut = new OrdinalType [numEntries];
349  memcpy (indOut, ind1, numEntries * sizeof (OrdinalType));
350  valOut = new ScalarType [numEntries];
351  if (alpha == STS::one ()) {
352  memcpy (valOut, val1, numEntries * sizeof (ScalarType));
353  } else { // alpha != 1
354  for (OrdinalType k = 0; k < numEntries; ++k) {
355  valOut[k] = alpha * val1[k];
356  }
357  }
358  ptrResult = ptrOut;
359  indResult = indOut;
360  valResult = valOut;
361  return;
362  }
363  else { // alpha != 0 and beta != 0
364  ptrOut[0] = 0;
365  for (OrdinalType i = 0; i < numRows; ++i) {
366  ptrOut[i+1] = countMergedRowEntries (ptr1, ind1, ptr2, ind2, i);
367  }
368  // Sum-scan to compute row offsets.
369  // This may be parallelized via e.g., TBB's parallel_scan().
370  for (OrdinalType i = 1; i < numRows; ++i) {
371  ptrOut[i+1] += ptrOut[i];
372  }
373  }
374  } catch (...) {
375  if (ptrOut != NULL) {
376  delete [] ptrOut;
377  }
378  if (indOut != NULL) {
379  delete [] indOut;
380  }
381  if (valOut != NULL) {
382  delete [] valOut;
383  }
384  throw;
385  }
386  //
387  // Allocate storage for indices and values.
388  //
389  const OffsetType numEntries = ptrOut[numRows];
390 
391  // Allocate and fill indOut and valOut.
392  try {
393  indOut = new OrdinalType [numEntries];
394  valOut = new ScalarType [numEntries];
395 
396  // Merge and add the matrices. This may be done safely in
397  // parallel, since all the arrays have correct sizes and writes to
398  // different rows are independent. We've also already tested for
399  // the special cases alpha == 0 and/or beta == 0.
400  if (alpha == STS::one () && beta == STS::one ()) {
401  for (OrdinalType i = 0; i < numRows; ++i) {
402  (void) mergeRows (ptrOut, indOut, valOut,
403  ptr1, ind1, val1,
404  ptr2, ind2, val2, i,
405  std::plus<ScalarType> ());
406  }
407  } else {
408  AddMatrixEntries<ScalarType> f (alpha, beta);
409  for (OrdinalType i = 0; i < numRows; ++i) {
410  (void) mergeRows (ptrOut, indOut, valOut,
411  ptr1, ind1, val1,
412  ptr2, ind2, val2,
413  i, f);
414  }
415  }
416  } catch (...) {
417  if (indOut != NULL) {
418  delete [] indOut;
419  }
420  if (valOut != NULL) {
421  delete [] valOut;
422  }
423  if (ptrOut != NULL) { // we know it's not, but doesn't hurt to test
424  delete [] ptrOut;
425  }
426  throw;
427  }
428 
429  // "Commit" the output arguments.
430  ptrResult = ptrOut;
431  indResult = indOut;
432  valResult = valOut;
433 }
434 
435 } // namespace Sequential
436 } // namespace Kokkos
437 
438 #endif // KOKKOS_SEQUENTIAL_ADDSPARSEMATRICES_HPP
439 
Compute C_ij = alpha * A_ij + beta * B_ij.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)