MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_MatrixConstruction.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_MATRIXCONSTRUCTION_HPP
11 #define MUELU_MATRIXCONSTRUCTION_HPP
12 
13 #include "Kokkos_Core.hpp"
14 #include "Kokkos_ArithTraits.hpp"
15 
16 #include "MueLu_DroppingCommon.hpp"
17 
18 #include "Xpetra_MatrixFactory.hpp"
19 
20 #ifdef MUELU_COALESCE_DROP_DEBUG
21 // For demangling function names
22 #include <cxxabi.h>
23 #endif
24 
25 namespace MueLu::MatrixConstruction {
36 template <class local_matrix_type, class functor_type, class... remaining_functor_types>
38  private:
39  using scalar_type = typename local_matrix_type::value_type;
40  using local_ordinal_type = typename local_matrix_type::ordinal_type;
41  using memory_space = typename local_matrix_type::memory_space;
42  using results_view = Kokkos::View<DecisionType*, memory_space>;
43 
44  using rowptr_type = typename local_matrix_type::row_map_type::non_const_type;
45 
46  local_matrix_type A;
49  functor_type functor;
50  PointwiseCountingFunctor<local_matrix_type, remaining_functor_types...> remainingFunctors;
52 
53 #ifdef MUELU_COALESCE_DROP_DEBUG
54  std::string functorName;
55 #endif
56 
57  public:
58  PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type& functor_, remaining_functor_types&... remainingFunctors_)
59  : A(A_)
60  , results(results_)
61  , rowptr(rowptr_)
62  , functor(functor_)
63  , remainingFunctors(A_, results_, rowptr_, false, remainingFunctors_...)
64  , firstFunctor(true) {
65 #ifdef MUELU_COALESCE_DROP_DEBUG
66  std::string mangledFunctorName = typeid(decltype(functor)).name();
67  int status = 0;
68  char* demangledFunctorName = 0;
69  demangledFunctorName = abi::__cxa_demangle(mangledFunctorName.c_str(), 0, 0, &status);
70  functorName = demangledFunctorName;
71 #endif
72  }
73 
74  PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, bool firstFunctor_, functor_type& functor_, remaining_functor_types&... remainingFunctors_)
75  : A(A_)
76  , results(results_)
77  , rowptr(rowptr_)
78  , functor(functor_)
79  , remainingFunctors(A_, results_, rowptr_, false, remainingFunctors_...)
80  , firstFunctor(firstFunctor_) {
81 #ifdef MUELU_COALESCE_DROP_DEBUG
82  std::string mangledFunctorName = typeid(decltype(functor)).name();
83  int status = 0;
84  char* demangledFunctorName = 0;
85  demangledFunctorName = abi::__cxa_demangle(mangledFunctorName.c_str(), 0, 0, &status);
86  functorName = demangledFunctorName;
87 #endif
88  }
89 
90  KOKKOS_INLINE_FUNCTION
91  void operator()(const local_ordinal_type rlid, local_ordinal_type& nnz, const bool& final) const {
92 #ifdef MUELU_COALESCE_DROP_DEBUG
93  if (firstFunctor) {
94  Kokkos::printf("\nStarting on row %d\n", rlid);
95 
96  auto row = A.rowConst(rlid);
97 
98  Kokkos::printf("indices: ");
99  for (local_ordinal_type k = 0; k < row.length; ++k) {
100  auto clid = row.colidx(k);
101  Kokkos::printf("%5d ", clid);
102  }
103  Kokkos::printf("\n");
104 
105  Kokkos::printf("values: ");
106  for (local_ordinal_type k = 0; k < row.length; ++k) {
107  auto val = row.value(k);
108  Kokkos::printf("%5f ", val);
109  }
110  Kokkos::printf("\n");
111  }
112 #endif
113 
114  functor(rlid);
115 
116 #ifdef MUELU_COALESCE_DROP_DEBUG
117  {
118  Kokkos::printf("%s\n", functorName.c_str());
119 
120  auto row = A.rowConst(rlid);
121  const size_t offset = A.graph.row_map(rlid);
122 
123  Kokkos::printf("decisions: ");
124  for (local_ordinal_type k = 0; k < row.length; ++k) {
125  Kokkos::printf("%5d ", results(offset + k));
126  }
127  Kokkos::printf("\n");
128  }
129 #endif
130 
131  remainingFunctors(rlid, nnz, final);
132  }
133 };
134 
135 template <class local_matrix_type, class functor_type>
136 class PointwiseCountingFunctor<local_matrix_type, functor_type> {
137  private:
138  using scalar_type = typename local_matrix_type::value_type;
139  using local_ordinal_type = typename local_matrix_type::ordinal_type;
140  using memory_space = typename local_matrix_type::memory_space;
141  using results_view = Kokkos::View<DecisionType*, memory_space>;
142 
143  using rowptr_type = typename local_matrix_type::row_map_type::non_const_type;
144 
145  local_matrix_type A;
148  functor_type functor;
150 
151 #ifdef MUELU_COALESCE_DROP_DEBUG
152  std::string functorName;
153 #endif
154 
155  public:
156  PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, functor_type& functor_)
157  : A(A_)
158  , results(results_)
159  , rowptr(rowptr_)
160  , functor(functor_)
161  , firstFunctor(true) {
162 #ifdef MUELU_COALESCE_DROP_DEBUG
163  std::string mangledFunctorName = typeid(decltype(functor)).name();
164  int status = 0;
165  char* demangledFunctorName = 0;
166  demangledFunctorName = abi::__cxa_demangle(mangledFunctorName.c_str(), 0, 0, &status);
167  functorName = demangledFunctorName;
168 #endif
169  }
170 
171  PointwiseCountingFunctor(local_matrix_type& A_, results_view& results_, rowptr_type& rowptr_, bool firstFunctor_, functor_type& functor_)
172  : A(A_)
173  , results(results_)
174  , rowptr(rowptr_)
175  , functor(functor_)
176  , firstFunctor(firstFunctor_) {
177 #ifdef MUELU_COALESCE_DROP_DEBUG
178  std::string mangledFunctorName = typeid(decltype(functor)).name();
179  int status = 0;
180  char* demangledFunctorName = 0;
181  demangledFunctorName = abi::__cxa_demangle(mangledFunctorName.c_str(), 0, 0, &status);
182  functorName = demangledFunctorName;
183 #endif
184  }
185 
186  KOKKOS_INLINE_FUNCTION
187  void operator()(const local_ordinal_type rlid, local_ordinal_type& nnz, const bool& final) const {
188 #ifdef MUELU_COALESCE_DROP_DEBUG
189  if (firstFunctor) {
190  Kokkos::printf("\nStarting on row %d\n", rlid);
191 
192  auto row = A.rowConst(rlid);
193 
194  Kokkos::printf("indices: ");
195  for (local_ordinal_type k = 0; k < row.length; ++k) {
196  auto clid = row.colidx(k);
197  Kokkos::printf("%5d ", clid);
198  }
199  Kokkos::printf("\n");
200 
201  Kokkos::printf("values: ");
202  for (local_ordinal_type k = 0; k < row.length; ++k) {
203  auto val = row.value(k);
204  Kokkos::printf("%5f ", val);
205  }
206  Kokkos::printf("\n");
207  }
208 #endif
209 
210  functor(rlid);
211 
212 #ifdef MUELU_COALESCE_DROP_DEBUG
213  Kokkos::printf("%s\n", functorName);
214 
215  auto row = A.rowConst(rlid);
216  const size_t offset = A.graph.row_map(rlid);
217 
218  Kokkos::printf("decisions: ");
219  for (local_ordinal_type k = 0; k < row.length; ++k) {
220  Kokkos::printf("%5d ", results(offset + k));
221  }
222 
223  Kokkos::printf("\n");
224  Kokkos::printf("Done with row %d\n", rlid);
225 #endif
226 
227  size_t start = A.graph.row_map(rlid);
228  size_t end = A.graph.row_map(rlid + 1);
229  for (size_t i = start; i < end; ++i) {
230  if (results(i) == KEEP) {
231  ++nnz;
232  }
233  }
234  if (final)
235  rowptr(rlid + 1) = nnz;
236  }
237 };
238 
247 template <class local_matrix_type, class local_graph_type, bool lumping>
249  private:
250  using scalar_type = typename local_matrix_type::value_type;
251  using local_ordinal_type = typename local_matrix_type::ordinal_type;
252  using memory_space = typename local_matrix_type::memory_space;
253  using results_view = Kokkos::View<DecisionType*, memory_space>;
254  using ATS = Kokkos::ArithTraits<scalar_type>;
255  using magnitudeType = typename ATS::magnitudeType;
256 
257  local_matrix_type A;
259  local_matrix_type filteredA;
260  local_graph_type graph;
262  const scalar_type zero = ATS::zero();
263  const scalar_type one = ATS::one();
264 
265  public:
266  PointwiseFillReuseFunctor(local_matrix_type& A_, results_view& results_, local_matrix_type& filteredA_, local_graph_type& graph_, magnitudeType dirichletThreshold_)
267  : A(A_)
268  , results(results_)
269  , filteredA(filteredA_)
270  , graph(graph_)
271  , dirichletThreshold(dirichletThreshold_) {}
272 
273  KOKKOS_INLINE_FUNCTION
274  void operator()(const local_ordinal_type rlid) const {
275  auto rowA = A.row(rlid);
276  size_t row_start = A.graph.row_map(rlid);
277  auto rowFilteredA = filteredA.row(rlid);
278  local_ordinal_type j = 0;
279  local_ordinal_type jj = 0;
280  local_ordinal_type graph_offset = graph.row_map(rlid);
281  scalar_type diagCorrection = zero;
282  local_ordinal_type diagOffset = -1;
283  for (local_ordinal_type k = 0; k < rowA.length; ++k) {
284  if constexpr (lumping) {
285  local_ordinal_type clid = rowA.colidx(k);
286  if (rlid == clid) {
287  diagOffset = j;
288  }
289  }
290  if (results(row_start + k) == KEEP) {
291  rowFilteredA.colidx(j) = rowA.colidx(k);
292  rowFilteredA.value(j) = rowA.value(k);
293  ++j;
294  graph.entries(graph_offset + jj) = rowA.colidx(k);
295  ++jj;
296  } else if constexpr (lumping) {
297  diagCorrection += rowA.value(k);
298  rowFilteredA.colidx(j) = rowA.colidx(k);
299  rowFilteredA.value(j) = zero;
300  ++j;
301  } else {
302  rowFilteredA.colidx(j) = rowA.colidx(k);
303  rowFilteredA.value(j) = zero;
304  ++j;
305  }
306  }
307  if constexpr (lumping) {
308  rowFilteredA.value(diagOffset) += diagCorrection;
309  if ((dirichletThreshold >= 0.0) && (ATS::real(rowFilteredA.value(diagOffset)) <= dirichletThreshold))
310  rowFilteredA.value(diagOffset) = one;
311  }
312  }
313 };
314 
322 template <class local_matrix_type, bool lumping>
324  private:
325  using scalar_type = typename local_matrix_type::value_type;
326  using local_ordinal_type = typename local_matrix_type::ordinal_type;
327  using memory_space = typename local_matrix_type::memory_space;
328  using results_view = Kokkos::View<DecisionType*, memory_space>;
329  using ATS = Kokkos::ArithTraits<scalar_type>;
330  using magnitudeType = typename ATS::magnitudeType;
331 
332  local_matrix_type A;
334  local_matrix_type filteredA;
336  const scalar_type zero = ATS::zero();
337  const scalar_type one = ATS::one();
338 
339  public:
340  PointwiseFillNoReuseFunctor(local_matrix_type& A_, results_view& results_, local_matrix_type& filteredA_, magnitudeType dirichletThreshold_)
341  : A(A_)
342  , results(results_)
343  , filteredA(filteredA_)
344  , dirichletThreshold(dirichletThreshold_) {}
345 
346  KOKKOS_INLINE_FUNCTION
347  void operator()(const local_ordinal_type rlid) const {
348  auto rowA = A.row(rlid);
349  size_t K = A.graph.row_map(rlid);
350  auto rowFilteredA = filteredA.row(rlid);
351  local_ordinal_type j = 0;
352  scalar_type diagCorrection = zero;
353  local_ordinal_type diagOffset = -1;
354  for (local_ordinal_type k = 0; k < rowA.length; ++k) {
355  if constexpr (lumping) {
356  local_ordinal_type clid = rowA.colidx(k);
357  if (rlid == clid) {
358  diagOffset = j;
359  }
360  }
361  if (results(K + k) == KEEP) {
362  rowFilteredA.colidx(j) = rowA.colidx(k);
363  rowFilteredA.value(j) = rowA.value(k);
364  ++j;
365  } else if constexpr (lumping) {
366  diagCorrection += rowA.value(k);
367  }
368  }
369  if constexpr (lumping) {
370  rowFilteredA.value(diagOffset) += diagCorrection;
371  if ((dirichletThreshold >= 0.0) && (ATS::real(rowFilteredA.value(diagOffset)) <= dirichletThreshold))
372  rowFilteredA.value(diagOffset) = one;
373  }
374  }
375 };
376 
377 template <class local_matrix_type>
379  public:
380  using local_ordinal_type = typename local_matrix_type::ordinal_type;
381  using memory_space = typename local_matrix_type::memory_space;
382  using block_indices_view_type = Kokkos::View<local_ordinal_type*, memory_space>;
383 
384  local_matrix_type A;
387 
388  public:
389  BlockRowComparison(local_matrix_type& A_, local_ordinal_type bsize_, block_indices_view_type ghosted_point_to_block_)
390  : A(A_)
391  , bsize(bsize_)
392  , ghosted_point_to_block(ghosted_point_to_block_) {}
393 
394  template <class local_matrix_type2>
395  struct Comparator {
396  private:
397  using local_ordinal_type = typename local_matrix_type2::ordinal_type;
398  using memory_space = typename local_matrix_type2::memory_space;
399  using block_indices_view_type = Kokkos::View<local_ordinal_type*, memory_space>;
400 
401  const local_matrix_type2 A;
404 
405  public:
406  KOKKOS_INLINE_FUNCTION
407  Comparator(const local_matrix_type2& A_, local_ordinal_type bsize_, local_ordinal_type brlid_, block_indices_view_type ghosted_point_to_block_)
408  : A(A_)
409  , offset(A_.graph.row_map(bsize_ * brlid_))
410  , ghosted_point_to_block(ghosted_point_to_block_) {}
411 
412  KOKKOS_INLINE_FUNCTION
413  bool operator()(size_t x, size_t y) const {
414  return ghosted_point_to_block(A.graph.entries(offset + x)) < ghosted_point_to_block(A.graph.entries(offset + y));
415  }
416  };
417 
419 
420  KOKKOS_INLINE_FUNCTION
423  }
424 };
425 
436 template <class local_matrix_type,
437  class functor_type,
438  class... remaining_functor_types>
440  private:
441  using scalar_type = typename local_matrix_type::value_type;
442  using local_ordinal_type = typename local_matrix_type::ordinal_type;
443  using memory_space = typename local_matrix_type::memory_space;
444  using results_view = Kokkos::View<DecisionType*, memory_space>;
445  using block_indices_view_type = Kokkos::View<local_ordinal_type*, memory_space>;
446  using permutation_type = Kokkos::View<local_ordinal_type*, memory_space>;
447 
448  using rowptr_type = typename local_matrix_type::row_map_type::non_const_type;
449  using ATS = Kokkos::ArithTraits<local_ordinal_type>;
450 
451  local_matrix_type A;
457 
458  functor_type functor;
459 
462 
463  VectorCountingFunctor<local_matrix_type, remaining_functor_types...> remainingFunctors;
464 
465 #ifdef MUELU_COALESCE_DROP_DEBUG
466  std::string functorName;
467 #endif
468 
469  public:
470  VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, functor_type& functor_, remaining_functor_types&... remainingFunctors_)
471  : A(A_)
472  , blockSize(blockSize_)
473  , ghosted_point_to_block(ghosted_point_to_block_)
474  , results(results_)
475  , filtered_rowptr(filtered_rowptr_)
476  , graph_rowptr(graph_rowptr_)
477  , functor(functor_)
479  , remainingFunctors(A_, blockSize_, ghosted_point_to_block_, results_, filtered_rowptr_, graph_rowptr_, remainingFunctors_...) {
480  permutation = permutation_type("permutation", A.nnz());
481 #ifdef MUELU_COALESCE_DROP_DEBUG
482  std::string mangledFunctorName = typeid(decltype(functor)).name();
483  int status = 0;
484  char* demangledFunctorName = 0;
485  demangledFunctorName = abi::__cxa_demangle(mangledFunctorName.c_str(), 0, 0, &status);
486  functorName = demangledFunctorName;
487 #endif
488  }
489 
490  KOKKOS_INLINE_FUNCTION
491  void join(Kokkos::pair<local_ordinal_type, local_ordinal_type>& dest, const Kokkos::pair<local_ordinal_type, local_ordinal_type>& src) const {
492  dest.first += src.first;
493  dest.second += src.second;
494  }
495 
496  KOKKOS_INLINE_FUNCTION
497  void operatorRow(const local_ordinal_type rlid) const {
498  functor(rlid);
499  remainingFunctors.operatorRow(rlid);
500  }
501 
502  KOKKOS_INLINE_FUNCTION
503  void operator()(const local_ordinal_type brlid, Kokkos::pair<local_ordinal_type, local_ordinal_type>& nnz, const bool& final) const {
504  auto nnz_filtered = &nnz.first;
505  auto nnz_graph = &nnz.second;
506 
507 #ifdef MUELU_COALESCE_DROP_DEBUG
508  Kokkos::printf("\nStarting on block row %d\n", brlid);
509 #endif
510  for (local_ordinal_type rlid = blockSize * brlid; rlid < blockSize * (brlid + 1); ++rlid) {
511 #ifdef MUELU_COALESCE_DROP_DEBUG
512  {
513  Kokkos::printf("\nStarting on row %d\n", rlid);
514 
515  auto row = A.rowConst(rlid);
516 
517  Kokkos::printf("indices: ");
518  for (local_ordinal_type k = 0; k < row.length; ++k) {
519  auto clid = row.colidx(k);
520  Kokkos::printf("%5d ", clid);
521  }
522  Kokkos::printf("\n");
523 
524  Kokkos::printf("values: ");
525  for (local_ordinal_type k = 0; k < row.length; ++k) {
526  auto val = row.value(k);
527  Kokkos::printf("%5f ", val);
528  }
529  Kokkos::printf("\n");
530  }
531 #endif
532 
533  functor(rlid);
534  remainingFunctors.operatorRow(rlid);
535 
536 #ifdef MUELU_COALESCE_DROP_DEBUG
537  {
538  Kokkos::printf("%s\n", functorName.c_str());
539 
540  auto row = A.rowConst(rlid);
541  const size_t offset = A.graph.row_map(rlid);
542 
543  Kokkos::printf("decisions: ");
544  for (local_ordinal_type k = 0; k < row.length; ++k) {
545  Kokkos::printf("%5d ", results(offset + k));
546  }
547  Kokkos::printf("\n");
548  }
549 #endif
550 
551 #ifdef MUELU_COALESCE_DROP_DEBUG
552  Kokkos::printf("Done with row %d\n", rlid);
553 #endif
554 
555  size_t start = A.graph.row_map(rlid);
556  size_t end = A.graph.row_map(rlid + 1);
557  for (size_t i = start; i < end; ++i) {
558  if (results(i) == KEEP) {
559  ++(*nnz_filtered);
560  }
561  }
562  if (final)
563  filtered_rowptr(rlid + 1) = *nnz_filtered;
564  }
565 
566 #ifdef MUELU_COALESCE_DROP_DEBUG
567  Kokkos::printf("Done with block row %d\nGraph indices ", brlid);
568 #endif
569 
570  // column lids for all rows in the block
571  auto block_clids = Kokkos::subview(A.graph.entries, Kokkos::make_pair(A.graph.row_map(blockSize * brlid),
572  A.graph.row_map(blockSize * (brlid + 1))));
573  // set up a permutatation index
574  auto block_permutation = Kokkos::subview(permutation, Kokkos::make_pair(A.graph.row_map(blockSize * brlid),
575  A.graph.row_map(blockSize * (brlid + 1))));
576  for (size_t i = 0; i < block_permutation.extent(0); ++i)
577  block_permutation(i) = i;
578  // get permuatation for sorted column indices of the entire block
579  auto comparator = comparison.getComparator(brlid);
580  Misc::serialHeapSort(block_permutation, comparator);
581 
582  local_ordinal_type prev_bclid = -1;
583  bool alreadyAdded = false;
584 
585  // loop over all sorted entries in block
586  auto offset = A.graph.row_map(blockSize * brlid);
587  for (size_t i = 0; i < block_permutation.extent(0); ++i) {
588  auto idx = offset + block_permutation(i);
589  auto clid = A.graph.entries(idx);
590  auto bclid = ghosted_point_to_block(clid);
591 
592  // unseen block column index
593  if (bclid > prev_bclid)
594  alreadyAdded = false;
595 
596  // add entry to graph
597  if (!alreadyAdded && (results(idx) == KEEP)) {
598  ++(*nnz_graph);
599  alreadyAdded = true;
600 #ifdef MUELU_COALESCE_DROP_DEBUG
601  Kokkos::printf("%5d ", bclid);
602 #endif
603  }
604  prev_bclid = bclid;
605  }
606 #ifdef MUELU_COALESCE_DROP_DEBUG
607  Kokkos::printf("\n");
608 #endif
609  if (final)
610  graph_rowptr(brlid + 1) = *nnz_graph;
611  }
612 };
613 
614 template <class local_matrix_type,
615  class functor_type>
616 class VectorCountingFunctor<local_matrix_type, functor_type> {
617  private:
618  using scalar_type = typename local_matrix_type::value_type;
619  using local_ordinal_type = typename local_matrix_type::ordinal_type;
620  using memory_space = typename local_matrix_type::memory_space;
621  using results_view = Kokkos::View<DecisionType*, memory_space>;
622  using block_indices_view_type = Kokkos::View<local_ordinal_type*, memory_space>;
623  using permutation_type = Kokkos::View<local_ordinal_type*, memory_space>;
624 
625  using rowptr_type = typename local_matrix_type::row_map_type::non_const_type;
626  using ATS = Kokkos::ArithTraits<local_ordinal_type>;
627 
628  local_matrix_type A;
634 
636  functor_type functor;
637 
638 #ifdef MUELU_COALESCE_DROP_DEBUG
639  std::string functorName;
640 #endif
641 
644 
645  public:
646  VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, functor_type& functor_)
647  : A(A_)
648  , blockSize(blockSize_)
649  , ghosted_point_to_block(ghosted_point_to_block_)
650  , results(results_)
651  , filtered_rowptr(filtered_rowptr_)
652  , graph_rowptr(graph_rowptr_)
653  , functor(functor_)
655  permutation = permutation_type("permutation", A.nnz());
656 #ifdef MUELU_COALESCE_DROP_DEBUG
657  std::string mangledFunctorName = typeid(decltype(functor)).name();
658  int status = 0;
659  char* demangledFunctorName = 0;
660  demangledFunctorName = abi::__cxa_demangle(mangledFunctorName.c_str(), 0, 0, &status);
661  functorName = demangledFunctorName;
662 #endif
663  }
664 
665  KOKKOS_INLINE_FUNCTION
666  void join(Kokkos::pair<local_ordinal_type, local_ordinal_type>& dest, const Kokkos::pair<local_ordinal_type, local_ordinal_type>& src) const {
667  dest.first += src.first;
668  dest.second += src.second;
669  }
670 
671  KOKKOS_INLINE_FUNCTION
672  void operatorRow(const local_ordinal_type rlid) const {
673  functor(rlid);
674  }
675 
676  KOKKOS_INLINE_FUNCTION
677  void operator()(const local_ordinal_type brlid, Kokkos::pair<local_ordinal_type, local_ordinal_type>& nnz, const bool& final) const {
678  auto nnz_filtered = &nnz.first;
679  auto nnz_graph = &nnz.second;
680 
681 #ifdef MUELU_COALESCE_DROP_DEBUG
682  Kokkos::printf("\nStarting on block row %d\n", brlid);
683 #endif
684  for (local_ordinal_type rlid = blockSize * brlid; rlid < blockSize * (brlid + 1); ++rlid) {
685 #ifdef MUELU_COALESCE_DROP_DEBUG
686  {
687  Kokkos::printf("\nStarting on row %d\n", rlid);
688 
689  auto row = A.rowConst(rlid);
690 
691  Kokkos::printf("indices: ");
692  for (local_ordinal_type k = 0; k < row.length; ++k) {
693  auto clid = row.colidx(k);
694  Kokkos::printf("%5d ", clid);
695  }
696  Kokkos::printf("\n");
697 
698  Kokkos::printf("values: ");
699  for (local_ordinal_type k = 0; k < row.length; ++k) {
700  auto val = row.value(k);
701  Kokkos::printf("%5f ", val);
702  }
703  Kokkos::printf("\n");
704  }
705 #endif
706 
707  functor(rlid);
708 
709 #ifdef MUELU_COALESCE_DROP_DEBUG
710  {
711  Kokkos::printf("%s\n", functorName.c_str());
712 
713  auto row = A.rowConst(rlid);
714  const size_t offset = A.graph.row_map(rlid);
715 
716  Kokkos::printf("decisions: ");
717  for (local_ordinal_type k = 0; k < row.length; ++k) {
718  Kokkos::printf("%5d ", results(offset + k));
719  }
720  Kokkos::printf("\n");
721  }
722 #endif
723 
724 #ifdef MUELU_COALESCE_DROP_DEBUG
725  Kokkos::printf("Done with row %d\n", rlid);
726 #endif
727 
728  size_t start = A.graph.row_map(rlid);
729  size_t end = A.graph.row_map(rlid + 1);
730  for (size_t i = start; i < end; ++i) {
731  if (results(i) == KEEP) {
732  ++(*nnz_filtered);
733  }
734  }
735  if (final)
736  filtered_rowptr(rlid + 1) = *nnz_filtered;
737  }
738 
739 #ifdef MUELU_COALESCE_DROP_DEBUG
740  Kokkos::printf("Done with block row %d\nGraph indices ", brlid);
741 #endif
742 
743  // column lids for all rows in the block
744  auto block_clids = Kokkos::subview(A.graph.entries, Kokkos::make_pair(A.graph.row_map(blockSize * brlid),
745  A.graph.row_map(blockSize * (brlid + 1))));
746  // set up a permutation index
747  auto block_permutation = Kokkos::subview(permutation, Kokkos::make_pair(A.graph.row_map(blockSize * brlid),
748  A.graph.row_map(blockSize * (brlid + 1))));
749  for (size_t i = 0; i < block_permutation.extent(0); ++i)
750  block_permutation(i) = i;
751  // get permutation for sorted column indices of the entire block
752  auto comparator = comparison.getComparator(brlid);
753  Misc::serialHeapSort(block_permutation, comparator);
754 
755  local_ordinal_type prev_bclid = -1;
756  bool alreadyAdded = false;
757 
758  // loop over all sorted entries in block
759  auto offset = A.graph.row_map(blockSize * brlid);
760  for (size_t i = 0; i < block_permutation.extent(0); ++i) {
761  auto idx = offset + block_permutation(i);
762  auto clid = A.graph.entries(idx);
763  auto bclid = ghosted_point_to_block(clid);
764 
765  // unseen block column index
766  if (bclid > prev_bclid)
767  alreadyAdded = false;
768 
769  // add entry to graph
770  if (!alreadyAdded && (results(idx) == KEEP)) {
771  ++(*nnz_graph);
772  alreadyAdded = true;
773 #ifdef MUELU_COALESCE_DROP_DEBUG
774  Kokkos::printf("%5d ", bclid);
775 #endif
776  }
777  prev_bclid = bclid;
778  }
779 #ifdef MUELU_COALESCE_DROP_DEBUG
780  Kokkos::printf("\n");
781 #endif
782  if (final)
783  graph_rowptr(brlid + 1) = *nnz_graph;
784  }
785 };
786 
794 template <class local_matrix_type, bool lumping, bool reuse>
796  private:
797  using scalar_type = typename local_matrix_type::value_type;
798  using local_ordinal_type = typename local_matrix_type::ordinal_type;
799  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
800  using memory_space = typename local_matrix_type::memory_space;
801  using results_view = Kokkos::View<DecisionType*, memory_space>;
802  using ATS = Kokkos::ArithTraits<scalar_type>;
803  using OTS = Kokkos::ArithTraits<local_ordinal_type>;
804  using block_indices_view_type = Kokkos::View<local_ordinal_type*, memory_space>;
805  using permutation_type = Kokkos::View<local_ordinal_type*, memory_space>;
806  using magnitudeType = typename ATS::magnitudeType;
807 
808  local_matrix_type A;
812  local_matrix_type filteredA;
815  const scalar_type zero = ATS::zero();
816  const scalar_type one = ATS::one();
817 
820 
821  public:
822  VectorFillFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, local_matrix_type& filteredA_, local_graph_type& graph_, magnitudeType dirichletThreshold_)
823  : A(A_)
824  , blockSize(blockSize_)
825  , ghosted_point_to_block(ghosted_point_to_block_)
826  , results(results_)
827  , filteredA(filteredA_)
828  , graph(graph_)
829  , dirichletThreshold(dirichletThreshold_)
831  permutation = permutation_type("permutation", A.nnz());
832  }
833 
834  KOKKOS_INLINE_FUNCTION
835  void operator()(const local_ordinal_type brlid) const {
836  for (local_ordinal_type rlid = blockSize * brlid; rlid < blockSize * (brlid + 1); ++rlid) {
837  auto rowA = A.row(rlid);
838  size_t row_start = A.graph.row_map(rlid);
839  auto rowFilteredA = filteredA.row(rlid);
840  local_ordinal_type j = 0;
841  scalar_type diagCorrection = zero;
842  local_ordinal_type diagOffset = -1;
843  for (local_ordinal_type k = 0; k < rowA.length; ++k) {
844  if constexpr (lumping) {
845  local_ordinal_type clid = rowA.colidx(k);
846  if (rlid == clid) {
847  diagOffset = j;
848  }
849  }
850  if (results(row_start + k) == KEEP) {
851  rowFilteredA.colidx(j) = rowA.colidx(k);
852  rowFilteredA.value(j) = rowA.value(k);
853  ++j;
854  } else if constexpr (lumping) {
855  diagCorrection += rowA.value(k);
856  if constexpr (reuse) {
857  rowFilteredA.colidx(j) = rowA.colidx(k);
858  rowFilteredA.value(j) = zero;
859  ++j;
860  }
861  } else if constexpr (reuse) {
862  rowFilteredA.colidx(j) = rowA.colidx(k);
863  rowFilteredA.value(j) = zero;
864  ++j;
865  }
866  }
867  if constexpr (lumping) {
868  rowFilteredA.value(diagOffset) += diagCorrection;
869  if ((dirichletThreshold >= 0.0) && (ATS::real(rowFilteredA.value(diagOffset)) <= dirichletThreshold))
870  rowFilteredA.value(diagOffset) = one;
871  }
872  }
873 
874  // column lids for all rows in the block
875  auto block_clids = Kokkos::subview(A.graph.entries, Kokkos::make_pair(A.graph.row_map(blockSize * brlid),
876  A.graph.row_map(blockSize * (brlid + 1))));
877  // set up a permuatation index
878  auto block_permutation = Kokkos::subview(permutation, Kokkos::make_pair(A.graph.row_map(blockSize * brlid),
879  A.graph.row_map(blockSize * (brlid + 1))));
880  for (size_t i = 0; i < block_permutation.extent(0); ++i)
881  block_permutation(i) = i;
882  // get permutation for sorted column indices of the entire block
883  auto comparator = comparison.getComparator(brlid);
884  Misc::serialHeapSort(block_permutation, comparator);
885 
886  local_ordinal_type prev_bclid = -1;
887  bool alreadyAdded = false;
888  local_ordinal_type j = graph.row_map(brlid);
889 
890  // loop over all sorted entries in block
891  auto offset = A.graph.row_map(blockSize * brlid);
892  for (size_t i = 0; i < block_permutation.extent(0); ++i) {
893  auto idx = offset + block_permutation(i);
894  auto clid = A.graph.entries(idx);
895  auto bclid = ghosted_point_to_block(clid);
896 
897  // unseen block column index
898  if (bclid > prev_bclid)
899  alreadyAdded = false;
900 
901  // add entry to graph
902  if (!alreadyAdded && (results(idx) == KEEP)) {
903  graph.entries(j) = bclid;
904  ++j;
905  alreadyAdded = true;
906  }
907  prev_bclid = bclid;
908  }
909  }
910 };
911 
912 template <class local_matrix_type>
914  private:
915  using scalar_type = typename local_matrix_type::value_type;
916  using local_ordinal_type = typename local_matrix_type::ordinal_type;
917  using memory_space = typename local_matrix_type::memory_space;
918  using results_view = Kokkos::View<DecisionType*, memory_space>;
919  using block_indices_view_type = Kokkos::View<local_ordinal_type*, memory_space>;
920  using permutation_type = Kokkos::View<local_ordinal_type*, memory_space>;
921 
922  using rowptr_type = typename local_matrix_type::row_map_type::non_const_type;
923  using ATS = Kokkos::ArithTraits<local_ordinal_type>;
924 
925  local_matrix_type A;
929 
932 
933  public:
934  MergeCountFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, rowptr_type& merged_rowptr_)
935  : A(A_)
936  , blockSize(blockSize_)
937  , ghosted_point_to_block(ghosted_point_to_block_)
938  , merged_rowptr(merged_rowptr_)
940  permutation = permutation_type("permutation", A.nnz());
941  }
942 
943  KOKKOS_INLINE_FUNCTION
944  void operator()(const local_ordinal_type brlid, local_ordinal_type& nnz_graph, const bool& final) const {
945  // column lids for all rows in the block
946  auto block_clids = Kokkos::subview(A.graph.entries, Kokkos::make_pair(A.graph.row_map(blockSize * brlid),
947  A.graph.row_map(blockSize * (brlid + 1))));
948  // set up a permutation index
949  auto block_permutation = Kokkos::subview(permutation, Kokkos::make_pair(A.graph.row_map(blockSize * brlid),
950  A.graph.row_map(blockSize * (brlid + 1))));
951  for (size_t i = 0; i < block_permutation.extent(0); ++i)
952  block_permutation(i) = i;
953  // get permutation for sorted column indices of the entire block
954  auto comparator = comparison.getComparator(brlid);
955  Misc::serialHeapSort(block_permutation, comparator);
956 
957  local_ordinal_type prev_bclid = -1;
958  bool alreadyAdded = false;
959 
960  // loop over all sorted entries in block
961  auto offset = A.graph.row_map(blockSize * brlid);
962  for (size_t i = 0; i < block_permutation.extent(0); ++i) {
963  auto idx = offset + block_permutation(i);
964  auto clid = A.graph.entries(idx);
965  auto bclid = ghosted_point_to_block(clid);
966 
967  // unseen block column index
968  if (bclid > prev_bclid)
969  alreadyAdded = false;
970 
971  // add entry to graph
972  if (!alreadyAdded) {
973  ++nnz_graph;
974  alreadyAdded = true;
975 #ifdef MUELU_COALESCE_DROP_DEBUG
976  Kokkos::printf("%5d ", bclid);
977 #endif
978  }
979  prev_bclid = bclid;
980  }
981 #ifdef MUELU_COALESCE_DROP_DEBUG
982  Kokkos::printf("\n");
983 #endif
984  if (final)
985  merged_rowptr(brlid + 1) = nnz_graph;
986  }
987 };
988 
989 template <class local_matrix_type>
991  private:
992  using scalar_type = typename local_matrix_type::value_type;
993  using local_ordinal_type = typename local_matrix_type::ordinal_type;
994  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
995  using memory_space = typename local_matrix_type::memory_space;
996  using results_view = Kokkos::View<DecisionType*, memory_space>;
997  using ATS = Kokkos::ArithTraits<scalar_type>;
998  using OTS = Kokkos::ArithTraits<local_ordinal_type>;
999  using block_indices_view_type = Kokkos::View<local_ordinal_type*, memory_space>;
1000  using permutation_type = Kokkos::View<local_ordinal_type*, memory_space>;
1001  using magnitudeType = typename ATS::magnitudeType;
1002 
1003  local_matrix_type A;
1006  local_matrix_type mergedA;
1007  const scalar_type zero = ATS::zero();
1008  const scalar_type one = ATS::one();
1009 
1012 
1013  public:
1014  MergeFillFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, local_matrix_type& mergedA_)
1015  : A(A_)
1016  , blockSize(blockSize_)
1017  , ghosted_point_to_block(ghosted_point_to_block_)
1018  , mergedA(mergedA_)
1020  permutation = permutation_type("permutation", A.nnz());
1021  }
1022 
1023  KOKKOS_INLINE_FUNCTION
1024  void operator()(const local_ordinal_type brlid) const {
1025  // column lids for all rows in the block
1026  auto block_clids = Kokkos::subview(A.graph.entries, Kokkos::make_pair(A.graph.row_map(blockSize * brlid),
1027  A.graph.row_map(blockSize * (brlid + 1))));
1028  // set up a permuatation index
1029  auto block_permutation = Kokkos::subview(permutation, Kokkos::make_pair(A.graph.row_map(blockSize * brlid),
1030  A.graph.row_map(blockSize * (brlid + 1))));
1031  for (size_t i = 0; i < block_permutation.extent(0); ++i)
1032  block_permutation(i) = i;
1033  // get permutation for sorted column indices of the entire block
1034  auto comparator = comparison.getComparator(brlid);
1035  Misc::serialHeapSort(block_permutation, comparator);
1036 
1037  local_ordinal_type prev_bclid = -1;
1038  bool alreadyAdded = false;
1039  local_ordinal_type j = mergedA.graph.row_map(brlid);
1040 
1041  // loop over all sorted entries in block
1042  auto offset = A.graph.row_map(blockSize * brlid);
1043  for (size_t i = 0; i < block_permutation.extent(0); ++i) {
1044  auto idx = offset + block_permutation(i);
1045  auto clid = A.graph.entries(idx);
1046  auto bclid = ghosted_point_to_block(clid);
1047 
1048  // unseen block column index
1049  if (bclid > prev_bclid)
1050  alreadyAdded = false;
1051 
1052  // add entry to graph
1053  if (!alreadyAdded) {
1054  mergedA.graph.entries(j) = bclid;
1055  mergedA.values(j) = one;
1056  ++j;
1057  alreadyAdded = true;
1058  }
1059  prev_bclid = bclid;
1060  }
1061  }
1062 };
1063 
1064 } // namespace MueLu::MatrixConstruction
1065 
1066 #endif
typename local_matrix_type::ordinal_type local_ordinal_type
KOKKOS_INLINE_FUNCTION bool operator()(size_t x, size_t y) const
Kokkos::ArithTraits< local_ordinal_type > OTS
typename local_matrix_type::memory_space memory_space
Kokkos::View< DecisionType *, memory_space > results_view
typename local_matrix_type::memory_space memory_space
Kokkos::ArithTraits< local_ordinal_type > OTS
Kokkos::View< local_ordinal_type *, memory_space > permutation_type
typename local_matrix_type::row_map_type::non_const_type rowptr_type
Kokkos::View< DecisionType *, memory_space > results_view
KOKKOS_INLINE_FUNCTION void operatorRow(const local_ordinal_type rlid) const
KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type rlid, local_ordinal_type &nnz, const bool &final) const
KOKKOS_INLINE_FUNCTION void operatorRow(const local_ordinal_type rlid) const
KOKKOS_INLINE_FUNCTION comparator_type getComparator(local_ordinal_type brlid) const
PointwiseCountingFunctor(local_matrix_type &A_, results_view &results_, rowptr_type &rowptr_, functor_type &functor_)
PointwiseCountingFunctor< local_matrix_type, remaining_functor_types...> remainingFunctors
KOKKOS_INLINE_FUNCTION void join(Kokkos::pair< local_ordinal_type, local_ordinal_type > &dest, const Kokkos::pair< local_ordinal_type, local_ordinal_type > &src) const
Kokkos::View< DecisionType *, memory_space > results_view
typename local_matrix_type::ordinal_type local_ordinal_type
VectorCountingFunctor< local_matrix_type, remaining_functor_types...> remainingFunctors
typename local_matrix_type::memory_space memory_space
PointwiseCountingFunctor(local_matrix_type &A_, results_view &results_, rowptr_type &rowptr_, bool firstFunctor_, functor_type &functor_, remaining_functor_types &...remainingFunctors_)
Kokkos::View< local_ordinal_type *, memory_space > permutation_type
Kokkos::View< local_ordinal_type *, memory_space > block_indices_view_type
KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type brlid, Kokkos::pair< local_ordinal_type, local_ordinal_type > &nnz, const bool &final) const
typename local_matrix_type::ordinal_type local_ordinal_type
typename local_matrix_type::memory_space memory_space
BlockRowComparison< local_matrix_type > comparison
Kokkos::View< local_ordinal_type *, memory_space > block_indices_view_type
typename local_matrix_type::value_type scalar_type
Kokkos::ArithTraits< local_ordinal_type > ATS
BlockRowComparison< local_matrix_type > comparison
Kokkos::View< DecisionType *, memory_space > results_view
Kokkos::ArithTraits< local_ordinal_type > ATS
Kokkos::View< local_ordinal_type *, memory_space > block_indices_view_type
typename local_matrix_type::value_type scalar_type
typename local_matrix_type::memory_space memory_space
typename local_matrix_type::row_map_type::non_const_type rowptr_type
KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type rlid) const
KOKKOS_INLINE_FUNCTION void serialHeapSort(view_type &v, comparator_type comparator)
typename local_matrix_type::value_type scalar_type
VectorCountingFunctor(local_matrix_type &A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view &results_, rowptr_type &filtered_rowptr_, rowptr_type &graph_rowptr_, functor_type &functor_, remaining_functor_types &...remainingFunctors_)
Functor that executes a sequence of sub-functors on each block of rows.
typename local_matrix_type::ordinal_type local_ordinal_type
KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type rlid) const
PointwiseCountingFunctor(local_matrix_type &A_, results_view &results_, rowptr_type &rowptr_, functor_type &functor_, remaining_functor_types &...remainingFunctors_)
VectorFillFunctor(local_matrix_type &A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view &results_, local_matrix_type &filteredA_, local_graph_type &graph_, magnitudeType dirichletThreshold_)
PointwiseCountingFunctor(local_matrix_type &A_, results_view &results_, rowptr_type &rowptr_, bool firstFunctor_, functor_type &functor_)
Kokkos::View< local_ordinal_type *, memory_space > permutation_type
Kokkos::View< DecisionType *, memory_space > results_view
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping...
typename local_matrix_type::ordinal_type local_ordinal_type
typename local_matrix_type::ordinal_type local_ordinal_type
PointwiseFillReuseFunctor(local_matrix_type &A_, results_view &results_, local_matrix_type &filteredA_, local_graph_type &graph_, magnitudeType dirichletThreshold_)
MergeCountFunctor(local_matrix_type &A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, rowptr_type &merged_rowptr_)
PointwiseFillNoReuseFunctor(local_matrix_type &A_, results_view &results_, local_matrix_type &filteredA_, magnitudeType dirichletThreshold_)
Functor that executes a sequence of sub-functors on each row for a problem with blockSize == 1...
Kokkos::View< DecisionType *, memory_space > results_view
KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type brlid, Kokkos::pair< local_ordinal_type, local_ordinal_type > &nnz, const bool &final) const
typename local_matrix_type::ordinal_type local_ordinal_type
KOKKOS_INLINE_FUNCTION Comparator(const local_matrix_type2 &A_, local_ordinal_type bsize_, local_ordinal_type brlid_, block_indices_view_type ghosted_point_to_block_)
void start()
KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type rlid, local_ordinal_type &nnz, const bool &final) const
Kokkos::View< local_ordinal_type *, memory_space > permutation_type
KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type brlid, local_ordinal_type &nnz_graph, const bool &final) const
typename local_matrix_type::row_map_type::non_const_type rowptr_type
typename local_matrix_type::memory_space memory_space
typename local_matrix_type::value_type scalar_type
Kokkos::View< local_ordinal_type *, memory_space > block_indices_view_type
VectorCountingFunctor(local_matrix_type &A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view &results_, rowptr_type &filtered_rowptr_, rowptr_type &graph_rowptr_, functor_type &functor_)
typename local_matrix_type::staticcrsgraph_type local_graph_type
Functor does not reuse the graph of the matrix for a problem with blockSize == 1. ...
BlockRowComparison< local_matrix_type > comparison
KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type brlid) const
BlockRowComparison(local_matrix_type &A_, local_ordinal_type bsize_, block_indices_view_type ghosted_point_to_block_)
Kokkos::View< DecisionType *, memory_space > results_view
BlockRowComparison< local_matrix_type > comparison
TransListIter end
Kokkos::View< local_ordinal_type *, memory_space > block_indices_view_type
Kokkos::View< local_ordinal_type *, memory_space > block_indices_view_type
KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type brlid) const
typename local_matrix_type::ordinal_type local_ordinal_type
MergeFillFunctor(local_matrix_type &A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, local_matrix_type &mergedA_)
KOKKOS_INLINE_FUNCTION void join(Kokkos::pair< local_ordinal_type, local_ordinal_type > &dest, const Kokkos::pair< local_ordinal_type, local_ordinal_type > &src) const
typename local_matrix_type::staticcrsgraph_type local_graph_type