Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Details_CrsArrays.hpp
Go to the documentation of this file.
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
47 
48 #ifndef __IFPACK2_CRSARRAYS_DECL_HPP__
49 #define __IFPACK2_CRSARRAYS_DECL_HPP__
50 
51 #include <Tpetra_RowMatrix.hpp>
52 #include <Tpetra_CrsMatrix.hpp>
53 #include <Kokkos_DefaultNode.hpp>
54 #include <KokkosSparse_CrsMatrix.hpp>
55 #include <Ifpack2_LocalFilter.hpp>
56 #include <Ifpack2_ReorderFilter.hpp>
57 
58 namespace Ifpack2
59 {
60 namespace Details
61 {
62 
63 //Utility for getting the local values, rowptrs and colinds (in Kokkos::Views) for any RowMatrix
64 //Used by Fic, Filu and Fildl but may also be useful in other classes
65 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
66 struct CrsArrayReader
67 {
68  typedef typename Node::device_type device_type;
69  typedef typename device_type::execution_space execution_space;
70  typedef Tpetra::RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> TRowMatrix;
71  typedef Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> TCrsMatrix;
72  typedef Ifpack2::LocalFilter<TRowMatrix> Filter;
73  typedef Ifpack2::ReorderFilter<TRowMatrix> ReordFilter;
74  typedef KokkosSparse::CrsMatrix<Scalar, LocalOrdinal, execution_space> KCrsMatrix;
75  typedef Kokkos::View<LocalOrdinal*, execution_space> OrdinalArray;
76  typedef Kokkos::View<Scalar*, execution_space> ScalarArray;
77  typedef Kokkos::View<LocalOrdinal*, Kokkos::HostSpace> OrdinalArrayHost;
78  typedef Kokkos::View<Scalar*, Kokkos::HostSpace> ScalarArrayHost;
80  typedef Kokkos::Serial functor_space;
81  typedef Kokkos::RangePolicy<functor_space, int> RangePol;
82 
85  {
86  CountEntriesFunctor(const TRowMatrix* A_, OrdinalArrayHost& rowptrs_) : A(A_), rowptrs(rowptrs_) {}
87  KOKKOS_INLINE_FUNCTION void operator()(const int row) const
88  {
89  rowptrs(row) = A->getNumEntriesInLocalRow(row);
90  }
91  const TRowMatrix* A;
92  OrdinalArrayHost& rowptrs;
93  };
94 
97  {
98  GetIndicesFunctor(const TRowMatrix* A_, const OrdinalArrayHost& rowptrs_, OrdinalArrayHost& colinds_) :
99  A(A_), rowptrs(rowptrs_), colinds(colinds_) {}
100  KOKKOS_INLINE_FUNCTION void operator()(const int row) const
101  {
102  LocalOrdinal offset = rowptrs(row);
103  size_t entries = rowptrs(row + 1) - offset;
104  Teuchos::Array<Scalar> valsArray(entries);
105  Teuchos::Array<LocalOrdinal> indicesArray(entries);
106  auto valsView = valsArray();
107  auto indicesView = indicesArray();
108  A->getLocalRowCopy(row, indicesView, valsView, entries);
109  std::sort(indicesView.getRawPtr(), indicesView().getRawPtr() + entries);
110  for(LocalOrdinal i = 0; i < entries; i++)
111  {
112  colinds(offset + i) = indicesView[i];
113  }
114  }
115  const TRowMatrix* A;
116  const OrdinalArrayHost& rowptrs;
117  OrdinalArrayHost& colinds;
118  };
119 
122  {
123  GetValuesFunctor(const TRowMatrix* A_, const OrdinalArrayHost& rowptrs_, ScalarArrayHost& vals_) :
124  A(A_), rowptrs(rowptrs_), vals(vals_) {}
125  KOKKOS_INLINE_FUNCTION void operator()(const int row) const
126  {
127  LocalOrdinal offset = rowptrs(row);
128  size_t entries = rowptrs(row + 1) - offset;
129  Teuchos::Array<Scalar> valsArray(entries);
130  Teuchos::Array<LocalOrdinal> indicesArray(entries);
131  auto valsView = valsArray();
132  auto indicesView = indicesArray();
133  A->getLocalRowCopy(row, indicesView, valsView, entries);
134  Tpetra::sort2(indicesView.getRawPtr(), indicesView.getRawPtr() + entries, valsView.getRawPtr());
135  for(LocalOrdinal i = 0; i < entries; i++)
136  {
137  vals(offset + i) = valsView[i];
138  }
139  }
140  const TRowMatrix* A;
141  const OrdinalArrayHost& rowptrs;
142  ScalarArrayHost& vals;
143  };
144 
146  // \param[in] A The matrix
147  // \param[out] vals The values array on device (allocated inside function)
148  // \param[in] rowptrs The rowptrs host view provided by getStructure()
149  static void getValues(const TRowMatrix* A, ScalarArray& vals, OrdinalArrayHost& rowptrs)
150  {
151  auto Acrs = dynamic_cast<const TCrsMatrix*>(A);
152  if(Acrs)
153  {
154  getValuesCrs(Acrs, vals);
155  return;
156  }
157  //Allocate host values and device values
158  LocalOrdinal nrows = A->getNodeNumRows();
159  ScalarArrayHost valsHost("Values (host)", rowptrs[nrows]);
160  vals = ScalarArray("Values", rowptrs[nrows]);
161  GetValuesFunctor funct(A, rowptrs, valsHost);
162  Kokkos::parallel_for(RangePol(0, nrows), funct);
163  Kokkos::deep_copy(vals, valsHost);
164  }
165 
167  // \param[in] A The matrix
168  // \param[out] rowptrsHost The rowptrs array, in host space (allocated inside function)
169  // \param[out] rowptrs The rowptrs host array, in device space (allocated inside function). Will have exactly the same values as rowptrsHost.
170  // \param[out] colinds The colinds array, in device space (allocated inside function)
171  static void getStructure(const TRowMatrix* A, OrdinalArrayHost& rowptrsHost, OrdinalArray& rowptrs, OrdinalArray& colinds)
172  {
173  auto Acrs = dynamic_cast<const TCrsMatrix*>(A);
174  if(Acrs)
175  {
176  getStructureCrs(Acrs, rowptrs, colinds);
177  return;
178  }
179  //Need to allocate new array, then copy in one row at a time
180  //note: actual rowptrs in the CrsMatrix implementation is size_t, but
181  //FastILU needs them as LocalOrdinal so this function provides an OrdinalArray
182  LocalOrdinal nrows = A->getNodeNumRows();
183  rowptrsHost = OrdinalArrayHost("RowPtrs (host)", nrows + 1);
184  rowptrs = OrdinalArray("RowPtrs", nrows + 1);
185  CountEntriesFunctor countFunct(A, rowptrsHost);
186  Kokkos::parallel_for(RangePol(0, nrows), countFunct);
187  //convert rowptrsHost to prefix sum
188  {
189  LocalOrdinal accum = 0;
190  for(LocalOrdinal i = 0; i < nrows; i++)
191  {
192  LocalOrdinal temp = rowptrsHost[i];
193  rowptrsHost[i] = accum;
194  accum += temp;
195  }
196  rowptrsHost[nrows] = accum;
197  }
198  //Get colinds (host)
199  OrdinalArrayHost colindsHost = OrdinalArrayHost("ColInds (host)", rowptrsHost[nrows]);
200  colinds = OrdinalArray("ColInds", rowptrsHost[nrows]);
201  GetIndicesFunctor indicesFunct(A, rowptrsHost, colindsHost);
202  Kokkos::parallel_for(RangePol(0, nrows), indicesFunct);
203  //copy rowptrs and colinds to device
204  Kokkos::deep_copy(rowptrs, rowptrsHost);
205  Kokkos::deep_copy(colinds, colindsHost);
206  }
207 
208  private:
209 
211  static void getValuesCrs(const TCrsMatrix* A, ScalarArray& vals)
212  {
213  vals = A->getLocalMatrix().values;
214  }
215 
217  static void getStructureCrs(const TCrsMatrix* A, OrdinalArray& rowptrs, OrdinalArray& colinds)
218  {
219  //rowptrs really have data type size_t, but need them as LocalOrdinal, so must convert manually
220  auto rowmap = A->getLocalMatrix().graph.row_map;
221  //allocate rowptrs, it's a deep copy (colinds is a shallow copy so not necessary for it)
222  rowptrs = OrdinalArray("RowPtrs", A->getNodeNumRows() + 1);
223  for(size_t i = 0; i < rowmap.extent(0); i++)
224  {
225  rowptrs[i] = rowmap[i];
226  }
227  colinds = A->getLocalMatrix().graph.entries;
228  }
229 };
230 
231 } //Details
232 } //Ifpack2
233 
234 #endif
235 
Wraps a Tpetra::RowMatrix in a filter that reorders local rows and columns.
Definition: Ifpack2_ReorderFilter_decl.hpp:69
Functor for counting matrix entries per row in parallel.
Definition: Ifpack2_Details_CrsArrays.hpp:84
Functor for getting matrix values in parallel.
Definition: Ifpack2_Details_CrsArrays.hpp:121
Functor for getting column indices in parallel.
Definition: Ifpack2_Details_CrsArrays.hpp:96
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:160