Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Details_CrsArrays.hpp
Go to the documentation of this file.
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
47 
48 #ifndef __IFPACK2_CRSARRAYS_DECL_HPP__
49 #define __IFPACK2_CRSARRAYS_DECL_HPP__
50 
51 #include <Tpetra_RowMatrix.hpp>
52 #include <Tpetra_CrsMatrix.hpp>
53 #include <Kokkos_DefaultNode.hpp>
54 #include <KokkosSparse_CrsMatrix.hpp>
55 #include <Ifpack2_LocalFilter.hpp>
56 #include <Ifpack2_ReorderFilter.hpp>
57 
58 namespace Ifpack2
59 {
60 namespace Details
61 {
62 
63 //Utility for getting the local values, rowptrs and colinds (in Kokkos::Views) for any RowMatrix
64 //Used by Fic, Filu and Fildl but may also be useful in other classes
65 template<typename Scalar, typename LocalOrdinal, typename GlobalOrdinal, typename Node>
66 struct CrsArrayReader
67 {
68  typedef typename Node::device_type device_type;
69  typedef typename device_type::execution_space execution_space;
70  typedef Tpetra::RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> TRowMatrix;
71  typedef Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> TCrsMatrix;
72  typedef Ifpack2::LocalFilter<TRowMatrix> Filter;
73  typedef Ifpack2::ReorderFilter<TRowMatrix> ReordFilter;
74  typedef KokkosSparse::CrsMatrix<Scalar, LocalOrdinal, execution_space> KCrsMatrix;
75  typedef Kokkos::View<LocalOrdinal*, execution_space> OrdinalArray;
76  typedef Kokkos::View<Scalar*, execution_space> ScalarArray;
77  typedef Kokkos::View<LocalOrdinal*, Kokkos::HostSpace> OrdinalArrayHost;
78  typedef Kokkos::View<Scalar*, Kokkos::HostSpace> ScalarArrayHost;
80  typedef Kokkos::Serial functor_space;
81  typedef Kokkos::RangePolicy<functor_space, int> RangePol;
82 
85  {
86  CountEntriesFunctor(const TRowMatrix* A_, OrdinalArrayHost& rowptrs_) : A(A_), rowptrs(rowptrs_) {}
87  KOKKOS_INLINE_FUNCTION void operator()(const int row) const
88  {
89  rowptrs(row) = A->getNumEntriesInLocalRow(row);
90  }
91  const TRowMatrix* A;
92  OrdinalArrayHost& rowptrs;
93  };
94 
97  {
98  GetIndicesFunctor(const TRowMatrix* A_, const OrdinalArrayHost& rowptrs_, OrdinalArrayHost& colinds_) :
99  A(A_), rowptrs(rowptrs_), colinds(colinds_) {}
100  KOKKOS_INLINE_FUNCTION void operator()(const int row) const
101  {
102  LocalOrdinal offset = rowptrs(row);
103  size_t entries = rowptrs(row + 1) - offset;
104  Teuchos::Array<Scalar> valsArray(entries);
105  Teuchos::Array<LocalOrdinal> indicesArray(entries);
106  auto valsView = valsArray();
107  auto indicesView = indicesArray();
108  A->getLocalRowCopy(row, indicesView, valsView, entries);
109  std::sort(indicesView.getRawPtr(), indicesView().getRawPtr() + entries);
110  for(LocalOrdinal i = 0; i < entries; i++)
111  {
112  colinds(offset + i) = indicesView[i];
113  }
114  }
115  const TRowMatrix* A;
116  const OrdinalArrayHost& rowptrs;
117  OrdinalArrayHost& colinds;
118  };
119 
122  {
123  GetValuesFunctor(const TRowMatrix* A_, const OrdinalArrayHost& rowptrs_, ScalarArrayHost& vals_) :
124  A(A_), rowptrs(rowptrs_), vals(vals_) {}
125  KOKKOS_INLINE_FUNCTION void operator()(const int row) const
126  {
127  LocalOrdinal offset = rowptrs(row);
128  std::cout << "Rowptr[" << row << "] = " << offset << '\n';
129  size_t entries = rowptrs(row + 1) - offset;
130  Teuchos::Array<Scalar> valsArray(entries);
131  Teuchos::Array<LocalOrdinal> indicesArray(entries);
132  auto valsView = valsArray();
133  auto indicesView = indicesArray();
134  A->getLocalRowCopy(row, indicesView, valsView, entries);
135  Tpetra::sort2(indicesView.getRawPtr(), indicesView.getRawPtr() + entries, valsView.getRawPtr());
136  for(LocalOrdinal i = 0; i < entries; i++)
137  {
138  vals(offset + i) = valsView[i];
139  }
140  }
141  const TRowMatrix* A;
142  const OrdinalArrayHost& rowptrs;
143  ScalarArrayHost& vals;
144  };
145 
147  // \param[in] A The matrix
148  // \param[out] vals The values array on device (allocated inside function)
149  // \param[in] rowptrs The rowptrs host view provided by getStructure()
150  static void getValues(const TRowMatrix* A, ScalarArray& vals, OrdinalArrayHost& rowptrs)
151  {
152  auto Acrs = dynamic_cast<const TCrsMatrix*>(A);
153  if(Acrs)
154  {
155  getValuesCrs(Acrs, vals);
156  return;
157  }
158  //Allocate host values and device values
159  LocalOrdinal nrows = A->getNodeNumRows();
160  ScalarArrayHost valsHost("Values (host)", rowptrs[nrows]);
161  vals = ScalarArray("Values", rowptrs[nrows]);
162  GetValuesFunctor funct(A, rowptrs, valsHost);
163  Kokkos::parallel_for(RangePol(0, nrows), funct);
164  Kokkos::deep_copy(vals, valsHost);
165  }
166 
168  // \param[in] A The matrix
169  // \param[out] rowptrsHost The rowptrs array, in host space (allocated inside function)
170  // \param[out] rowptrs The rowptrs host array, in device space (allocated inside function). Will have exactly the same values as rowptrsHost.
171  // \param[out] colinds The colinds array, in device space (allocated inside function)
172  static void getStructure(const TRowMatrix* A, OrdinalArrayHost& rowptrsHost, OrdinalArray& rowptrs, OrdinalArray& colinds)
173  {
174  auto Acrs = dynamic_cast<const TCrsMatrix*>(A);
175  if(Acrs)
176  {
177  getStructureCrs(Acrs, rowptrs, colinds);
178  return;
179  }
180  //Need to allocate new array, then copy in one row at a time
181  //note: actual rowptrs in the CrsMatrix implementation is size_t, but
182  //FastILU needs them as LocalOrdinal so this function provides an OrdinalArray
183  LocalOrdinal nrows = A->getNodeNumRows();
184  rowptrsHost = OrdinalArrayHost("RowPtrs (host)", nrows + 1);
185  rowptrs = OrdinalArray("RowPtrs", nrows + 1);
186  CountEntriesFunctor countFunct(A, rowptrsHost);
187  Kokkos::parallel_for(RangePol(0, nrows), countFunct);
188  //convert rowptrsHost to prefix sum
189  {
190  LocalOrdinal accum = 0;
191  for(LocalOrdinal i = 0; i < nrows; i++)
192  {
193  LocalOrdinal temp = rowptrsHost[i];
194  rowptrsHost[i] = accum;
195  accum += temp;
196  }
197  rowptrsHost[nrows] = accum;
198  }
199  //Get colinds (host)
200  OrdinalArrayHost colindsHost = OrdinalArrayHost("ColInds (host)", rowptrsHost[nrows]);
201  colinds = OrdinalArray("ColInds", rowptrsHost[nrows]);
202  GetIndicesFunctor indicesFunct(A, rowptrsHost, colindsHost);
203  Kokkos::parallel_for(RangePol(0, nrows), indicesFunct);
204  //copy rowptrs and colinds to device
205  Kokkos::deep_copy(rowptrs, rowptrsHost);
206  Kokkos::deep_copy(colinds, colindsHost);
207  }
208 
209  private:
210 
212  static void getValuesCrs(const TCrsMatrix* A, ScalarArray& vals)
213  {
214  vals = A->getLocalMatrix().values;
215  }
216 
218  static void getStructureCrs(const TCrsMatrix* A, OrdinalArray& rowptrs, OrdinalArray& colinds)
219  {
220  //rowptrs really have data type size_t, but need them as LocalOrdinal, so must convert manually
221  auto rowmap = A->getLocalMatrix().graph.row_map;
222  //allocate rowptrs, it's a deep copy (colinds is a shallow copy so not necessary for it)
223  rowptrs = OrdinalArray("RowPtrs", A->getNodeNumRows() + 1);
224  for(size_t i = 0; i < rowmap.extent(0); i++)
225  {
226  rowptrs[i] = rowmap[i];
227  }
228  colinds = A->getLocalMatrix().graph.entries;
229  }
230 };
231 
232 } //Details
233 } //Ifpack2
234 
235 #endif
236 
Wraps a Tpetra::RowMatrix in a filter that reorders local rows and columns.
Definition: Ifpack2_ReorderFilter_decl.hpp:69
Functor for counting matrix entries per row in parallel.
Definition: Ifpack2_Details_CrsArrays.hpp:84
Functor for getting matrix values in parallel.
Definition: Ifpack2_Details_CrsArrays.hpp:121
Functor for getting column indices in parallel.
Definition: Ifpack2_Details_CrsArrays.hpp:96
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:160