Teko  Version of the Day
 All Classes Files Functions Variables Pages
Teko_BlockingTpetra.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Teko: A package for block and physics based preconditioning
4 //
5 // Copyright 2010 NTESS and the Teko contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Teko_BlockingTpetra.hpp"
11 #include "Teko_Utilities.hpp"
12 #include "Tpetra_Details_makeColMap_decl.hpp"
13 
14 #include "Tpetra_Vector.hpp"
15 #include "Tpetra_Map.hpp"
16 #include "Kokkos_StdAlgorithms.hpp"
17 #include "Kokkos_Sort.hpp"
18 #include "Kokkos_NestedSort.hpp"
19 #include "KokkosSparse_SortCrs.hpp"
20 
21 using Teuchos::RCP;
22 using Teuchos::rcp;
23 
24 namespace Teko {
25 namespace TpetraHelpers {
26 namespace Blocking {
27 
41 const MapPair buildSubMap(const std::vector<GO>& gid, const Teuchos::Comm<int>& comm) {
42  using GST = Tpetra::global_size_t;
43  const GST invalid = Teuchos::OrdinalTraits<GST>::invalid();
44  Teuchos::RCP<Tpetra::Map<LO, GO, NT>> gidMap = rcp(
45  new Tpetra::Map<LO, GO, NT>(invalid, Teuchos::ArrayView<const GO>(gid), 0, rcpFromRef(comm)));
46  Teuchos::RCP<Tpetra::Map<LO, GO, NT>> contigMap =
47  rcp(new Tpetra::Map<LO, GO, NT>(invalid, gid.size(), 0, rcpFromRef(comm)));
48 
49  return std::make_pair(gidMap, contigMap);
50 }
51 
61 const ImExPair buildExportImport(const Tpetra::Map<LO, GO, NT>& baseMap, const MapPair& maps) {
62  return std::make_pair(rcp(new Tpetra::Import<LO, GO, NT>(rcpFromRef(baseMap), maps.first)),
63  rcp(new Tpetra::Export<LO, GO, NT>(maps.first, rcpFromRef(baseMap))));
64 }
65 
73 void buildSubVectors(const std::vector<MapPair>& maps,
74  std::vector<RCP<Tpetra::MultiVector<ST, LO, GO, NT>>>& vectors, int count) {
75  std::vector<MapPair>::const_iterator mapItr;
76 
77  // loop over all maps
78  for (mapItr = maps.begin(); mapItr != maps.end(); mapItr++) {
79  // add new elements to vectors
80  RCP<Tpetra::MultiVector<ST, LO, GO, NT>> mv =
81  rcp(new Tpetra::MultiVector<ST, LO, GO, NT>((*mapItr).second, count));
82  vectors.push_back(mv);
83  }
84 }
85 
93 void one2many(std::vector<RCP<Tpetra::MultiVector<ST, LO, GO, NT>>>& many,
94  const Tpetra::MultiVector<ST, LO, GO, NT>& single,
95  const std::vector<RCP<Tpetra::Import<LO, GO, NT>>>& subImport) {
96  std::vector<RCP<Tpetra::MultiVector<ST, LO, GO, NT>>>::const_iterator vecItr;
97  std::vector<RCP<Tpetra::Import<LO, GO, NT>>>::const_iterator impItr;
98 
99  // using Importers fill the sub vectors from the mama vector
100  for (vecItr = many.begin(), impItr = subImport.begin(); vecItr != many.end();
101  ++vecItr, ++impItr) {
102  // for ease of access to the destination
103  RCP<Tpetra::MultiVector<ST, LO, GO, NT>> destVec = *vecItr;
104 
105  // extract the map with global indicies from the current vector
106  const Tpetra::Map<LO, GO, NT>& globalMap = *(*impItr)->getTargetMap();
107 
108  // build the import vector as a view on the destination
109  GO lda = destVec->getStride();
110  GO destSize = destVec->getGlobalLength() * destVec->getNumVectors();
111  std::vector<ST> destArray(destSize);
112  Teuchos::ArrayView<ST> destVals(destArray);
113  destVec->get1dCopy(destVals, lda);
114  Tpetra::MultiVector<ST, LO, GO, NT> importVector(rcpFromRef(globalMap), destVals, lda,
115  destVec->getNumVectors());
116 
117  // perform the import
118  importVector.doImport(single, **impItr, Tpetra::INSERT);
119 
120  Tpetra::Import<LO, GO, NT> importer(destVec->getMap(), destVec->getMap());
121  importVector.replaceMap(destVec->getMap());
122  destVec->doExport(importVector, importer, Tpetra::INSERT);
123  }
124 }
125 
136 void many2one(Tpetra::MultiVector<ST, LO, GO, NT>& one,
137  const std::vector<RCP<const Tpetra::MultiVector<ST, LO, GO, NT>>>& many,
138  const std::vector<RCP<Tpetra::Export<LO, GO, NT>>>& subExport) {
139  std::vector<RCP<const Tpetra::MultiVector<ST, LO, GO, NT>>>::const_iterator vecItr;
140  std::vector<RCP<Tpetra::Export<LO, GO, NT>>>::const_iterator expItr;
141 
142  // using Exporters fill the empty vector from the sub-vectors
143  for (vecItr = many.begin(), expItr = subExport.begin(); vecItr != many.end();
144  ++vecItr, ++expItr) {
145  // for ease of access to the source
146  RCP<const Tpetra::MultiVector<ST, LO, GO, NT>> srcVec = *vecItr;
147 
148  // extract the map with global indicies from the current vector
149  const Tpetra::Map<LO, GO, NT>& globalMap = *(*expItr)->getSourceMap();
150 
151  // build the export vector as a view of the destination
152  GO lda = srcVec->getStride();
153  GO srcSize = srcVec->getGlobalLength() * srcVec->getNumVectors();
154  std::vector<ST> srcArray(srcSize);
155  Teuchos::ArrayView<ST> srcVals(srcArray);
156  srcVec->get1dCopy(srcVals, lda);
157  Tpetra::MultiVector<ST, LO, GO, NT> exportVector(rcpFromRef(globalMap), srcVals, lda,
158  srcVec->getNumVectors());
159 
160  // perform the export
161  one.doExport(exportVector, **expItr, Tpetra::INSERT);
162  }
163 }
164 
170 RCP<Tpetra::Vector<GO, LO, GO, NT>> getSubBlockColumnGIDs(
171  const Tpetra::CrsMatrix<ST, LO, GO, NT>& A, const MapPair& mapPair) {
172  RCP<const Tpetra::Map<LO, GO, NT>> blkGIDMap = mapPair.first;
173  RCP<const Tpetra::Map<LO, GO, NT>> blkContigMap = mapPair.second;
174 
175  // fill index vector for rows
176  Tpetra::Vector<GO, LO, GO, NT> rIndex(A.getRowMap(), true);
177  const auto invalidLO = Teuchos::OrdinalTraits<LO>::invalid();
178  for (size_t i = 0; i < A.getLocalNumRows(); i++) {
179  // LID is need to get to contiguous map
180  LO lid = blkGIDMap->getLocalElement(
181  A.getRowMap()->getGlobalElement(i)); // this LID makes me nervous
182  if (lid != invalidLO)
183  rIndex.replaceLocalValue(i, blkContigMap->getGlobalElement(lid));
184  else
185  rIndex.replaceLocalValue(i, invalidLO);
186  }
187 
188  // get relavant column indices
189 
190  Tpetra::Import<LO, GO, NT> import(A.getRowMap(), A.getColMap());
191  RCP<Tpetra::Vector<GO, LO, GO, NT>> cIndex =
192  rcp(new Tpetra::Vector<GO, LO, GO, NT>(A.getColMap(), true));
193  cIndex->doImport(rIndex, import, Tpetra::INSERT);
194 
195  return cIndex;
196 }
197 
198 RCP<Tpetra::CrsMatrix<ST, LO, GO, NT>> buildSubBlock(
199  int i, int j, const RCP<const Tpetra::CrsMatrix<ST, LO, GO, NT>>& A,
200  const std::vector<MapPair>& subMaps,
201  Teuchos::RCP<Tpetra::Vector<GO, LO, GO, NT>> plocal2ContigGIDs) {
202  // get the number of variables families
203  int numVarFamily = subMaps.size();
204 
205  TEUCHOS_ASSERT(i >= 0 && i < numVarFamily);
206  TEUCHOS_ASSERT(j >= 0 && j < numVarFamily);
207  const RCP<const Tpetra::Map<LO, GO, NT>> gRowMap = subMaps[i].first; // new GIDs
208  const RCP<const Tpetra::Map<LO, GO, NT>> rangeMap = subMaps[i].second; // contiguous GIDs
209  const RCP<const Tpetra::Map<LO, GO, NT>> domainMap = subMaps[j].second;
210  const RCP<const Tpetra::Map<LO, GO, NT>> rowMap = rangeMap;
211 
212  if (!plocal2ContigGIDs) {
213  plocal2ContigGIDs = Blocking::getSubBlockColumnGIDs(*A, subMaps[j]);
214  }
215  Tpetra::Vector<GO, LO, GO, NT>& local2ContigGIDs = *plocal2ContigGIDs;
216 
217  // get entry information
218  LO numMyRows = rangeMap->getLocalNumElements();
219  const auto invalidLO = Teuchos::OrdinalTraits<LO>::invalid();
220 
221  auto nEntriesPerRow =
222  Kokkos::View<LO*>(Kokkos::ViewAllocateWithoutInitializing("nEntriesPerRow"), numMyRows);
223  Kokkos::deep_copy(nEntriesPerRow, invalidLO);
224 
225  using local_matrix_type = Tpetra::CrsMatrix<ST, LO, GO, NT>::local_matrix_device_type;
226  using row_map_type = local_matrix_type::row_map_type::non_const_type;
227  using values_type = local_matrix_type::values_type::non_const_type;
228  using index_type = local_matrix_type::index_type::non_const_type;
229  auto prefixSumEntriesPerRow = row_map_type(
230  Kokkos::ViewAllocateWithoutInitializing("prefixSumEntriesPerRow"), numMyRows + 1);
231 
232  const auto invalid = Teuchos::OrdinalTraits<GO>::invalid();
233 
234  auto A_dev = A->getLocalMatrixDevice();
235  auto gRowMap_dev = gRowMap->getLocalMap();
236  auto A_rowmap_dev = A->getRowMap()->getLocalMap();
237  auto data = local2ContigGIDs.getLocalViewDevice(Tpetra::Access::ReadOnly);
238 
239  using matrix_execution_space =
240  typename Tpetra::CrsMatrix<ST, LO, GO, NT>::local_matrix_device_type::execution_space;
241 
242  LO totalNumOwnedCols = 0;
243  Kokkos::parallel_scan(
244  Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>, matrix_execution_space>(0, numMyRows),
245  KOKKOS_LAMBDA(const LO localRow, LO& sumNumEntries, bool finalPass) {
246  auto numOwnedCols = nEntriesPerRow(localRow);
247 
248  if (numOwnedCols == invalidLO) {
249  GO globalRow = gRowMap_dev.getGlobalElement(localRow);
250  LO lid = A_rowmap_dev.getLocalElement(globalRow);
251  const auto sparseRowView = A_dev.row(lid);
252 
253  numOwnedCols = 0;
254  for (auto localCol = 0; localCol < sparseRowView.length; localCol++) {
255  GO gid = data(sparseRowView.colidx(localCol), 0);
256  if (gid == invalid) continue;
257  numOwnedCols++;
258  }
259  nEntriesPerRow(localRow) = numOwnedCols;
260  }
261 
262  if (finalPass) {
263  prefixSumEntriesPerRow(localRow) = sumNumEntries;
264  if (localRow == (numMyRows - 1)) {
265  prefixSumEntriesPerRow(numMyRows) = prefixSumEntriesPerRow(localRow) + numOwnedCols;
266  }
267  }
268  sumNumEntries += numOwnedCols;
269  },
270  totalNumOwnedCols);
271 
272  using device_type = typename NT::device_type;
273  auto columnIndices = Kokkos::View<GO*, device_type>(
274  Kokkos::ViewAllocateWithoutInitializing("columnIndices"), totalNumOwnedCols);
275  auto values = values_type(Kokkos::ViewAllocateWithoutInitializing("values"), totalNumOwnedCols);
276 
277  LO maxNumEntriesSubblock = 0;
278  Kokkos::parallel_reduce(
279  Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>, matrix_execution_space>(0, numMyRows),
280  KOKKOS_LAMBDA(const LO localRow, LO& maxNumEntries) {
281  GO globalRow = gRowMap_dev.getGlobalElement(localRow);
282  LO lid = A_rowmap_dev.getLocalElement(globalRow);
283  const auto sparseRowView = A_dev.row(lid);
284 
285  LO colId = 0;
286  LO colIdStart = prefixSumEntriesPerRow[localRow];
287  for (auto localCol = 0; localCol < sparseRowView.length; localCol++) {
288  GO gid = data(sparseRowView.colidx(localCol), 0);
289  if (gid == invalid) continue;
290  auto value = sparseRowView.value(localCol);
291  columnIndices(colId + colIdStart) = gid;
292  values(colId + colIdStart) = value;
293  colId++;
294  }
295  const auto numOwnedCols = nEntriesPerRow(localRow);
296  maxNumEntries = Kokkos::max(maxNumEntries, numOwnedCols);
297  },
298  Kokkos::Max<LO>(maxNumEntriesSubblock));
299 
300  Teuchos::RCP<const Tpetra::Map<LO, GO, NT>> colMap;
301  Tpetra::Details::makeColMap<LO, GO, NT>(colMap, domainMap, columnIndices);
302  TEUCHOS_ASSERT(colMap);
303 
304  auto colMap_dev = colMap->getLocalMap();
305  auto localColumnIndices =
306  index_type(Kokkos::ViewAllocateWithoutInitializing("localColumnIndices"), totalNumOwnedCols);
307  Kokkos::parallel_for(
308  Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>, matrix_execution_space>(
309  0, totalNumOwnedCols),
310  KOKKOS_LAMBDA(const LO index) {
311  localColumnIndices(index) = colMap_dev.getLocalElement(columnIndices(index));
312  });
313 
314  KokkosSparse::sort_crs_matrix<matrix_execution_space, row_map_type, index_type, values_type>(
315  prefixSumEntriesPerRow, localColumnIndices, values);
316 
317  auto lcl_mat = Tpetra::CrsMatrix<ST, LO, GO, NT>::local_matrix_device_type(
318  "localMat", numMyRows, maxNumEntriesSubblock, totalNumOwnedCols, values,
319  prefixSumEntriesPerRow, localColumnIndices);
320 
321  RCP<Tpetra::CrsMatrix<ST, LO, GO, NT>> mat =
322  rcp(new Tpetra::CrsMatrix<ST, LO, GO, NT>(lcl_mat, rowMap, colMap, domainMap, rangeMap));
323 
324  return mat;
325 }
326 
327 void rebuildSubBlock(int i, int j, const RCP<const Tpetra::CrsMatrix<ST, LO, GO, NT>>& A,
328  const std::vector<MapPair>& subMaps, Tpetra::CrsMatrix<ST, LO, GO, NT>& mat,
329  Teuchos::RCP<Tpetra::Vector<GO, LO, GO, NT>> plocal2ContigGIDs) {
330  // get the number of variables families
331  int numVarFamily = subMaps.size();
332 
333  TEUCHOS_ASSERT(i >= 0 && i < numVarFamily);
334  TEUCHOS_ASSERT(j >= 0 && j < numVarFamily);
335 
336  const Tpetra::Map<LO, GO, NT>& gRowMap = *subMaps[i].first; // new GIDs
337  const Tpetra::Map<LO, GO, NT>& rangeMap = *subMaps[i].second; // contiguous GIDs
338  const Tpetra::Map<LO, GO, NT>& domainMap = *subMaps[j].second;
339  const Tpetra::Map<LO, GO, NT>& rowMap = *subMaps[i].second;
340  const auto& colMap = mat.getColMap();
341 
342  if (!plocal2ContigGIDs) {
343  plocal2ContigGIDs = Blocking::getSubBlockColumnGIDs(*A, subMaps[j]);
344  }
345  Tpetra::Vector<GO, LO, GO, NT>& local2ContigGIDs = *plocal2ContigGIDs;
346 
347  mat.resumeFill();
348  mat.setAllToScalar(0.0);
349 
350  LO numMyRows = rowMap.getLocalNumElements();
351  using matrix_execution_space =
352  typename Tpetra::CrsMatrix<ST, LO, GO, NT>::local_matrix_device_type::execution_space;
353  auto A_dev = A->getLocalMatrixDevice();
354  auto mat_dev = mat.getLocalMatrixDevice();
355  auto gRowMap_dev = gRowMap.getLocalMap();
356  auto A_rowmap_dev = A->getRowMap()->getLocalMap();
357  auto colMap_dev = colMap->getLocalMap();
358  auto data = local2ContigGIDs.getLocalViewDevice(Tpetra::Access::ReadOnly);
359  const auto invalid = Teuchos::OrdinalTraits<GO>::invalid();
360 
361  Kokkos::parallel_for(
362  Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>, matrix_execution_space>(0, numMyRows),
363  KOKKOS_LAMBDA(const LO localRow) {
364  GO globalRow = gRowMap_dev.getGlobalElement(localRow);
365  LO lid = A_rowmap_dev.getLocalElement(globalRow);
366  const auto sparseRowView = A_dev.row(lid);
367 
368  for (auto localCol = 0; localCol < sparseRowView.length; localCol++) {
369  GO gid = data(sparseRowView.colidx(localCol), 0);
370  if (gid == invalid) continue;
371 
372  auto lidCol = colMap_dev.getLocalElement(gid);
373  auto value = sparseRowView.value(localCol);
374  mat_dev.sumIntoValues(localRow, &lidCol, 1, &value, true, false);
375  }
376  });
377 
378  mat.fillComplete(rcpFromRef(domainMap), rcpFromRef(rangeMap));
379 }
380 
381 } // namespace Blocking
382 } // namespace TpetraHelpers
383 } // namespace Teko