Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 
11 #ifndef AMESOS2_TPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP
12 #define AMESOS2_TPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP
13 
15 #include "Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp"
16 #include "Amesos2_MatrixAdapter_def.hpp"
17 
18 namespace Amesos2 {
19 
20  template <typename Scalar,
21  typename LocalOrdinal,
22  typename GlobalOrdinal,
23  typename Node>
24  ConcreteMatrixAdapter<
25  Tpetra::CrsMatrix<Scalar,
26  LocalOrdinal,
27  GlobalOrdinal,
28  Node>
29  >::ConcreteMatrixAdapter(Teuchos::RCP<matrix_t> m)
30  : AbstractConcreteMatrixAdapter<Tpetra::RowMatrix<Scalar,
31  LocalOrdinal,
32  GlobalOrdinal,
33  Node>,
34  Tpetra::CrsMatrix<Scalar,
35  LocalOrdinal,
36  GlobalOrdinal,
37  Node> >(m) // with implicit cast
38  {}
39 
40  template <typename Scalar,
41  typename LocalOrdinal,
42  typename GlobalOrdinal,
43  typename Node>
44  Teuchos::RCP<const MatrixAdapter<Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > >
45  ConcreteMatrixAdapter<
46  Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
47  >::get_impl(const Teuchos::Ptr<const map_t> map, EDistribution distribution) const
48  {
49  using Teuchos::RCP;
50  using Teuchos::rcp;
51  using Teuchos::rcpFromPtr;
52  typedef Tpetra::Import<local_ordinal_t, global_ordinal_t, node_t> import_t;
53 
54  RCP<import_t> importer =
55  rcp (new import_t (this->getRowMap(), rcpFromPtr (map)));
56 
57  RCP<matrix_t> t_mat;
58 
59  t_mat = Tpetra::importAndFillCompleteCrsMatrix<matrix_t>( (this->mat_), *importer ); // DomainMap, RangeMap, params inputs default to Teuchos::null
60 
61  // Case for non-contiguous GIDs
62  if ( distribution == CONTIGUOUS_AND_ROOTED ) {
63 
64  auto myRank = map->getComm()->getRank();
65 
66  auto local_matrix = t_mat->getLocalMatrixDevice();
67  const size_t global_num_contiguous_entries = t_mat->getGlobalNumRows();
68  const size_t local_num_contiguous_entries = (myRank == 0) ? t_mat->getGlobalNumRows() : 0;
69 
70  //create maps
71  //typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type;
72  RCP<const map_t> contiguousRowMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
73  RCP<const map_t> contiguousColMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
74  RCP<const map_t> contiguousDomainMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
75  RCP<const map_t> contiguousRangeMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
76 
77  RCP<matrix_t> contiguous_t_mat = rcp( new matrix_t(contiguousRowMap, contiguousColMap, local_matrix) );
78  contiguous_t_mat->resumeFill();
79  contiguous_t_mat->expertStaticFillComplete(contiguousDomainMap, contiguousRangeMap);
80 
81  return rcp (new ConcreteMatrixAdapter<matrix_t> (contiguous_t_mat));
82  } //end if not contiguous
83 
84  return rcp (new ConcreteMatrixAdapter<matrix_t> (t_mat));
85  }
86 
87 
88 
89  template <typename Scalar,
90  typename LocalOrdinal,
91  typename GlobalOrdinal,
92  typename Node>
93  Teuchos::RCP<const MatrixAdapter<Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > >
94  ConcreteMatrixAdapter<
95  Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
96  >::reindex_impl(Teuchos::RCP<const map_t> &contigRowMap,
97  Teuchos::RCP<const map_t> &contigColMap,
98  const EPhase current_phase) const
99  {
100  typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type;
101  using Teuchos::RCP;
102  using Teuchos::rcp;
103 #ifdef HAVE_AMESOS2_TIMERS
104  auto reindexTimer = Teuchos::TimeMonitor::getNewTimer("Time to re-index matrix gids");
105  Teuchos::TimeMonitor ReindexTimer(*reindexTimer);
106 #endif
107 
108  auto rowMap = this->mat_->getRowMap();
109  auto colMap = this->mat_->getColMap();
110  auto rowComm = rowMap->getComm();
111  auto colComm = colMap->getComm();
112 
113  GlobalOrdinal indexBase = rowMap->getIndexBase();
114  GlobalOrdinal numDoFs = this->mat_->getGlobalNumRows();
115  LocalOrdinal nRows = this->mat_->getLocalNumRows();
116  LocalOrdinal nCols = colMap->getLocalNumElements();
117 
118  RCP<matrix_t> contiguous_t_mat;
119  // check when to recompute contigRowMap & contigColMap
120  if(current_phase == PREORDERING || current_phase == SYMBFACT) {
121  auto tmpMap = rcp (new contiguous_map_type (numDoFs, nRows, indexBase, rowComm));
122  global_ordinal_t frow = tmpMap->getMinGlobalIndex();
123 
124  // Create new GID list for RowMap
125  Kokkos::View<global_ordinal_t*, HostExecSpaceType> rowIndexList ("rowIndexList", nRows);
126  for (local_ordinal_t k = 0; k < nRows; k++) {
127  rowIndexList(k) = frow+k; // based on index-base of rowMap
128  }
129  // Create new GID list for ColMap
130  Kokkos::View<global_ordinal_t*, HostExecSpaceType> colIndexList ("colIndexList", nCols);
131  // initialize to catch col GIDs that are not in row GIDs
132  // they will be all assigned to (n+1)th columns
133  for (local_ordinal_t k = 0; k < nCols; k++) {
134  colIndexList(k) = numDoFs+indexBase;
135  }
136  typedef Tpetra::MultiVector<global_ordinal_t,
137  local_ordinal_t,
138  global_ordinal_t,
139  node_t> gid_mv_t;
140  Teuchos::ArrayView<const global_ordinal_t> rowIndexArray(rowIndexList.data(), nRows);
141  Teuchos::ArrayView<const global_ordinal_t> colIndexArray(colIndexList.data(), nCols);
142  gid_mv_t row_mv (rowMap, rowIndexArray, nRows, 1);
143  gid_mv_t col_mv (colMap, colIndexArray, nCols, 1);
144  typedef Tpetra::Import<local_ordinal_t, global_ordinal_t, node_t> import_t;
145  RCP<import_t> importer = rcp (new import_t (rowMap, colMap));
146  col_mv.doImport (row_mv, *importer, Tpetra::INSERT);
147  {
148  // col_mv is imported from rowIndexList, which is based on index-base of rowMap
149  auto col_view = col_mv.getLocalViewHost(Tpetra::Access::ReadOnly);
150  for(int i=0; i<nCols; i++) colIndexList(i) = col_view(i,0);
151  }
152  // Create new Row & Col Maps (both based on indexBase of rowMap)
153  contigRowMap = rcp (new contiguous_map_type (numDoFs, rowIndexList.data(), nRows, indexBase, rowComm));
154  contigColMap = rcp (new contiguous_map_type (numDoFs, colIndexList.data(), nCols, indexBase, colComm));
155 
156  // Create contiguous Matrix
157  auto lclMatrix = this->mat_->getLocalMatrixDevice();
158  contiguous_t_mat = rcp( new matrix_t(contigRowMap, contigColMap, lclMatrix));
159  } else {
160  // Build Matrix with contiguous Maps
161  auto lclMatrix = this->mat_->getLocalMatrixDevice();
162  auto importer = this->mat_->getCrsGraph()->getImporter();
163  auto exporter = this->mat_->getCrsGraph()->getExporter();
164  contiguous_t_mat = rcp( new matrix_t(lclMatrix, contigRowMap, contigColMap, contigRowMap, contigColMap, importer,exporter));
165  }
166 
167  // Return new matrix adapter
168  return rcp (new ConcreteMatrixAdapter<matrix_t> (contiguous_t_mat));
169  }
170 
171 
172  template <typename Scalar,
173  typename LocalOrdinal,
174  typename GlobalOrdinal,
175  typename Node>
176  template<typename KV_S, typename KV_GO, typename KV_GS, typename host_ordinal_type_array, typename host_scalar_type_array>
177  LocalOrdinal
178  ConcreteMatrixAdapter<
179  Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
180  >::gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers,
181  host_ordinal_type_array &perm_g2l,
182  host_ordinal_type_array &recvCountRows, host_ordinal_type_array &recvDisplRows,
183  host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls,
184  host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t,
185  bool column_major, EPhase current_phase) const
186  {
187  LocalOrdinal ret = -1;
188  {
189 #ifdef HAVE_AMESOS2_TIMERS
190  Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather");
191  Teuchos::TimeMonitor GatherTimer(*gatherTime);
192 #endif
193  auto rowMap = this->mat_->getRowMap();
194  auto colMap = this->mat_->getColMap();
195  auto comm = rowMap->getComm();
196  auto nRanks = comm->getSize();
197  auto myRank = comm->getRank();
198 
199  global_ordinal_t nRows = this->mat_->getGlobalNumRows();
200  auto lclMatrix = this->mat_->getLocalMatrixDevice();
201 
202  // check when to recompute communication patterns
203  if(current_phase == PREORDERING || current_phase == SYMBFACT) {
204  // grab rowptr and colind on host
205  auto lclRowptr_d = lclMatrix.graph.row_map;
206  auto lclColind_d = lclMatrix.graph.entries;
207  auto lclRowptr = Kokkos::create_mirror_view(lclRowptr_d);
208  auto lclColind = Kokkos::create_mirror_view(lclColind_d);
209  Kokkos::deep_copy(lclRowptr, lclRowptr_d);
210  Kokkos::deep_copy(lclColind, lclColind_d);
211 
212  // index-bases
213  global_ordinal_t rowIndexBase = rowMap->getIndexBase();
214  global_ordinal_t colIndexBase = colMap->getIndexBase();
215  // map from global to local
216  host_ordinal_type_array perm_l2g; // map from GIDs
217  // true uses 'original' contiguous row inds 0:(n-1) (no need to perm sol or rhs),
218  // false uses GIDs given by map (need to swap sol & rhs, but use matrix perm, e.g., fill-reducing)
219  bool swap_cols = false; //true;
220  // workspace to transpose
221  KV_GS pointers_t;
222  KV_GO indices_t;
223  // communication counts & displacements
224  LocalOrdinal myNRows = this->mat_->getLocalNumRows();
225  Kokkos::resize(recvCounts, nRanks);
226  Kokkos::resize(recvDispls, nRanks+1);
227  bool need_to_perm = false;
228  {
229 #ifdef HAVE_AMESOS2_TIMERS
230  Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(rowptr)");
231  Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
232 #endif
233  Teuchos::gather<int, LocalOrdinal> (&myNRows, 1, recvCounts.data(), 1, 0, *comm);
234  if (myRank == 0) {
235  Kokkos::resize(recvDispls, nRanks+1);
236  recvDispls(0) = 0;
237  for (int p = 1; p <= nRanks; p++) {
238  recvDispls(p) = recvDispls(p-1) + recvCounts(p-1);
239  }
240  if (recvDispls(nRanks) != nRows) {
241  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Amesos2_TpetraCrsMatrix_MatrixAdapter::gather_impl : mismatch between gathered(local nrows) and global nrows.");
242  }
243  } else {
244  for (int p = 0; p < nRanks; p++) {
245  recvCounts(p) = 0;
246  recvDispls(p) = 0;
247  }
248  recvDispls(nRanks) = 0;
249  }
250  // gether g2l perm (convert to 0-base)
251  {
252  host_ordinal_type_array lclMap;
253  Kokkos::resize(lclMap, myNRows);
254  if (myRank == 0) {
255  Kokkos::resize(perm_g2l, nRows);
256  Kokkos::resize(perm_l2g, nRows);
257  } else {
258  Kokkos::resize(perm_g2l, 1);
259  }
260  for (int i=0; i < myNRows; i++) {
261  lclMap(i) = rowMap->getGlobalElement(i);
262  }
263  Teuchos::gatherv<int, LocalOrdinal> (lclMap.data(), myNRows, perm_g2l.data(),
264  recvCounts.data(), recvDispls.data(),
265  0, *comm);
266  if (myRank == 0) {
267  for (int i=0; i < nRows; i++) {
268  perm_g2l(i) -= rowIndexBase; // map to GIDs
269  perm_l2g(perm_g2l(i)) = i; // map from GIDs
270  if (i != perm_g2l(i)) need_to_perm = true;
271  }
272  }
273  }
274  // gether rowptr
275  // - making sure same ordinal type
276  KV_GS lclRowptr_ ("localRowptr_", lclRowptr.extent(0));
277  for (int i = 0; i < int(lclRowptr.extent(0)); i++) lclRowptr_(i) = lclRowptr(i);
278  if (myRank == 0 && (column_major || need_to_perm)) {
279  Kokkos::resize(pointers_t, nRows+1);
280  } else if (myRank != 0) {
281  Kokkos::resize(pointers_t, 2);
282  }
283  LocalOrdinal sendIdx = (myNRows > 0 ? 1 : 0); // To skip sending the first rowptr entry (note: 0, if local matrix is empty)
284  LocalOrdinal *pointers_ = ((myRank != 0 || (column_major || need_to_perm)) ? pointers_t.data() : pointers.data());
285  Teuchos::gatherv<int, LocalOrdinal> (&lclRowptr_(sendIdx), myNRows, &pointers_[1],
286  recvCounts.data(), recvDispls.data(),
287  0, *comm);
288 
289  // save row counts/displs
290  Kokkos::resize(recvCountRows, nRanks);
291  Kokkos::resize(recvDisplRows, nRanks+1);
292  Kokkos::deep_copy(recvCountRows, recvCounts);
293  Kokkos::deep_copy(recvDisplRows, recvDispls);
294  if (myRank == 0) {
295  // shift to global pointers
296  pointers_[0] = 0;
297  recvCounts(0) = pointers_[recvDispls(1)];
298  LocalOrdinal displs = recvCounts(0);
299  for (int p = 1; p < nRanks; p++) {
300  // skip "Empty" submatrix (no rows)
301  // recvCounts(p) is zero, while pointers_[recvDispls(p+1)] now contains nnz from p-1
302  if (recvDispls(p+1) > recvDispls(p)) {
303  // save recvCounts from pth MPI
304  recvCounts(p) = pointers_[recvDispls(p+1)];
305  // shift pointers for pth MPI to global
306  for (int i = 1+recvDispls(p); i <= recvDispls(p+1); i++) {
307  pointers_[i] += displs;
308  }
309  displs += recvCounts(p);
310  }
311  }
312  ret = pointers_[nRows];
313  }
314  }
315  {
316 #ifdef HAVE_AMESOS2_TIMERS
317  Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(colind)");
318  Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
319 #endif
320  // gather colinds
321  if (myRank == 0) {
322  recvDispls(0) = 0;
323  for (int p = 0; p < nRanks; p++) {
324  recvDispls(p+1) = recvDispls(p) + recvCounts(p);
325  }
326  }
327  // -- convert to global colids & ** convert to base-zero **
328  KV_GO lclColind_ ("localColind_", lclColind.extent(0));
329  for (int i = 0; i < int(lclColind.extent(0)); i++) {
330  lclColind_(i) = (colMap->getGlobalElement((lclColind(i))) - colIndexBase);
331  }
332  if (column_major || need_to_perm) {
333  Kokkos::resize(indices_t, indices.extent(0));
334  Teuchos::gatherv<int, LocalOrdinal> (lclColind_.data(), lclColind_.extent(0), indices_t.data(),
335  recvCounts.data(), recvDispls.data(),
336  0, *comm);
337  } else {
338  Teuchos::gatherv<int, LocalOrdinal> (lclColind_.data(), lclColind_.extent(0), indices.data(),
339  recvCounts.data(), recvDispls.data(),
340  0, *comm);
341  }
342  }
343  if (myRank == 0) {
344 #ifdef HAVE_AMESOS2_TIMERS
345  Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(transpose index)");
346  Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
347 #endif
348  if (swap_cols && need_to_perm) {
349  // convert col GIDs to 0:(n-1)
350  for (int i=0; i<ret; i++) {
351  if (column_major || need_to_perm) {
352  indices_t(i) = perm_l2g(indices_t(i));
353  } else {
354  indices(i) = perm_l2g(indices(i));
355  }
356  }
357  }
358  // (note: column idexes are now in base-0)
359  if (column_major) {
360  // Map to transpose
361  Kokkos::resize(transpose_map, ret);
362  // Transopose to convert to CSC
363  for (int i=0; i<=nRows; i++) {
364  pointers(i) = 0;
365  }
366  for (int k=0; k<ret; k++) {
367  int col = indices_t(k);
368  if (col < nRows-1) {
369  pointers(col+2) ++;
370  }
371  }
372  for (int i=1; i < nRows; i++) {
373  pointers(i+1) += pointers(i);
374  }
375  // nonzeroes in each column are sorted in ascending row
376  for (int row=0; row<nRows; row++) {
377  // if swap cols, then just use the original 0:(n-1), otherwise GIDs
378  int i = (swap_cols ? row : perm_l2g(row));
379  for (int k=pointers_t(i); k<pointers_t(i+1); k++) {
380  int col = indices_t(k);
381  if (col < nRows) {
382  transpose_map(k) = pointers(1+col);
383  indices(pointers(1+col)) = row;
384  pointers(1+col) ++;
385  } else {
386  // extra columns
387  transpose_map(k) = -1;
388  }
389  }
390  }
391  } else if (need_to_perm) {
392  // Map to permute
393  Kokkos::resize(transpose_map, ret);
394  for (int i=0; i<nRows; i++) {
395  int row = perm_g2l(i);
396  pointers(row+2) = pointers_t(i+1)-pointers_t(i);
397  }
398  for (int i=1; i < nRows; i++) {
399  pointers(i+1) += pointers(i);
400  }
401  for (int i=0; i<nRows; i++) {
402  int row = perm_g2l(i);
403  for (int k=pointers_t(i); k<pointers_t(i+1); k++) {
404  int col = indices_t(k);
405  if (col < nRows) {
406  transpose_map(k) = pointers(1+row);
407  indices(pointers(1+row)) = col;
408  pointers(1+row) ++;
409  } else {
410  transpose_map(k) = -1;
411  }
412  }
413  }
414  } else {
415  Kokkos::resize(transpose_map, 0);
416  }
417  }
418  if (!need_to_perm || swap_cols) {
419  // no need to perm rhs or sol
420  Kokkos::resize(perm_g2l, 0);
421  }
422  }
423  //if(current_phase == NUMFACT) // Numerical values may be used in symbolic (e.g, MWM)
424  {
425  {
426 #ifdef HAVE_AMESOS2_TIMERS
427  Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(nzvals)");
428  Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
429 #endif
430  // grab numerical values on host
431  auto lclNzvals_d = lclMatrix.values;
432  auto lclNzvals = Kokkos::create_mirror_view(lclNzvals_d);;
433  Kokkos::deep_copy(lclNzvals, lclNzvals_d);
434 
435  // gather nzvals
436  if (transpose_map.extent(0) > 0) {
437  Kokkos::resize(nzvals_t, nzvals.extent(0));
438  Teuchos::gatherv<int, Scalar> (reinterpret_cast<Scalar*> (lclNzvals.data()), lclNzvals.extent(0),
439  reinterpret_cast<Scalar*> (nzvals_t.data()), recvCounts.data(), recvDispls.data(),
440  0, *comm);
441  } else {
442  Teuchos::gatherv<int, Scalar> (reinterpret_cast<Scalar*> (lclNzvals.data()), lclNzvals.extent(0),
443  reinterpret_cast<Scalar*> (nzvals.data()), recvCounts.data(), recvDispls.data(),
444  0, *comm);
445  }
446  }
447  if (myRank == 0) {
448  // Insert Numerical values to transopose matrix
449  ret = pointers(nRows);
450 #ifdef HAVE_AMESOS2_TIMERS
451  Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(transpose values)");
452  Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
453 #endif
454  if (transpose_map.extent(0) > 0) {
455  for (int k=0; k<ret; k++) {
456  if (transpose_map(k) >= 0) {
457  nzvals(transpose_map(k)) = nzvals_t(k);
458  }
459  }
460  }
461  }
462  }
463  // broadcast return value
464  Teuchos::broadcast<int, LocalOrdinal>(*comm, 0, 1, &ret);
465  }
466  return ret;
467  }
468 
469 
470  template <typename Scalar,
471  typename LocalOrdinal,
472  typename GlobalOrdinal,
473  typename Node>
474  void
475  ConcreteMatrixAdapter<
476  Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
477  >::describe (Teuchos::FancyOStream& os,
478  const Teuchos::EVerbosityLevel verbLevel) const
479  {
480  this->mat_->describe(os, verbLevel);
481  }
482 } // end namespace Amesos2
483 
484 #endif // AMESOS2_TPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
Specialization of the ConcreteMatrixAdapter for Tpetra::CrsMatrix. Inherits all its functionality fro...
EDistribution
Definition: Amesos2_TypeDecl.hpp:89