MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AMGXOperator_decl.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_AMGXOPERATOR_DECL_HPP
11 #define MUELU_AMGXOPERATOR_DECL_HPP
12 
13 #if defined(HAVE_MUELU_AMGX)
15 
16 #include <Tpetra_Operator.hpp>
17 #include <Tpetra_CrsMatrix.hpp>
18 #include <Tpetra_MultiVector.hpp>
19 #include <Tpetra_Distributor.hpp>
20 #include <Tpetra_HashTable.hpp>
21 #include <Tpetra_Import.hpp>
22 #include <Tpetra_Import_Util.hpp>
23 
24 #include "MueLu_Exceptions.hpp"
25 #include "MueLu_TimeMonitor.hpp"
26 #include "MueLu_TpetraOperator.hpp"
27 #include "MueLu_VerboseObject.hpp"
28 
29 #include <cuda_runtime.h>
30 #include <amgx_c.h>
31 
32 namespace MueLu {
33 
40 template <class Scalar,
41  class LocalOrdinal,
42  class GlobalOrdinal,
43  class Node>
44 class AMGXOperator : public TpetraOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node>, public BaseClass {
45  private:
46  typedef Scalar SC;
47  typedef LocalOrdinal LO;
48  typedef GlobalOrdinal GO;
49  typedef Node NO;
50 
53 
54  public:
56 
57 
60 
62  virtual ~AMGXOperator() {}
63 
65 
68  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
69  }
70 
73  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
74  }
75 
77 
83  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
84  }
85 
87  bool hasTransposeApply() const {
88  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
89  }
90 
92  throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n");
93  }
94 
95  private:
96 };
97 
104 template <class Node>
105 class AMGXOperator<double, int, int, Node> : public TpetraOperator<double, int, int, Node> {
106  private:
107  typedef double SC;
108  typedef int LO;
109  typedef int GO;
110  typedef Node NO;
111 
114 
115  void printMaps(Teuchos::RCP<const Teuchos::Comm<int> >& comm, const std::vector<std::vector<int> >& vec, const std::vector<int>& perm,
116  const int* nbrs, const Map& map, const std::string& label) {
117  for (int p = 0; p < comm->getSize(); p++) {
118  if (comm->getRank() == p) {
119  std::cout << "========\n"
120  << label << ", lid (gid), PID " << p << "\n========" << std::endl;
121 
122  for (size_t i = 0; i < vec.size(); ++i) {
123  std::cout << " neighbor " << nbrs[i] << " :";
124  for (size_t j = 0; j < vec[i].size(); ++j)
125  std::cout << " " << vec[i][j] << " (" << map.getGlobalElement(perm[vec[i][j]]) << ")";
126  std::cout << std::endl;
127  }
128  std::cout << std::endl;
129  } else {
130  sleep(1);
131  }
132  comm->barrier();
133  }
134  }
135 
136  public:
138 
140  RCP<const Teuchos::Comm<int> > comm = inA->getRowMap()->getComm();
141  int numProcs = comm->getSize();
142  int myRank = comm->getRank();
143 
144  RCP<Teuchos::Time> amgxTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: initialize");
145  amgxTimer->start();
146  // Initialize
147  // AMGX_SAFE_CALL(AMGX_initialize());
148  // AMGX_SAFE_CALL(AMGX_initialize_plugins());
149 
150  /*system*/
151  // AMGX_SAFE_CALL(AMGX_register_print_callback(&print_callback));
152  AMGX_SAFE_CALL(AMGX_install_signal_handler());
153  Teuchos::ParameterList configs = paramListIn.sublist("amgx:params", true);
154  if (configs.isParameter("json file")) {
155  AMGX_SAFE_CALL(AMGX_config_create_from_file(&Config_, (const char*)&configs.get<std::string>("json file")[0]));
156  } else {
157  std::ostringstream oss;
158  oss << "";
160  for (itr = configs.begin(); itr != configs.end(); ++itr) {
161  const std::string& name = configs.name(itr);
162  const ParameterEntry& entry = configs.entry(itr);
163  oss << name << "=" << filterValueToString(entry) << ", ";
164  }
165  oss << "\0";
166  std::string configString = oss.str();
167  if (configString == "") {
168  // print msg that using defaults
169  // GetOStream(Warnings0) << "Warning: No configuration parameters specified, using default AMGX configuration parameters. \n";
170  }
171  AMGX_SAFE_CALL(AMGX_config_create(&Config_, configString.c_str()));
172  }
173 
174  // TODO: we probably need to add "exception_handling=1" to the parameter list
175  // to switch on internal error handling (with no need for AMGX_SAFE_CALL)
176 
177  // AMGX_SAFE_CALL(AMGX_config_add_parameters(&Config_, "exception_handling=1"))
178 
179 #define NEW_COMM
180 #ifdef NEW_COMM
181  // NOTE: MPI communicator used in AMGX_resources_create must exist in the scope of AMGX_matrix_comm_from_maps_one_ring
182  // FIXME: fix for serial comm
183  RCP<const Teuchos::MpiComm<int> > tmpic = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm->duplicate());
184  TEUCHOS_TEST_FOR_EXCEPTION(tmpic.is_null(), Exceptions::RuntimeError, "Communicator is not MpiComm");
185 
186  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
187  MPI_Comm mpiComm = *rawMpiComm;
188 #endif
189 
190  // Construct AMGX resources
191  if (numProcs == 1) {
192  AMGX_resources_create_simple(&Resources_, Config_);
193 
194  } else {
195  int numGPUDevices;
196  cudaGetDeviceCount(&numGPUDevices);
197  int device[] = {(comm->getRank() % numGPUDevices)};
198 
199  AMGX_config_add_parameters(&Config_, "communicator=MPI");
200 #ifdef NEW_COMM
201  AMGX_resources_create(&Resources_, Config_, &mpiComm, 1 /* number of GPU devices utilized by this rank */, device);
202 #else
203  AMGX_resources_create(&Resources_, Config_, MPI_COMM_WORLD, 1 /* number of GPU devices utilized by this rank */, device);
204 #endif
205  }
206 
207  AMGX_Mode mode = AMGX_mode_dDDI;
208  AMGX_solver_create(&Solver_, Resources_, mode, Config_);
209  AMGX_matrix_create(&A_, Resources_, mode);
210  AMGX_vector_create(&X_, Resources_, mode);
211  AMGX_vector_create(&Y_, Resources_, mode);
212 
213  amgxTimer->stop();
214  amgxTimer->incrementNumCalls();
215 
216  std::vector<int> amgx2muelu;
217 
218  // Construct AMGX communication pattern
219  if (numProcs > 1) {
220  RCP<const Tpetra::Import<LO, GO, NO> > importer = inA->getCrsGraph()->getImporter();
221 
222  TEUCHOS_TEST_FOR_EXCEPTION(importer.is_null(), MueLu::Exceptions::RuntimeError, "The matrix A has no Import object.");
223 
224  Tpetra::Distributor distributor = importer->getDistributor();
225 
226  Array<int> sendRanks = distributor.getProcsTo();
227  Array<int> recvRanks = distributor.getProcsFrom();
228 
229  std::sort(sendRanks.begin(), sendRanks.end());
230  std::sort(recvRanks.begin(), recvRanks.end());
231 
232  bool match = true;
233  if (sendRanks.size() != recvRanks.size()) {
234  match = false;
235  } else {
236  for (int i = 0; i < sendRanks.size(); i++) {
237  if (recvRanks[i] != sendRanks[i])
238  match = false;
239  break;
240  }
241  }
243  "AMGX requires that the processors that we send to and receive from are the same. "
244  "This is not the case: we send to {"
245  << sendRanks << "} and receive from {" << recvRanks << "}");
246 
247  int num_neighbors = sendRanks.size(); // does not include the calling process
248  const int* neighbors = &sendRanks[0];
249 
250  // Later on, we'll have to organize the send and recv data by PIDs,
251  // i.e, a vector V of vectors, where V[i] is PID i's vector of data.
252  // Hence we need to be able to quickly look up an array index
253  // associated with each PID.
254  Tpetra::Details::HashTable<int, int> hashTable(3 * num_neighbors);
255  for (int i = 0; i < num_neighbors; i++)
256  hashTable.add(neighbors[i], i);
257 
258  // Get some information out
259  ArrayView<const int> exportLIDs = importer->getExportLIDs();
260  ArrayView<const int> exportPIDs = importer->getExportPIDs();
261  Array<int> importPIDs;
262  Tpetra::Import_Util::getPids(*importer, importPIDs, true /* make local -1 */);
263 
264  // Construct the reordering for AMGX as in AMGX_matrix_upload_all documentation
265  RCP<const Map> rowMap = inA->getRowMap();
266  RCP<const Map> colMap = inA->getColMap();
267 
268  int N = rowMap->getLocalNumElements(), Nc = colMap->getLocalNumElements();
269  muelu2amgx_.resize(Nc, -1);
270 
271  int numUniqExports = 0;
272  for (int i = 0; i < exportLIDs.size(); i++)
273  if (muelu2amgx_[exportLIDs[i]] == -1) {
274  numUniqExports++;
275  muelu2amgx_[exportLIDs[i]] = -2;
276  }
277 
278  int localOffset = 0, exportOffset = N - numUniqExports;
279  // Go through exported LIDs and put them at the end of LIDs
280  for (int i = 0; i < exportLIDs.size(); i++)
281  if (muelu2amgx_[exportLIDs[i]] < 0) // exportLIDs are not unique
282  muelu2amgx_[exportLIDs[i]] = exportOffset++;
283  // Go through all non-export LIDs, and put them at the beginning of LIDs
284  for (int i = 0; i < N; i++)
285  if (muelu2amgx_[i] == -1)
286  muelu2amgx_[i] = localOffset++;
287  // Go through the tail (imported LIDs), and order those by neighbors
288  int importOffset = N;
289  for (int k = 0; k < num_neighbors; k++)
290  for (int i = 0; i < importPIDs.size(); i++)
291  if (importPIDs[i] != -1 && hashTable.get(importPIDs[i]) == k)
292  muelu2amgx_[i] = importOffset++;
293 
294  amgx2muelu.resize(muelu2amgx_.size());
295  for (int i = 0; i < (int)muelu2amgx_.size(); i++)
296  amgx2muelu[muelu2amgx_[i]] = i;
297 
298  // Construct send arrays
299  std::vector<std::vector<int> > sendDatas(num_neighbors);
300  std::vector<int> send_sizes(num_neighbors, 0);
301  for (int i = 0; i < exportPIDs.size(); i++) {
302  int index = hashTable.get(exportPIDs[i]);
303  sendDatas[index].push_back(muelu2amgx_[exportLIDs[i]]);
304  send_sizes[index]++;
305  }
306  // FIXME: sendDatas must be sorted (based on GIDs)
307 
308  std::vector<const int*> send_maps(num_neighbors);
309  for (int i = 0; i < num_neighbors; i++)
310  send_maps[i] = &(sendDatas[i][0]);
311 
312  // Debugging
313  // printMaps(comm, sendDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "send_map_vector");
314 
315  // Construct recv arrays
316  std::vector<std::vector<int> > recvDatas(num_neighbors);
317  std::vector<int> recv_sizes(num_neighbors, 0);
318  for (int i = 0; i < importPIDs.size(); i++)
319  if (importPIDs[i] != -1) {
320  int index = hashTable.get(importPIDs[i]);
321  recvDatas[index].push_back(muelu2amgx_[i]);
322  recv_sizes[index]++;
323  }
324  // FIXME: recvDatas must be sorted (based on GIDs)
325 
326  std::vector<const int*> recv_maps(num_neighbors);
327  for (int i = 0; i < num_neighbors; i++)
328  recv_maps[i] = &(recvDatas[i][0]);
329 
330  // Debugging
331  // printMaps(comm, recvDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "recv_map_vector");
332 
333  AMGX_SAFE_CALL(AMGX_matrix_comm_from_maps_one_ring(A_, 1, num_neighbors, neighbors, &send_sizes[0], &send_maps[0], &recv_sizes[0], &recv_maps[0]));
334 
335  AMGX_vector_bind(X_, A_);
336  AMGX_vector_bind(Y_, A_);
337  }
338 
339  RCP<Teuchos::Time> matrixTransformTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transform matrix");
340  matrixTransformTimer->start();
341 
345  inA->getAllValues(ia_s, ja, a);
346 
347  ArrayRCP<int> ia(ia_s.size());
348  for (int i = 0; i < ia.size(); i++)
349  ia[i] = Teuchos::as<int>(ia_s[i]);
350 
351  N_ = inA->getLocalNumRows();
352  int nnz = inA->getLocalNumEntries();
353 
354  matrixTransformTimer->stop();
355  matrixTransformTimer->incrementNumCalls();
356 
357  // Upload matrix
358  // TODO Do we need to pin memory here through AMGX_pin_memory?
359  RCP<Teuchos::Time> matrixTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer matrix CPU->GPU");
360  matrixTimer->start();
361  if (numProcs == 1) {
362  AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia[0], &ja[0], &a[0], NULL);
363 
364  } else {
365  // Transform the matrix
366  std::vector<int> ia_new(ia.size());
367  std::vector<int> ja_new(ja.size());
368  std::vector<double> a_new(a.size());
369 
370  ia_new[0] = 0;
371  for (int i = 0; i < N_; i++) {
372  int oldRow = amgx2muelu[i];
373 
374  ia_new[i + 1] = ia_new[i] + (ia[oldRow + 1] - ia[oldRow]);
375 
376  for (int j = ia[oldRow]; j < ia[oldRow + 1]; j++) {
377  int offset = j - ia[oldRow];
378  ja_new[ia_new[i] + offset] = muelu2amgx_[ja[j]];
379  a_new[ia_new[i] + offset] = a[j];
380  }
381  // Do bubble sort on two arrays
382  // NOTE: There are multiple possible optimizations here (even of bubble sort)
383  bool swapped;
384  do {
385  swapped = false;
386 
387  for (int j = ia_new[i]; j < ia_new[i + 1] - 1; j++)
388  if (ja_new[j] > ja_new[j + 1]) {
389  std::swap(ja_new[j], ja_new[j + 1]);
390  std::swap(a_new[j], a_new[j + 1]);
391  swapped = true;
392  }
393  } while (swapped == true);
394  }
395 
396  AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia_new[0], &ja_new[0], &a_new[0], NULL);
397  }
398  matrixTimer->stop();
399  matrixTimer->incrementNumCalls();
400 
401  domainMap_ = inA->getDomainMap();
402  rangeMap_ = inA->getRangeMap();
403 
404  RCP<Teuchos::Time> realSetupTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: setup (total)");
405  realSetupTimer->start();
406  AMGX_solver_setup(Solver_, A_);
407  realSetupTimer->stop();
408  realSetupTimer->incrementNumCalls();
409 
410  vectorTimer1_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vectors CPU->GPU");
411  vectorTimer2_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vector GPU->CPU");
412  solverTimer_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: Solve (total)");
413  }
414 
416  virtual ~AMGXOperator() {
417  // Comment this out if you need rebuild to work. This causes AMGX_solver_destroy memory issues.
418  AMGX_SAFE_CALL(AMGX_solver_destroy(Solver_));
419  AMGX_SAFE_CALL(AMGX_vector_destroy(X_));
420  AMGX_SAFE_CALL(AMGX_vector_destroy(Y_));
421  AMGX_SAFE_CALL(AMGX_matrix_destroy(A_));
422  AMGX_SAFE_CALL(AMGX_resources_destroy(Resources_));
423  AMGX_SAFE_CALL(AMGX_config_destroy(Config_));
424  }
425 
428 
431 
433 
439 
441  bool hasTransposeApply() const;
442 
444  throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n");
445  }
446 
447  std::string filterValueToString(const Teuchos::ParameterEntry& entry) {
448  return (entry.isList() ? std::string("...") : toString(entry.getAny()));
449  }
450 
451  int sizeA() {
452  int sizeX, sizeY, n;
453  AMGX_matrix_get_size(A_, &n, &sizeX, &sizeY);
454  return n;
455  }
456 
457  int iters() {
458  int it;
459  AMGX_solver_get_iterations_number(Solver_, &it);
460  return it;
461  }
462 
463  AMGX_SOLVE_STATUS getStatus() {
464  AMGX_SOLVE_STATUS status;
465  AMGX_solver_get_status(Solver_, &status);
466  return status;
467  }
468 
469  private:
470  AMGX_solver_handle Solver_;
471  AMGX_resources_handle Resources_;
472  AMGX_config_handle Config_;
473  AMGX_matrix_handle A_;
474  AMGX_vector_handle X_;
475  AMGX_vector_handle Y_;
476  int N_;
477 
480 
481  std::vector<int> muelu2amgx_;
482 
486 };
487 
488 } // namespace MueLu
489 
490 #endif // HAVE_MUELU_AMGX
491 #endif // MUELU_AMGXOPERATOR_DECL_HPP
const std::string & name() const
ConstIterator end() const
MueLu::DefaultLocalOrdinal LocalOrdinal
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &inA, Teuchos::ParameterList &paramListIn)
T & get(const std::string &name, T def_value)
size_t getLocalNumElements() const
void printMaps(Teuchos::RCP< const Teuchos::Comm< int > > &comm, const std::vector< std::vector< int > > &vec, const std::vector< int > &perm, const int *nbrs, const Map &map, const std::string &label)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Teuchos::RCP< const Map > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
size_type size() const
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
size_type size() const
virtual ~AMGXOperator()
Destructor.
MueLu::DefaultNode Node
static RCP< Time > getNewTimer(const std::string &name)
bool isParameter(const std::string &name) const
void start(bool reset=false)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
double stop()
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Returns a solution for the linear system AX=Y in the Tpetra::MultiVector X.
params_t::ConstIterator ConstIterator
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
ConstIterator begin() const
iterator end()
std::string filterValueToString(const Teuchos::ParameterEntry &entry)
const ParameterEntry & entry(ConstIterator i) const
any & getAny(bool activeQry=true)
Teuchos::RCP< const Map > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
ValueType get(const KeyType key)
void add(const KeyType key, const ValueType value)
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &InA, Teuchos::ParameterList &paramListIn)
Constructor.
size_type size() const
Scalar SC
Tpetra::Map< LO, GO, NO > Map
Wraps an existing MueLu::Hierarchy as a Tpetra::Operator.
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
Exception throws to report errors in the internal logical of the program.
void incrementNumCalls()
iterator begin()
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
Adapter for AmgX library from Nvidia.
bool is_null() const