MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AMGXOperator_decl.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AMGXOPERATOR_DECL_HPP
47 #define MUELU_AMGXOPERATOR_DECL_HPP
48 
49 #if defined(HAVE_MUELU_AMGX)
51 
52 #include <Tpetra_Operator.hpp>
53 #include <Tpetra_CrsMatrix.hpp>
54 #include <Tpetra_MultiVector.hpp>
55 #include <Tpetra_Distributor.hpp>
56 #include <Tpetra_HashTable.hpp>
57 #include <Tpetra_Import.hpp>
58 #include <Tpetra_Import_Util.hpp>
59 
60 #include "MueLu_Exceptions.hpp"
61 #include "MueLu_TimeMonitor.hpp"
62 #include "MueLu_TpetraOperator.hpp"
63 #include "MueLu_VerboseObject.hpp"
64 
65 #include <cuda_runtime.h>
66 #include <amgx_c.h>
67 
68 namespace MueLu {
69 
76 template <class Scalar,
77  class LocalOrdinal,
78  class GlobalOrdinal,
79  class Node>
80 class AMGXOperator : public TpetraOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node>, public BaseClass {
81  private:
82  typedef Scalar SC;
83  typedef LocalOrdinal LO;
84  typedef GlobalOrdinal GO;
85  typedef Node NO;
86 
89 
90  public:
92 
93 
96 
98  virtual ~AMGXOperator() {}
99 
101 
104  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
105  }
106 
109  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
110  }
111 
113 
119  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
120  }
121 
123  bool hasTransposeApply() const {
124  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
125  }
126 
128  throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n");
129  }
130 
131  private:
132 };
133 
140 template <class Node>
141 class AMGXOperator<double, int, int, Node> : public TpetraOperator<double, int, int, Node> {
142  private:
143  typedef double SC;
144  typedef int LO;
145  typedef int GO;
146  typedef Node NO;
147 
150 
151  void printMaps(Teuchos::RCP<const Teuchos::Comm<int> >& comm, const std::vector<std::vector<int> >& vec, const std::vector<int>& perm,
152  const int* nbrs, const Map& map, const std::string& label) {
153  for (int p = 0; p < comm->getSize(); p++) {
154  if (comm->getRank() == p) {
155  std::cout << "========\n"
156  << label << ", lid (gid), PID " << p << "\n========" << std::endl;
157 
158  for (size_t i = 0; i < vec.size(); ++i) {
159  std::cout << " neighbor " << nbrs[i] << " :";
160  for (size_t j = 0; j < vec[i].size(); ++j)
161  std::cout << " " << vec[i][j] << " (" << map.getGlobalElement(perm[vec[i][j]]) << ")";
162  std::cout << std::endl;
163  }
164  std::cout << std::endl;
165  } else {
166  sleep(1);
167  }
168  comm->barrier();
169  }
170  }
171 
172  public:
174 
176  RCP<const Teuchos::Comm<int> > comm = inA->getRowMap()->getComm();
177  int numProcs = comm->getSize();
178  int myRank = comm->getRank();
179 
180  RCP<Teuchos::Time> amgxTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: initialize");
181  amgxTimer->start();
182  // Initialize
183  // AMGX_SAFE_CALL(AMGX_initialize());
184  // AMGX_SAFE_CALL(AMGX_initialize_plugins());
185 
186  /*system*/
187  // AMGX_SAFE_CALL(AMGX_register_print_callback(&print_callback));
188  AMGX_SAFE_CALL(AMGX_install_signal_handler());
189  Teuchos::ParameterList configs = paramListIn.sublist("amgx:params", true);
190  if (configs.isParameter("json file")) {
191  AMGX_SAFE_CALL(AMGX_config_create_from_file(&Config_, (const char*)&configs.get<std::string>("json file")[0]));
192  } else {
193  std::ostringstream oss;
194  oss << "";
196  for (itr = configs.begin(); itr != configs.end(); ++itr) {
197  const std::string& name = configs.name(itr);
198  const ParameterEntry& entry = configs.entry(itr);
199  oss << name << "=" << filterValueToString(entry) << ", ";
200  }
201  oss << "\0";
202  std::string configString = oss.str();
203  if (configString == "") {
204  // print msg that using defaults
205  // GetOStream(Warnings0) << "Warning: No configuration parameters specified, using default AMGX configuration parameters. \n";
206  }
207  AMGX_SAFE_CALL(AMGX_config_create(&Config_, configString.c_str()));
208  }
209 
210  // TODO: we probably need to add "exception_handling=1" to the parameter list
211  // to switch on internal error handling (with no need for AMGX_SAFE_CALL)
212 
213  // AMGX_SAFE_CALL(AMGX_config_add_parameters(&Config_, "exception_handling=1"))
214 
215 #define NEW_COMM
216 #ifdef NEW_COMM
217  // NOTE: MPI communicator used in AMGX_resources_create must exist in the scope of AMGX_matrix_comm_from_maps_one_ring
218  // FIXME: fix for serial comm
219  RCP<const Teuchos::MpiComm<int> > tmpic = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm->duplicate());
220  TEUCHOS_TEST_FOR_EXCEPTION(tmpic.is_null(), Exceptions::RuntimeError, "Communicator is not MpiComm");
221 
222  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
223  MPI_Comm mpiComm = *rawMpiComm;
224 #endif
225 
226  // Construct AMGX resources
227  if (numProcs == 1) {
228  AMGX_resources_create_simple(&Resources_, Config_);
229 
230  } else {
231  int numGPUDevices;
232  cudaGetDeviceCount(&numGPUDevices);
233  int device[] = {(comm->getRank() % numGPUDevices)};
234 
235  AMGX_config_add_parameters(&Config_, "communicator=MPI");
236 #ifdef NEW_COMM
237  AMGX_resources_create(&Resources_, Config_, &mpiComm, 1 /* number of GPU devices utilized by this rank */, device);
238 #else
239  AMGX_resources_create(&Resources_, Config_, MPI_COMM_WORLD, 1 /* number of GPU devices utilized by this rank */, device);
240 #endif
241  }
242 
243  AMGX_Mode mode = AMGX_mode_dDDI;
244  AMGX_solver_create(&Solver_, Resources_, mode, Config_);
245  AMGX_matrix_create(&A_, Resources_, mode);
246  AMGX_vector_create(&X_, Resources_, mode);
247  AMGX_vector_create(&Y_, Resources_, mode);
248 
249  amgxTimer->stop();
250  amgxTimer->incrementNumCalls();
251 
252  std::vector<int> amgx2muelu;
253 
254  // Construct AMGX communication pattern
255  if (numProcs > 1) {
256  RCP<const Tpetra::Import<LO, GO, NO> > importer = inA->getCrsGraph()->getImporter();
257 
258  TEUCHOS_TEST_FOR_EXCEPTION(importer.is_null(), MueLu::Exceptions::RuntimeError, "The matrix A has no Import object.");
259 
260  Tpetra::Distributor distributor = importer->getDistributor();
261 
262  Array<int> sendRanks = distributor.getProcsTo();
263  Array<int> recvRanks = distributor.getProcsFrom();
264 
265  std::sort(sendRanks.begin(), sendRanks.end());
266  std::sort(recvRanks.begin(), recvRanks.end());
267 
268  bool match = true;
269  if (sendRanks.size() != recvRanks.size()) {
270  match = false;
271  } else {
272  for (int i = 0; i < sendRanks.size(); i++) {
273  if (recvRanks[i] != sendRanks[i])
274  match = false;
275  break;
276  }
277  }
279  "AMGX requires that the processors that we send to and receive from are the same. "
280  "This is not the case: we send to {"
281  << sendRanks << "} and receive from {" << recvRanks << "}");
282 
283  int num_neighbors = sendRanks.size(); // does not include the calling process
284  const int* neighbors = &sendRanks[0];
285 
286  // Later on, we'll have to organize the send and recv data by PIDs,
287  // i.e, a vector V of vectors, where V[i] is PID i's vector of data.
288  // Hence we need to be able to quickly look up an array index
289  // associated with each PID.
290  Tpetra::Details::HashTable<int, int> hashTable(3 * num_neighbors);
291  for (int i = 0; i < num_neighbors; i++)
292  hashTable.add(neighbors[i], i);
293 
294  // Get some information out
295  ArrayView<const int> exportLIDs = importer->getExportLIDs();
296  ArrayView<const int> exportPIDs = importer->getExportPIDs();
297  Array<int> importPIDs;
298  Tpetra::Import_Util::getPids(*importer, importPIDs, true /* make local -1 */);
299 
300  // Construct the reordering for AMGX as in AMGX_matrix_upload_all documentation
301  RCP<const Map> rowMap = inA->getRowMap();
302  RCP<const Map> colMap = inA->getColMap();
303 
304  int N = rowMap->getLocalNumElements(), Nc = colMap->getLocalNumElements();
305  muelu2amgx_.resize(Nc, -1);
306 
307  int numUniqExports = 0;
308  for (int i = 0; i < exportLIDs.size(); i++)
309  if (muelu2amgx_[exportLIDs[i]] == -1) {
310  numUniqExports++;
311  muelu2amgx_[exportLIDs[i]] = -2;
312  }
313 
314  int localOffset = 0, exportOffset = N - numUniqExports;
315  // Go through exported LIDs and put them at the end of LIDs
316  for (int i = 0; i < exportLIDs.size(); i++)
317  if (muelu2amgx_[exportLIDs[i]] < 0) // exportLIDs are not unique
318  muelu2amgx_[exportLIDs[i]] = exportOffset++;
319  // Go through all non-export LIDs, and put them at the beginning of LIDs
320  for (int i = 0; i < N; i++)
321  if (muelu2amgx_[i] == -1)
322  muelu2amgx_[i] = localOffset++;
323  // Go through the tail (imported LIDs), and order those by neighbors
324  int importOffset = N;
325  for (int k = 0; k < num_neighbors; k++)
326  for (int i = 0; i < importPIDs.size(); i++)
327  if (importPIDs[i] != -1 && hashTable.get(importPIDs[i]) == k)
328  muelu2amgx_[i] = importOffset++;
329 
330  amgx2muelu.resize(muelu2amgx_.size());
331  for (int i = 0; i < (int)muelu2amgx_.size(); i++)
332  amgx2muelu[muelu2amgx_[i]] = i;
333 
334  // Construct send arrays
335  std::vector<std::vector<int> > sendDatas(num_neighbors);
336  std::vector<int> send_sizes(num_neighbors, 0);
337  for (int i = 0; i < exportPIDs.size(); i++) {
338  int index = hashTable.get(exportPIDs[i]);
339  sendDatas[index].push_back(muelu2amgx_[exportLIDs[i]]);
340  send_sizes[index]++;
341  }
342  // FIXME: sendDatas must be sorted (based on GIDs)
343 
344  std::vector<const int*> send_maps(num_neighbors);
345  for (int i = 0; i < num_neighbors; i++)
346  send_maps[i] = &(sendDatas[i][0]);
347 
348  // Debugging
349  // printMaps(comm, sendDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "send_map_vector");
350 
351  // Construct recv arrays
352  std::vector<std::vector<int> > recvDatas(num_neighbors);
353  std::vector<int> recv_sizes(num_neighbors, 0);
354  for (int i = 0; i < importPIDs.size(); i++)
355  if (importPIDs[i] != -1) {
356  int index = hashTable.get(importPIDs[i]);
357  recvDatas[index].push_back(muelu2amgx_[i]);
358  recv_sizes[index]++;
359  }
360  // FIXME: recvDatas must be sorted (based on GIDs)
361 
362  std::vector<const int*> recv_maps(num_neighbors);
363  for (int i = 0; i < num_neighbors; i++)
364  recv_maps[i] = &(recvDatas[i][0]);
365 
366  // Debugging
367  // printMaps(comm, recvDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "recv_map_vector");
368 
369  AMGX_SAFE_CALL(AMGX_matrix_comm_from_maps_one_ring(A_, 1, num_neighbors, neighbors, &send_sizes[0], &send_maps[0], &recv_sizes[0], &recv_maps[0]));
370 
371  AMGX_vector_bind(X_, A_);
372  AMGX_vector_bind(Y_, A_);
373  }
374 
375  RCP<Teuchos::Time> matrixTransformTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transform matrix");
376  matrixTransformTimer->start();
377 
381  inA->getAllValues(ia_s, ja, a);
382 
383  ArrayRCP<int> ia(ia_s.size());
384  for (int i = 0; i < ia.size(); i++)
385  ia[i] = Teuchos::as<int>(ia_s[i]);
386 
387  N_ = inA->getLocalNumRows();
388  int nnz = inA->getLocalNumEntries();
389 
390  matrixTransformTimer->stop();
391  matrixTransformTimer->incrementNumCalls();
392 
393  // Upload matrix
394  // TODO Do we need to pin memory here through AMGX_pin_memory?
395  RCP<Teuchos::Time> matrixTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer matrix CPU->GPU");
396  matrixTimer->start();
397  if (numProcs == 1) {
398  AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia[0], &ja[0], &a[0], NULL);
399 
400  } else {
401  // Transform the matrix
402  std::vector<int> ia_new(ia.size());
403  std::vector<int> ja_new(ja.size());
404  std::vector<double> a_new(a.size());
405 
406  ia_new[0] = 0;
407  for (int i = 0; i < N_; i++) {
408  int oldRow = amgx2muelu[i];
409 
410  ia_new[i + 1] = ia_new[i] + (ia[oldRow + 1] - ia[oldRow]);
411 
412  for (int j = ia[oldRow]; j < ia[oldRow + 1]; j++) {
413  int offset = j - ia[oldRow];
414  ja_new[ia_new[i] + offset] = muelu2amgx_[ja[j]];
415  a_new[ia_new[i] + offset] = a[j];
416  }
417  // Do bubble sort on two arrays
418  // NOTE: There are multiple possible optimizations here (even of bubble sort)
419  bool swapped;
420  do {
421  swapped = false;
422 
423  for (int j = ia_new[i]; j < ia_new[i + 1] - 1; j++)
424  if (ja_new[j] > ja_new[j + 1]) {
425  std::swap(ja_new[j], ja_new[j + 1]);
426  std::swap(a_new[j], a_new[j + 1]);
427  swapped = true;
428  }
429  } while (swapped == true);
430  }
431 
432  AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia_new[0], &ja_new[0], &a_new[0], NULL);
433  }
434  matrixTimer->stop();
435  matrixTimer->incrementNumCalls();
436 
437  domainMap_ = inA->getDomainMap();
438  rangeMap_ = inA->getRangeMap();
439 
440  RCP<Teuchos::Time> realSetupTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: setup (total)");
441  realSetupTimer->start();
442  AMGX_solver_setup(Solver_, A_);
443  realSetupTimer->stop();
444  realSetupTimer->incrementNumCalls();
445 
446  vectorTimer1_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vectors CPU->GPU");
447  vectorTimer2_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vector GPU->CPU");
448  solverTimer_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: Solve (total)");
449  }
450 
452  virtual ~AMGXOperator() {
453  // Comment this out if you need rebuild to work. This causes AMGX_solver_destroy memory issues.
454  AMGX_SAFE_CALL(AMGX_solver_destroy(Solver_));
455  AMGX_SAFE_CALL(AMGX_vector_destroy(X_));
456  AMGX_SAFE_CALL(AMGX_vector_destroy(Y_));
457  AMGX_SAFE_CALL(AMGX_matrix_destroy(A_));
458  AMGX_SAFE_CALL(AMGX_resources_destroy(Resources_));
459  AMGX_SAFE_CALL(AMGX_config_destroy(Config_));
460  }
461 
464 
467 
469 
475 
477  bool hasTransposeApply() const;
478 
480  throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n");
481  }
482 
483  std::string filterValueToString(const Teuchos::ParameterEntry& entry) {
484  return (entry.isList() ? std::string("...") : toString(entry.getAny()));
485  }
486 
487  int sizeA() {
488  int sizeX, sizeY, n;
489  AMGX_matrix_get_size(A_, &n, &sizeX, &sizeY);
490  return n;
491  }
492 
493  int iters() {
494  int it;
495  AMGX_solver_get_iterations_number(Solver_, &it);
496  return it;
497  }
498 
499  AMGX_SOLVE_STATUS getStatus() {
500  AMGX_SOLVE_STATUS status;
501  AMGX_solver_get_status(Solver_, &status);
502  return status;
503  }
504 
505  private:
506  AMGX_solver_handle Solver_;
507  AMGX_resources_handle Resources_;
508  AMGX_config_handle Config_;
509  AMGX_matrix_handle A_;
510  AMGX_vector_handle X_;
511  AMGX_vector_handle Y_;
512  int N_;
513 
516 
517  std::vector<int> muelu2amgx_;
518 
522 };
523 
524 } // namespace MueLu
525 
526 #endif // HAVE_MUELU_AMGX
527 #endif // MUELU_AMGXOPERATOR_DECL_HPP
const std::string & name() const
ConstIterator end() const
MueLu::DefaultLocalOrdinal LocalOrdinal
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &inA, Teuchos::ParameterList &paramListIn)
T & get(const std::string &name, T def_value)
size_t getLocalNumElements() const
void printMaps(Teuchos::RCP< const Teuchos::Comm< int > > &comm, const std::vector< std::vector< int > > &vec, const std::vector< int > &perm, const int *nbrs, const Map &map, const std::string &label)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Teuchos::RCP< const Map > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
size_type size() const
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
size_type size() const
virtual ~AMGXOperator()
Destructor.
MueLu::DefaultNode Node
static RCP< Time > getNewTimer(const std::string &name)
bool isParameter(const std::string &name) const
void start(bool reset=false)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
double stop()
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Returns a solution for the linear system AX=Y in the Tpetra::MultiVector X.
params_t::ConstIterator ConstIterator
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
ConstIterator begin() const
iterator end()
std::string filterValueToString(const Teuchos::ParameterEntry &entry)
const ParameterEntry & entry(ConstIterator i) const
any & getAny(bool activeQry=true)
Teuchos::RCP< const Map > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
ValueType get(const KeyType key)
void add(const KeyType key, const ValueType value)
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &InA, Teuchos::ParameterList &paramListIn)
Constructor.
size_type size() const
Scalar SC
Tpetra::Map< LO, GO, NO > Map
Wraps an existing MueLu::Hierarchy as a Tpetra::Operator.
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
Exception thrown to report errors in the internal logic of the program.
void incrementNumCalls()
iterator begin()
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
Adapter for AmgX library from Nvidia.
bool is_null() const