MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AMGXOperator_decl.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AMGXOPERATOR_DECL_HPP
47 #define MUELU_AMGXOPERATOR_DECL_HPP
48 
49 #if defined (HAVE_MUELU_AMGX)
51 
52 #include <Tpetra_Operator.hpp>
53 #include <Tpetra_CrsMatrix.hpp>
54 #include <Tpetra_MultiVector.hpp>
55 #include <Tpetra_Distributor.hpp>
56 #include <Tpetra_HashTable.hpp>
57 #include <Tpetra_Import.hpp>
58 #include <Tpetra_Import_Util.hpp>
59 
60 #include "MueLu_Exceptions.hpp"
61 #include "MueLu_TimeMonitor.hpp"
62 #include "MueLu_TpetraOperator.hpp"
63 #include "MueLu_VerboseObject.hpp"
64 
65 #include <cuda_runtime.h>
66 #include <amgx_c.h>
67 
68 namespace MueLu {
69 
70 
77  template <class Scalar,
78  class LocalOrdinal,
79  class GlobalOrdinal,
80  class Node>
81  class AMGXOperator : public TpetraOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node>, public BaseClass {
82  private:
83  typedef Scalar SC;
84  typedef LocalOrdinal LO;
85  typedef GlobalOrdinal GO;
86  typedef Node NO;
87 
88  typedef Tpetra::Map<LO,GO,NO> Map;
89  typedef Tpetra::MultiVector<SC,LO,GO,NO> MultiVector;
90 
91  public:
92 
94 
95 
97  AMGXOperator(const Teuchos::RCP<Tpetra::CrsMatrix<SC,LO,GO,NO> > &InA, Teuchos::ParameterList &paramListIn) { }
98 
100  virtual ~AMGXOperator() { }
101 
103 
106  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
107  }
108 
111  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
112  }
113 
115 
120  Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(), Scalar beta = Teuchos::ScalarTraits<Scalar>::zero()) const {
121  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
122  }
123 
125  bool hasTransposeApply() const{
126  throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
127  }
128 
130  throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n");
131  }
132 
133  private:
134  };
135 
142  template<class Node>
143  class AMGXOperator<double, int, int, Node> : public TpetraOperator<double, int, int, Node> {
144  private:
145  typedef double SC;
146  typedef int LO;
147  typedef int GO;
148  typedef Node NO;
149 
150  typedef Tpetra::Map<LO,GO,NO> Map;
151  typedef Tpetra::MultiVector<SC,LO,GO,NO> MultiVector;
152 
153  void printMaps(Teuchos::RCP<const Teuchos::Comm<int> >& comm, const std::vector<std::vector<int> >& vec, const std::vector<int>& perm,
154  const int* nbrs, const Map& map, const std::string& label) {
155  for (int p = 0; p < comm->getSize(); p++) {
156  if (comm->getRank() == p) {
157  std::cout << "========\n" << label << ", lid (gid), PID " << p << "\n========" << std::endl;
158 
159  for (size_t i = 0; i < vec.size(); ++i) {
160  std::cout << " neighbor " << nbrs[i] << " :";
161  for (size_t j = 0; j < vec[i].size(); ++j)
162  std::cout << " " << vec[i][j] << " (" << map.getGlobalElement(perm[vec[i][j]]) << ")";
163  std::cout << std::endl;
164  }
165  std::cout << std::endl;
166  } else {
167  sleep(1);
168  }
169  comm->barrier();
170  }
171  }
172 
173  public:
174 
176 
177  AMGXOperator(const Teuchos::RCP<Tpetra::CrsMatrix<SC,LO,GO,NO> > &inA, Teuchos::ParameterList &paramListIn) {
178  RCP<const Teuchos::Comm<int> > comm = inA->getRowMap()->getComm();
179  int numProcs = comm->getSize();
180  int myRank = comm->getRank();
181 
182  RCP<Teuchos::Time> amgxTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: initialize");
183  amgxTimer->start();
184  // Initialize
185  AMGX_SAFE_CALL(AMGX_initialize());
186  AMGX_SAFE_CALL(AMGX_initialize_plugins());
187 
188  /*system*/
189  //AMGX_SAFE_CALL(AMGX_register_print_callback(&print_callback));
190  AMGX_SAFE_CALL(AMGX_install_signal_handler());
191  Teuchos::ParameterList configs = paramListIn.sublist("amgx:params", true);
192  if (configs.isParameter("json file")) {
193  AMGX_SAFE_CALL(AMGX_config_create_from_file(&Config_, (const char *) &configs.get<std::string>("json file")[0]));
194  } else {
195  std::ostringstream oss;
196  oss << "";
198  for (itr = configs.begin(); itr != configs.end(); ++itr) {
199  const std::string& name = configs.name(itr);
200  const ParameterEntry& entry = configs.entry(itr);
201  oss << name << "=" << filterValueToString(entry) << ", ";
202  }
203  oss << "\0";
204  std::string configString = oss.str();
205  if (configString == "") {
206  //print msg that using defaults
207  //GetOStream(Warnings0) << "Warning: No configuration parameters specified, using default AMGX configuration parameters. \n";
208  }
209  AMGX_SAFE_CALL(AMGX_config_create(&Config_, configString.c_str()));
210  }
211 
212  // TODO: we probably need to add "exception_handling=1" to the parameter list
213  // to switch on internal error handling (with no need for AMGX_SAFE_CALL)
214 
215 #define NEW_COMM
216 #ifdef NEW_COMM
217  // NOTE: MPI communicator used in AMGX_resources_create must exist in the scope of AMGX_matrix_comm_from_maps_one_ring
218  // FIXME: fix for serial comm
219  RCP<const Teuchos::MpiComm<int> > tmpic = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm->duplicate());
220  TEUCHOS_TEST_FOR_EXCEPTION(tmpic.is_null(), Exceptions::RuntimeError, "Communicator is not MpiComm");
221 
222  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
223  MPI_Comm mpiComm = *rawMpiComm;
224 #endif
225 
226  // Construct AMGX resources
227  if (numProcs == 1) {
228  AMGX_resources_create_simple(&Resources_, Config_);
229 
230  } else {
231  int numGPUDevices;
232  cudaGetDeviceCount(&numGPUDevices);
233  int device[] = {(comm->getRank() % numGPUDevices)};
234 
235  AMGX_config_add_parameters(&Config_, "communicator=MPI");
236 #ifdef NEW_COMM
237  AMGX_resources_create(&Resources_, Config_, &mpiComm, 1/* number of GPU devices utilized by this rank */, device);
238 #else
239  AMGX_resources_create(&Resources_, Config_, MPI_COMM_WORLD, 1/* number of GPU devices utilized by this rank */, device);
240 #endif
241  }
242 
243  AMGX_Mode mode = AMGX_mode_dDDI;
244  AMGX_solver_create(&Solver_, Resources_, mode, Config_);
245  AMGX_matrix_create(&A_, Resources_, mode);
246  AMGX_vector_create(&X_, Resources_, mode);
247  AMGX_vector_create(&Y_, Resources_, mode);
248 
249  amgxTimer->stop();
250  amgxTimer->incrementNumCalls();
251 
252  std::vector<int> amgx2muelu;
253 
254  // Construct AMGX communication pattern
255  if (numProcs > 1) {
256  RCP<const Tpetra::Import<LO,GO,NO> > importer = inA->getCrsGraph()->getImporter();
257 
258  TEUCHOS_TEST_FOR_EXCEPTION(importer.is_null(), MueLu::Exceptions::RuntimeError, "The matrix A has no Import object.");
259 
260  Tpetra::Distributor distributor = importer->getDistributor();
261 
262  Array<int> sendRanks = distributor.getProcsTo();
263  Array<int> recvRanks = distributor.getProcsFrom();
264 
265  std::sort(sendRanks.begin(), sendRanks.end());
266  std::sort(recvRanks.begin(), recvRanks.end());
267 
268  bool match = true;
269  if (sendRanks.size() != recvRanks.size()) {
270  match = false;
271  } else {
272  for (int i = 0; i < sendRanks.size(); i++) {
273  if (recvRanks[i] != sendRanks[i])
274  match = false;
275  break;
276  }
277  }
278  TEUCHOS_TEST_FOR_EXCEPTION(!match, MueLu::Exceptions::RuntimeError, "AMGX requires that the processors that we send to and receive from are the same. "
279  "This is not the case: we send to {" << sendRanks << "} and receive from {" << recvRanks << "}");
280 
281  int num_neighbors = sendRanks.size(); // does not include the calling process
282  const int* neighbors = &sendRanks[0];
283 
284  // Later on, we'll have to organize the send and recv data by PIDs,
285  // i.e, a vector V of vectors, where V[i] is PID i's vector of data.
286  // Hence we need to be able to quickly look up an array index
287  // associated with each PID.
288  Tpetra::Details::HashTable<int,int> hashTable(3*num_neighbors);
289  for (int i = 0; i < num_neighbors; i++)
290  hashTable.add(neighbors[i], i);
291 
292  // Get some information out
293  ArrayView<const int> exportLIDs = importer->getExportLIDs();
294  ArrayView<const int> exportPIDs = importer->getExportPIDs();
295  Array<int> importPIDs;
296  Tpetra::Import_Util::getPids(*importer, importPIDs, true/* make local -1 */);
297 
298  // Construct the reordering for AMGX as in AMGX_matrix_upload_all documentation
299  RCP<const Map> rowMap = inA->getRowMap();
300  RCP<const Map> colMap = inA->getColMap();
301 
302  int N = rowMap->getNodeNumElements(), Nc = colMap->getNodeNumElements();
303  muelu2amgx_.resize(Nc, -1);
304 
305  int numUniqExports = 0;
306  for (int i = 0; i < exportLIDs.size(); i++)
307  if (muelu2amgx_[exportLIDs[i]] == -1) {
308  numUniqExports++;
309  muelu2amgx_[exportLIDs[i]] = -2;
310  }
311 
312  int localOffset = 0, exportOffset = N - numUniqExports;
313  // Go through exported LIDs and put them at the end of LIDs
314  for (int i = 0; i < exportLIDs.size(); i++)
315  if (muelu2amgx_[exportLIDs[i]] < 0) // exportLIDs are not unique
316  muelu2amgx_[exportLIDs[i]] = exportOffset++;
317  // Go through all non-export LIDs, and put them at the beginning of LIDs
318  for (int i = 0; i < N; i++)
319  if (muelu2amgx_[i] == -1)
320  muelu2amgx_[i] = localOffset++;
321  // Go through the tail (imported LIDs), and order those by neighbors
322  int importOffset = N;
323  for (int k = 0; k < num_neighbors; k++)
324  for (int i = 0; i < importPIDs.size(); i++)
325  if (importPIDs[i] != -1 && hashTable.get(importPIDs[i]) == k)
326  muelu2amgx_[i] = importOffset++;
327 
328  amgx2muelu.resize(muelu2amgx_.size());
329  for (int i = 0; i < (int)muelu2amgx_.size(); i++)
330  amgx2muelu[muelu2amgx_[i]] = i;
331 
332  // Construct send arrays
333  std::vector<std::vector<int> > sendDatas (num_neighbors);
334  std::vector<int> send_sizes(num_neighbors, 0);
335  for (int i = 0; i < exportPIDs.size(); i++) {
336  int index = hashTable.get(exportPIDs[i]);
337  sendDatas [index].push_back(muelu2amgx_[exportLIDs[i]]);
338  send_sizes[index]++;
339  }
340  // FIXME: sendDatas must be sorted (based on GIDs)
341 
342  std::vector<const int*> send_maps(num_neighbors);
343  for (int i = 0; i < num_neighbors; i++)
344  send_maps[i] = &(sendDatas[i][0]);
345 
346  // Debugging
347  // printMaps(comm, sendDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "send_map_vector");
348 
349  // Construct recv arrays
350  std::vector<std::vector<int> > recvDatas (num_neighbors);
351  std::vector<int> recv_sizes(num_neighbors, 0);
352  for (int i = 0; i < importPIDs.size(); i++)
353  if (importPIDs[i] != -1) {
354  int index = hashTable.get(importPIDs[i]);
355  recvDatas [index].push_back(muelu2amgx_[i]);
356  recv_sizes[index]++;
357  }
358  // FIXME: recvDatas must be sorted (based on GIDs)
359 
360  std::vector<const int*> recv_maps(num_neighbors);
361  for (int i = 0; i < num_neighbors; i++)
362  recv_maps[i] = &(recvDatas[i][0]);
363 
364  // Debugging
365  // printMaps(comm, recvDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "recv_map_vector");
366 
367  AMGX_SAFE_CALL(AMGX_matrix_comm_from_maps_one_ring(A_, 1, num_neighbors, neighbors, &send_sizes[0], &send_maps[0], &recv_sizes[0], &recv_maps[0]));
368 
369  AMGX_vector_bind(X_, A_);
370  AMGX_vector_bind(Y_, A_);
371  }
372 
373  RCP<Teuchos::Time> matrixTransformTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transform matrix");
374  matrixTransformTimer->start();
375 
379  inA->getAllValues(ia_s, ja, a);
380 
381  ArrayRCP<int> ia(ia_s.size());
382  for (int i = 0; i < ia.size(); i++)
383  ia[i] = Teuchos::as<int>(ia_s[i]);
384 
385  N_ = inA->getNodeNumRows();
386  int nnz = inA->getNodeNumEntries();
387 
388  matrixTransformTimer->stop();
389  matrixTransformTimer->incrementNumCalls();
390 
391 
392  // Upload matrix
393  // TODO Do we need to pin memory here through AMGX_pin_memory?
394  RCP<Teuchos::Time> matrixTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer matrix CPU->GPU");
395  matrixTimer->start();
396  if (numProcs == 1) {
397  AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia[0], &ja[0], &a[0], NULL);
398 
399  } else {
400  // Transform the matrix
401  std::vector<int> ia_new(ia.size());
402  std::vector<int> ja_new(ja.size());
403  std::vector<double> a_new (a.size());
404 
405  ia_new[0] = 0;
406  for (int i = 0; i < N_; i++) {
407  int oldRow = amgx2muelu[i];
408 
409  ia_new[i+1] = ia_new[i] + (ia[oldRow+1] - ia[oldRow]);
410 
411  for (int j = ia[oldRow]; j < ia[oldRow+1]; j++) {
412  int offset = j - ia[oldRow];
413  ja_new[ia_new[i] + offset] = muelu2amgx_[ja[j]];
414  a_new [ia_new[i] + offset] = a[j];
415  }
416  // Do bubble sort on two arrays
417  // NOTE: There are multiple possible optimizations here (even of bubble sort)
418  bool swapped;
419  do {
420  swapped = false;
421 
422  for (int j = ia_new[i]; j < ia_new[i+1]-1; j++)
423  if (ja_new[j] > ja_new[j+1]) {
424  std::swap(ja_new[j], ja_new[j+1]);
425  std::swap(a_new [j], a_new [j+1]);
426  swapped = true;
427  }
428  } while (swapped == true);
429  }
430 
431  AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia_new[0], &ja_new[0], &a_new[0], NULL);
432  }
433  matrixTimer->stop();
434  matrixTimer->incrementNumCalls();
435 
436  domainMap_ = inA->getDomainMap();
437  rangeMap_ = inA->getRangeMap();
438 
439  RCP<Teuchos::Time> realSetupTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: real setup");
440  realSetupTimer->start();
441  AMGX_solver_setup(Solver_, A_);
442  realSetupTimer->stop();
443  realSetupTimer->incrementNumCalls();
444 
445  vectorTimer1_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vectors CPU->GPU");
446  vectorTimer2_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vector GPU->CPU");
447  }
448 
450  virtual ~AMGXOperator() { }
451 
454 
457 
459 
465 
467  bool hasTransposeApply() const;
468 
470  throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n");
471  }
472 
473  std::string filterValueToString(const Teuchos::ParameterEntry& entry ) {
474  return ( entry.isList() ? std::string("...") : toString(entry.getAny()) );
475  }
476 
477  int sizeA() {
478  int sizeX, sizeY, n;
479  AMGX_matrix_get_size(A_, &n, &sizeX, &sizeY);
480  return n;
481  }
482 
483  int iters() {
484  int it;
485  AMGX_solver_get_iterations_number(Solver_, &it);
486  return it;
487  }
488 
489  AMGX_SOLVE_STATUS getStatus() {
490  AMGX_SOLVE_STATUS status;
491  AMGX_solver_get_status(Solver_, &status);
492  return status;
493  }
494 
495 
496  private:
497  AMGX_solver_handle Solver_;
498  AMGX_resources_handle Resources_;
499  AMGX_config_handle Config_;
500  AMGX_matrix_handle A_;
501  AMGX_vector_handle X_;
502  AMGX_vector_handle Y_;
503  int N_;
504 
507 
508  std::vector<int> muelu2amgx_;
509 
512  };
513 
514 } // namespace
515 
516 #endif //HAVE_MUELU_AMGX
517 #endif // MUELU_AMGXOPERATOR_DECL_HPP
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
const std::string & name() const
ConstIterator end() const
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &inA, Teuchos::ParameterList &paramListIn)
T & get(const std::string &name, T def_value)
void printMaps(Teuchos::RCP< const Teuchos::Comm< int > > &comm, const std::vector< std::vector< int > > &vec, const std::vector< int > &perm, const int *nbrs, const Map &map, const std::string &label)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Teuchos::RCP< const Map > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
size_type size() const
size_type size() const
virtual ~AMGXOperator()
Destructor.
Tpetra::Map< LO, GO, NO > Map
static RCP< Time > getNewTimer(const std::string &name)
bool isParameter(const std::string &name) const
void start(bool reset=false)
double stop()
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Returns a solution for the linear system AX=Y in the Tpetra::MultiVector X.
params_t::ConstIterator ConstIterator
ConstIterator begin() const
iterator end()
std::string filterValueToString(const Teuchos::ParameterEntry &entry)
const ParameterEntry & entry(ConstIterator i) const
any & getAny(bool activeQry=true)
Teuchos::RCP< const Map > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &InA, Teuchos::ParameterList &paramListIn)
Constructor.
size_type size() const
Scalar SC
Wraps an existing MueLu::Hierarchy as a Tpetra::Operator.
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
Exception throws to report errors in the internal logical of the program.
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
void incrementNumCalls()
iterator begin()
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
Adapter for AmgX library from Nvidia.
bool is_null() const