46 #ifndef MUELU_AMGXOPERATOR_DECL_HPP
47 #define MUELU_AMGXOPERATOR_DECL_HPP
49 #if defined(HAVE_MUELU_AMGX)
52 #include <Tpetra_Operator.hpp>
53 #include <Tpetra_CrsMatrix.hpp>
54 #include <Tpetra_MultiVector.hpp>
55 #include <Tpetra_Distributor.hpp>
56 #include <Tpetra_HashTable.hpp>
57 #include <Tpetra_Import.hpp>
62 #include "MueLu_TpetraOperator.hpp"
65 #include <cuda_runtime.h>
80 class AMGXOperator :
public TpetraOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node>,
public BaseClass {
140 template <
class Node>
152 const int* nbrs,
const Map& map,
const std::string& label) {
153 for (
int p = 0; p < comm->getSize(); p++) {
154 if (comm->getRank() == p) {
155 std::cout <<
"========\n"
156 << label <<
", lid (gid), PID " << p <<
"\n========" << std::endl;
158 for (
size_t i = 0; i < vec.size(); ++i) {
159 std::cout <<
" neighbor " << nbrs[i] <<
" :";
160 for (
size_t j = 0; j < vec[i].size(); ++j)
161 std::cout <<
" " << vec[i][j] <<
" (" << map.
getGlobalElement(perm[vec[i][j]]) <<
")";
162 std::cout << std::endl;
164 std::cout << std::endl;
177 int numProcs = comm->getSize();
178 int myRank = comm->getRank();
188 AMGX_SAFE_CALL(AMGX_install_signal_handler());
191 AMGX_SAFE_CALL(AMGX_config_create_from_file(&Config_, (
const char*)&configs.
get<std::string>(
"json file")[0]));
193 std::ostringstream oss;
196 for (itr = configs.
begin(); itr != configs.
end(); ++itr) {
197 const std::string& name = configs.
name(itr);
199 oss << name <<
"=" << filterValueToString(entry) <<
", ";
202 std::string configString = oss.str();
203 if (configString ==
"") {
207 AMGX_SAFE_CALL(AMGX_config_create(&Config_, configString.c_str()));
223 MPI_Comm mpiComm = *rawMpiComm;
228 AMGX_resources_create_simple(&Resources_, Config_);
232 cudaGetDeviceCount(&numGPUDevices);
233 int device[] = {(comm->getRank() % numGPUDevices)};
235 AMGX_config_add_parameters(&Config_,
"communicator=MPI");
237 AMGX_resources_create(&Resources_, Config_, &mpiComm, 1 , device);
239 AMGX_resources_create(&Resources_, Config_, MPI_COMM_WORLD, 1 , device);
243 AMGX_Mode mode = AMGX_mode_dDDI;
244 AMGX_solver_create(&Solver_, Resources_, mode, Config_);
245 AMGX_matrix_create(&A_, Resources_, mode);
246 AMGX_vector_create(&X_, Resources_, mode);
247 AMGX_vector_create(&Y_, Resources_, mode);
252 std::vector<int> amgx2muelu;
262 Array<int> sendRanks = distributor.getProcsTo();
263 Array<int> recvRanks = distributor.getProcsFrom();
265 std::sort(sendRanks.
begin(), sendRanks.
end());
266 std::sort(recvRanks.
begin(), recvRanks.
end());
269 if (sendRanks.
size() != recvRanks.
size()) {
272 for (
int i = 0; i < sendRanks.
size(); i++) {
273 if (recvRanks[i] != sendRanks[i])
279 "AMGX requires that the processors that we send to and receive from are the same. "
280 "This is not the case: we send to {"
281 << sendRanks <<
"} and receive from {" << recvRanks <<
"}");
283 int num_neighbors = sendRanks.
size();
284 const int* neighbors = &sendRanks[0];
291 for (
int i = 0; i < num_neighbors; i++)
292 hashTable.
add(neighbors[i], i);
298 Tpetra::Import_Util::getPids(*importer, importPIDs,
true );
305 muelu2amgx_.resize(Nc, -1);
307 int numUniqExports = 0;
308 for (
int i = 0; i < exportLIDs.
size(); i++)
309 if (muelu2amgx_[exportLIDs[i]] == -1) {
311 muelu2amgx_[exportLIDs[i]] = -2;
314 int localOffset = 0, exportOffset = N - numUniqExports;
316 for (
int i = 0; i < exportLIDs.
size(); i++)
317 if (muelu2amgx_[exportLIDs[i]] < 0)
318 muelu2amgx_[exportLIDs[i]] = exportOffset++;
320 for (
int i = 0; i < N; i++)
321 if (muelu2amgx_[i] == -1)
322 muelu2amgx_[i] = localOffset++;
324 int importOffset = N;
325 for (
int k = 0; k < num_neighbors; k++)
326 for (
int i = 0; i < importPIDs.
size(); i++)
327 if (importPIDs[i] != -1 && hashTable.
get(importPIDs[i]) == k)
328 muelu2amgx_[i] = importOffset++;
330 amgx2muelu.resize(muelu2amgx_.size());
331 for (
int i = 0; i < (int)muelu2amgx_.size(); i++)
332 amgx2muelu[muelu2amgx_[i]] = i;
335 std::vector<std::vector<int> > sendDatas(num_neighbors);
336 std::vector<int> send_sizes(num_neighbors, 0);
337 for (
int i = 0; i < exportPIDs.
size(); i++) {
338 int index = hashTable.
get(exportPIDs[i]);
339 sendDatas[index].push_back(muelu2amgx_[exportLIDs[i]]);
344 std::vector<const int*> send_maps(num_neighbors);
345 for (
int i = 0; i < num_neighbors; i++)
346 send_maps[i] = &(sendDatas[i][0]);
352 std::vector<std::vector<int> > recvDatas(num_neighbors);
353 std::vector<int> recv_sizes(num_neighbors, 0);
354 for (
int i = 0; i < importPIDs.
size(); i++)
355 if (importPIDs[i] != -1) {
356 int index = hashTable.
get(importPIDs[i]);
357 recvDatas[index].push_back(muelu2amgx_[i]);
362 std::vector<const int*> recv_maps(num_neighbors);
363 for (
int i = 0; i < num_neighbors; i++)
364 recv_maps[i] = &(recvDatas[i][0]);
369 AMGX_SAFE_CALL(AMGX_matrix_comm_from_maps_one_ring(A_, 1, num_neighbors, neighbors, &send_sizes[0], &send_maps[0], &recv_sizes[0], &recv_maps[0]));
371 AMGX_vector_bind(X_, A_);
372 AMGX_vector_bind(Y_, A_);
376 matrixTransformTimer->
start();
381 inA->getAllValues(ia_s, ja, a);
384 for (
int i = 0; i < ia.size(); i++)
385 ia[i] = Teuchos::as<int>(ia_s[i]);
387 N_ = inA->getLocalNumRows();
388 int nnz = inA->getLocalNumEntries();
390 matrixTransformTimer->
stop();
396 matrixTimer->
start();
398 AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia[0], &ja[0], &a[0], NULL);
402 std::vector<int> ia_new(ia.size());
403 std::vector<int> ja_new(ja.
size());
404 std::vector<double> a_new(a.
size());
407 for (
int i = 0; i < N_; i++) {
408 int oldRow = amgx2muelu[i];
410 ia_new[i + 1] = ia_new[i] + (ia[oldRow + 1] - ia[oldRow]);
412 for (
int j = ia[oldRow]; j < ia[oldRow + 1]; j++) {
413 int offset = j - ia[oldRow];
414 ja_new[ia_new[i] + offset] = muelu2amgx_[ja[j]];
415 a_new[ia_new[i] + offset] = a[j];
423 for (
int j = ia_new[i]; j < ia_new[i + 1] - 1; j++)
424 if (ja_new[j] > ja_new[j + 1]) {
425 std::swap(ja_new[j], ja_new[j + 1]);
426 std::swap(a_new[j], a_new[j + 1]);
429 }
while (swapped ==
true);
432 AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia_new[0], &ja_new[0], &a_new[0], NULL);
437 domainMap_ = inA->getDomainMap();
438 rangeMap_ = inA->getRangeMap();
441 realSetupTimer->
start();
442 AMGX_solver_setup(Solver_, A_);
443 realSetupTimer->
stop();
454 AMGX_SAFE_CALL(AMGX_solver_destroy(Solver_));
455 AMGX_SAFE_CALL(AMGX_vector_destroy(X_));
456 AMGX_SAFE_CALL(AMGX_vector_destroy(Y_));
457 AMGX_SAFE_CALL(AMGX_matrix_destroy(A_));
458 AMGX_SAFE_CALL(AMGX_resources_destroy(Resources_));
459 AMGX_SAFE_CALL(AMGX_config_destroy(Config_));
489 AMGX_matrix_get_size(A_, &n, &sizeX, &sizeY);
495 AMGX_solver_get_iterations_number(Solver_, &it);
500 AMGX_SOLVE_STATUS status;
501 AMGX_solver_get_status(Solver_, &status);
509 AMGX_matrix_handle
A_;
510 AMGX_vector_handle
X_;
511 AMGX_vector_handle
Y_;
526 #endif // HAVE_MUELU_AMGX
527 #endif // MUELU_AMGXOPERATOR_DECL_HPP
RCP< const Map > domainMap_
const std::string & name() const
virtual ~AMGXOperator()
Destructor.
ConstIterator end() const
MueLu::DefaultLocalOrdinal LocalOrdinal
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &inA, Teuchos::ParameterList ¶mListIn)
T & get(const std::string &name, T def_value)
AMGX_config_handle Config_
size_t getLocalNumElements() const
void printMaps(Teuchos::RCP< const Teuchos::Comm< int > > &comm, const std::vector< std::vector< int > > &vec, const std::vector< int > &perm, const int *nbrs, const Map &map, const std::string &label)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Teuchos::RCP< const Map > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
AMGX_solver_handle Solver_
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
std::vector< int > muelu2amgx_
virtual ~AMGXOperator()
Destructor.
RCP< const Map > rangeMap_
RCP< Teuchos::Time > solverTimer_
static RCP< Time > getNewTimer(const std::string &name)
Tpetra::Map< LO, GO, NO > Map
bool isParameter(const std::string &name) const
void start(bool reset=false)
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Returns a solution for the linear system AX=Y in the Tpetra::MultiVector X.
params_t::ConstIterator ConstIterator
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
ConstIterator begin() const
std::string filterValueToString(const Teuchos::ParameterEntry &entry)
const ParameterEntry & entry(ConstIterator i) const
any & getAny(bool activeQry=true)
Teuchos::RCP< const Map > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
ValueType get(const KeyType key)
void add(const KeyType key, const ValueType value)
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &InA, Teuchos::ParameterList ¶mListIn)
Constructor.
AMGX_resources_handle Resources_
Tpetra::Map< LO, GO, NO > Map
Wraps an existing MueLu::Hierarchy as a Tpetra::Operator.
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
Exception throws to report errors in the internal logical of the program.
RCP< Teuchos::Time > vectorTimer1_
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
RCP< Teuchos::Time > vectorTimer2_
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
AMGX_SOLVE_STATUS getStatus()
Adapter for AmgX library from Nvidia.