Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Util.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
18 #ifndef AMESOS2_UTIL_HPP
19 #define AMESOS2_UTIL_HPP
20 
21 #include "Amesos2_config.h"
22 
23 #include "Teuchos_RCP.hpp"
24 #include "Teuchos_BLAS_types.hpp"
25 #include "Teuchos_Array.hpp"
26 #include "Teuchos_ArrayView.hpp"
27 #include "Teuchos_FancyOStream.hpp"
28 
29 #include <Tpetra_Map.hpp>
30 #include <Tpetra_DistObject_decl.hpp>
31 #include <Tpetra_ComputeGatherMap.hpp> // added for gather map... where is the best place??
32 
33 #include "Amesos2_TypeDecl.hpp"
34 #include "Amesos2_Meta.hpp"
36 
37 #ifdef HAVE_AMESOS2_EPETRA
38 #include <Epetra_Map.h>
39 #endif
40 
41 #ifdef HAVE_AMESOS2_METIS
42 #include "metis.h" // to discuss, remove from header?
43 #endif
44 
45 namespace Amesos2 {
46 
47  namespace Util {
48 
55  using Teuchos::RCP;
56  using Teuchos::ArrayView;
57 
74  template <typename LO, typename GO, typename GS, typename Node>
75  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
76  getGatherMap( const Teuchos::RCP< const Tpetra::Map<LO,GO,Node> > &map );
77 
78 
79  template <typename LO, typename GO, typename GS, typename Node>
80  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
81  getDistributionMap(EDistribution distribution,
82  GS num_global_elements,
83  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
84  GO indexBase = 0,
85  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >& map = Teuchos::null);
86 
87 
88 #ifdef HAVE_AMESOS2_EPETRA
89 
95  template <typename LO, typename GO, typename GS, typename Node>
96  RCP<Tpetra::Map<LO,GO,Node> >
97  epetra_map_to_tpetra_map(const Epetra_BlockMap& map);
98 
104  template <typename LO, typename GO, typename GS, typename Node>
105  RCP<Epetra_Map>
106  tpetra_map_to_epetra_map(const Tpetra::Map<LO,GO,Node>& map);
107 
113  const RCP<const Teuchos::Comm<int> > to_teuchos_comm(RCP<const Epetra_Comm> c);
114 
120  const RCP<const Epetra_Comm> to_epetra_comm(RCP<const Teuchos::Comm<int> > c);
121 #endif // HAVE_AMESOS2_EPETRA
122 
128  template <typename Scalar,
129  typename GlobalOrdinal,
130  typename GlobalSizeT>
131  void transpose(ArrayView<Scalar> vals,
132  ArrayView<GlobalOrdinal> indices,
133  ArrayView<GlobalSizeT> ptr,
134  ArrayView<Scalar> trans_vals,
135  ArrayView<GlobalOrdinal> trans_indices,
136  ArrayView<GlobalSizeT> trans_ptr);
137 
151  template <typename Scalar1, typename Scalar2>
152  void scale(ArrayView<Scalar1> vals, size_t l,
153  size_t ld, ArrayView<Scalar2> s);
154 
173  template <typename Scalar1, typename Scalar2, class BinaryOp>
174  void scale(ArrayView<Scalar1> vals, size_t l,
175  size_t ld, ArrayView<Scalar2> s, BinaryOp binary_op);
176 
177 
179  void printLine( Teuchos::FancyOStream &out );
180 
181  // Helper function used to convert Kokkos::complex pointer
182  // to std::complex pointer; needed for optimized code path
183  // when retrieving the CRS raw pointers
184  template < class T0, class T1 >
185  struct getStdCplxType
186  {
187  using common_type = typename std::common_type<T0,T1>::type;
188  using type = common_type;
189  };
190 
191  template < class T0, class T1 >
192  struct getStdCplxType< T0, T1* >
193  {
194  using common_type = typename std::common_type<T0,T1>::type;
195  using type = common_type;
196  };
197 
198 #if defined(HAVE_TEUCHOS_COMPLEX) && defined(HAVE_AMESOS2_KOKKOS)
199  template < class T0 >
200  struct getStdCplxType< T0, Kokkos::complex<T0>* >
201  {
202  using type = std::complex<T0>;
203  };
204 
205  template < class T0 , class T1 >
206  struct getStdCplxType< T0, Kokkos::complex<T1>* >
207  {
208  using common_type = typename std::common_type<T0,T1>::type;
209  using type = std::complex<common_type>;
210  };
211 #endif
212 
214  // Matrix/MultiVector Utilities //
216 
217 
218 
232  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
234  {
235  static void do_get(const Teuchos::Ptr<const M> mat,
236  KV_S& nzvals,
237  KV_GO& indices,
238  KV_GS& pointers,
239  typename KV_GS::value_type& nnz,
240  const Teuchos::Ptr<
241  const Tpetra::Map<typename M::local_ordinal_t,
242  typename M::global_ordinal_t,
243  typename M::node_t> > map,
244  EDistribution distribution,
245  EStorage_Ordering ordering)
246  {
247  Op::template apply_kokkos_view<KV_S, KV_GO, KV_GS>(mat, nzvals,
248  indices, pointers, nnz, map, distribution, ordering);
249  }
250  };
251 
252  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
253  struct diff_gs_helper_kokkos_view
254  {
255  static void do_get(const Teuchos::Ptr<const M> mat,
256  KV_S& nzvals,
257  KV_GO& indices,
258  KV_GS& pointers,
259  typename KV_GS::value_type& nnz,
260  const Teuchos::Ptr<
261  const Tpetra::Map<typename M::local_ordinal_t,
262  typename M::global_ordinal_t,
263  typename M::node_t> > map,
264  EDistribution distribution,
265  EStorage_Ordering ordering)
266  {
267  typedef typename M::global_size_t mat_gs_t;
268  typedef typename Kokkos::View<mat_gs_t*, Kokkos::HostSpace> KV_TMP;
269  size_t i, size = pointers.extent(0);
270  KV_TMP pointers_tmp(Kokkos::ViewAllocateWithoutInitializing("pointers_tmp"), size);
271 
272  mat_gs_t nnz_tmp = 0;
273  Op::template apply_kokkos_view<KV_S, KV_GO, KV_TMP>(mat, nzvals,
274  indices, pointers_tmp, nnz_tmp, Teuchos::ptrInArg(*map), distribution, ordering);
275  nnz = Teuchos::as<typename KV_GS::value_type>(nnz_tmp);
276 
277  typedef typename KV_GS::value_type view_gs_t;
278  for (i = 0; i < size; ++i){
279  pointers(i) = Teuchos::as<view_gs_t>(pointers_tmp(i));
280  }
281  nnz = Teuchos::as<view_gs_t>(nnz_tmp);
282  }
283  };
284 
285  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
286  struct same_go_helper_kokkos_view
287  {
288  static void do_get(const Teuchos::Ptr<const M> mat,
289  KV_S& nzvals,
290  KV_GO& indices,
291  KV_GS& pointers,
292  typename KV_GS::value_type& nnz,
293  const Teuchos::Ptr<
294  const Tpetra::Map<typename M::local_ordinal_t,
295  typename M::global_ordinal_t,
296  typename M::node_t> > map,
297  EDistribution distribution,
298  EStorage_Ordering ordering)
299  {
300  typedef typename M::global_size_t mat_gs_t;
301  typedef typename KV_GS::value_type view_gs_t;
302  std::conditional_t<std::is_same_v<view_gs_t,mat_gs_t>,
303  same_gs_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op>,
304  diff_gs_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op> >::do_get(mat, nzvals, indices,
305  pointers, nnz, map,
306  distribution, ordering);
307  }
308  };
309 
310  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
311  struct diff_go_helper_kokkos_view
312  {
313  static void do_get(const Teuchos::Ptr<const M> mat,
314  KV_S& nzvals,
315  KV_GO& indices,
316  KV_GS& pointers,
317  typename KV_GS::value_type& nnz,
318  const Teuchos::Ptr<
319  const Tpetra::Map<typename M::local_ordinal_t,
320  typename M::global_ordinal_t,
321  typename M::node_t> > map,
322  EDistribution distribution,
323  EStorage_Ordering ordering)
324  {
325  typedef typename M::global_ordinal_t mat_go_t;
326  typedef typename M::global_size_t mat_gs_t;
327  typedef typename Kokkos::View<mat_go_t*, Kokkos::HostSpace> KV_TMP;
328  size_t i, size = indices.extent(0);
329  KV_TMP indices_tmp(Kokkos::ViewAllocateWithoutInitializing("indices_tmp"), size);
330 
331  typedef typename KV_GO::value_type view_go_t;
332  typedef typename KV_GS::value_type view_gs_t;
333  std::conditional_t<std::is_same_v<view_gs_t,mat_gs_t>,
334  same_gs_helper_kokkos_view<M, KV_S, KV_TMP, KV_GS, Op>,
335  diff_gs_helper_kokkos_view<M, KV_S, KV_TMP, KV_GS, Op> >::do_get(mat, nzvals, indices_tmp,
336  pointers, nnz, map,
337  distribution, ordering);
338  for (i = 0; i < size; ++i){
339  indices(i) = Teuchos::as<view_go_t>(indices_tmp(i));
340  }
341  }
342  };
343 
344  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
345  struct same_scalar_helper_kokkos_view
346  {
347  static void do_get(const Teuchos::Ptr<const M> mat,
348  KV_S& nzvals,
349  KV_GO& indices,
350  KV_GS& pointers,
351  typename KV_GS::value_type& nnz,
352  const Teuchos::Ptr<
353  const Tpetra::Map<typename M::local_ordinal_t,
354  typename M::global_ordinal_t,
355  typename M::node_t> > map,
356  EDistribution distribution,
357  EStorage_Ordering ordering)
358  {
359  typedef typename M::global_ordinal_t mat_go_t;
360  typedef typename KV_GO::value_type view_go_t;
361  std::conditional_t<std::is_same_v<view_go_t, mat_go_t>,
362  same_go_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op>,
363  diff_go_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op> >::do_get(mat, nzvals, indices,
364  pointers, nnz, map,
365  distribution, ordering);
366  }
367  };
368 
369  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
370  struct diff_scalar_helper_kokkos_view
371  {
372  static void do_get(const Teuchos::Ptr<const M> mat,
373  KV_S& nzvals,
374  KV_GO& indices,
375  KV_GS& pointers,
376  typename KV_GS::value_type& nnz,
377  const Teuchos::Ptr<
378  const Tpetra::Map<typename M::local_ordinal_t,
379  typename M::global_ordinal_t,
380  typename M::node_t> > map,
381  EDistribution distribution,
382  EStorage_Ordering ordering)
383  {
384  typedef typename M::global_ordinal_t mat_go_t;
385  typedef typename Kokkos::ArithTraits<typename M::scalar_t>::val_type mat_scalar_t;
386  typedef typename Kokkos::View<mat_scalar_t*, Kokkos::HostSpace> KV_TMP;
387  size_t i, size = nzvals.extent(0);
388  KV_TMP nzvals_tmp(Kokkos::ViewAllocateWithoutInitializing("nzvals_tmp"), size);
389 
390  typedef typename KV_S::value_type view_scalar_t;
391  typedef typename KV_GO::value_type view_go_t;
392  std::conditional_t<std::is_same_v<view_go_t, mat_go_t>,
393  same_go_helper_kokkos_view<M, KV_TMP, KV_GO, KV_GS, Op>,
394  diff_go_helper_kokkos_view<M, KV_TMP, KV_GO, KV_GS, Op> >::do_get(mat, nzvals_tmp, indices,
395  pointers, nnz, map,
396  distribution, ordering);
397 
398  for (i = 0; i < size; ++i){
399  nzvals(i) = Teuchos::as<view_scalar_t>(nzvals_tmp(i));
400  }
401  }
402  };
403 
404 
405  template<class Matrix, typename KV_S, typename KV_GO, typename KV_GS, class Op>
406  struct get_cxs_helper_kokkos_view
407  {
408  static void do_get(const Teuchos::Ptr<const Matrix> mat,
409  KV_S& nzvals,
410  KV_GO& indices,
411  KV_GS& pointers,
412  typename KV_GS::value_type& nnz,
413  EDistribution distribution,
414  EStorage_Ordering ordering=ARBITRARY,
415  typename KV_GO::value_type indexBase = 0)
416  {
417  typedef typename Matrix::local_ordinal_t lo_t;
418  typedef typename Matrix::global_ordinal_t go_t;
419  typedef typename Matrix::global_size_t gs_t;
420  typedef typename Matrix::node_t node_t;
421 
422  const Teuchos::RCP<const Tpetra::Map<lo_t,go_t,node_t> > map
423  = getDistributionMap<lo_t,go_t,gs_t,node_t>(distribution,
424  Op::get_dimension(mat),
425  mat->getComm(),
426  indexBase,
427  Op::getMapFromMatrix(mat) //getMap must be the map returned, NOT rowmap or colmap
428  );
429  do_get(mat, nzvals, indices, pointers, nnz, Teuchos::ptrInArg(*map), distribution, ordering);
430  }
431 
436  static void do_get(const Teuchos::Ptr<const Matrix> mat,
437  KV_S& nzvals,
438  KV_GO& indices,
439  KV_GS& pointers,
440  typename KV_GS::value_type& nnz,
441  EDistribution distribution, // Does this one need a distribution argument??
442  EStorage_Ordering ordering=ARBITRARY)
443  {
444  const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
445  typename Matrix::global_ordinal_t,
446  typename Matrix::node_t> > map
447  = Op::getMap(mat);
448  do_get(mat, nzvals, indices, pointers, nnz, Teuchos::ptrInArg(*map), distribution, ordering);
449  }
450 
455  static void do_get(const Teuchos::Ptr<const Matrix> mat,
456  KV_S& nzvals,
457  KV_GO& indices,
458  KV_GS& pointers,
459  typename KV_GS::value_type& nnz,
460  const Teuchos::Ptr<
461  const Tpetra::Map<typename Matrix::local_ordinal_t,
462  typename Matrix::global_ordinal_t,
463  typename Matrix::node_t> > map,
464  EDistribution distribution,
465  EStorage_Ordering ordering=ARBITRARY)
466  {
467  typedef typename Matrix::scalar_t mat_scalar;
468  typedef typename KV_S::value_type view_scalar_t;
469 
470  std::conditional_t<std::is_same_v<mat_scalar,view_scalar_t>,
471  same_scalar_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,Op>,
472  diff_scalar_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,Op> >::do_get(mat,
473  nzvals, indices,
474  pointers, nnz,
475  map,
476  distribution, ordering);
477  }
478  };
479 
480 #ifndef DOXYGEN_SHOULD_SKIP_THIS
481  /*
482  * These two function-like classes are meant to be used as the \c
483  * Op template parameter for the \c get_cxs_helper template class.
484  */
485  template<class Matrix>
486  struct get_ccs_func
487  {
488  template<typename KV_S, typename KV_GO, typename KV_GS>
489  static void apply_kokkos_view(const Teuchos::Ptr<const Matrix> mat,
490  KV_S& nzvals,
491  KV_GO& rowind,
492  KV_GS& colptr,
493  typename Matrix::global_size_t& nnz,
494  const Teuchos::Ptr<
495  const Tpetra::Map<typename Matrix::local_ordinal_t,
496  typename Matrix::global_ordinal_t,
497  typename Matrix::node_t> > map,
498  EDistribution distribution,
499  EStorage_Ordering ordering)
500  {
501  mat->getCcs_kokkos_view(nzvals, rowind, colptr, nnz, map, ordering, distribution);
502  }
503 
504  static
505  const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
506  typename Matrix::global_ordinal_t,
507  typename Matrix::node_t> >
508  getMapFromMatrix(const Teuchos::Ptr<const Matrix> mat)
509  {
510  return mat->getMap(); // returns Teuchos::null if mat is Epetra_CrsMatrix
511  }
512 
513  static
514  const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
515  typename Matrix::global_ordinal_t,
516  typename Matrix::node_t> >
517  getMap(const Teuchos::Ptr<const Matrix> mat)
518  {
519  return mat->getColMap();
520  }
521 
522  static
523  typename Matrix::global_size_t
524  get_dimension(const Teuchos::Ptr<const Matrix> mat)
525  {
526  return mat->getGlobalNumCols();
527  }
528  };
529 
530  template<class Matrix>
531  struct get_crs_func
532  {
533  template<typename KV_S, typename KV_GO, typename KV_GS>
534  static void apply_kokkos_view(const Teuchos::Ptr<const Matrix> mat,
535  KV_S& nzvals,
536  KV_GO& colind,
537  KV_GS& rowptr,
538  typename Matrix::global_size_t& nnz,
539  const Teuchos::Ptr<
540  const Tpetra::Map<typename Matrix::local_ordinal_t,
541  typename Matrix::global_ordinal_t,
542  typename Matrix::node_t> > map,
543  EDistribution distribution,
544  EStorage_Ordering ordering)
545  {
546  mat->getCrs_kokkos_view(nzvals, colind, rowptr, nnz, map, ordering, distribution);
547  }
548 
549  static
550  const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
551  typename Matrix::global_ordinal_t,
552  typename Matrix::node_t> >
553  getMapFromMatrix(const Teuchos::Ptr<const Matrix> mat)
554  {
555  return mat->getMap(); // returns Teuchos::null if mat is Epetra_CrsMatrix
556  }
557 
558  static
559  const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
560  typename Matrix::global_ordinal_t,
561  typename Matrix::node_t> >
562  getMap(const Teuchos::Ptr<const Matrix> mat)
563  {
564  return mat->getRowMap();
565  }
566 
567  static
568  typename Matrix::global_size_t
569  get_dimension(const Teuchos::Ptr<const Matrix> mat)
570  {
571  return mat->getGlobalNumRows();
572  }
573  };
574 #endif // DOXYGEN_SHOULD_SKIP_THIS
575 
613  template<class Matrix, typename KV_S, typename KV_GO, typename KV_GS>
614  struct get_ccs_helper_kokkos_view : get_cxs_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,get_ccs_func<Matrix> >
615  {};
616 
624  template<class Matrix, typename KV_S, typename KV_GO, typename KV_GS>
625  struct get_crs_helper_kokkos_view : get_cxs_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,get_crs_func<Matrix> >
626  {};
627  /* End Matrix/MultiVector Utilities */
628 
629 
631  // Definitions //
633 
634 
635  template <typename LO, typename GO, typename GS, typename Node>
636  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
637  getGatherMap( const Teuchos::RCP< const Tpetra::Map<LO,GO,Node> > &map )
638  {
639  //RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream( Teuchos::null ); // may need to pass an osstream to computeGatherMap for debugging cases...
640  Teuchos::RCP< const Tpetra::Map<LO,GO,Node> > gather_map = Tpetra::Details::computeGatherMap(map, Teuchos::null);
641  return gather_map;
642  }
643 
644 
645  template <typename LO, typename GO, typename GS, typename Node>
646  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
647  getDistributionMap(EDistribution distribution,
648  GS num_global_elements,
649  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
650  GO indexBase,
651  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >& map)
652  {
653  // TODO: Need to add indexBase to cases other than ROOTED
654  // We do not support these maps in any solver now.
655  switch( distribution ){
656  case DISTRIBUTED:
657  case DISTRIBUTED_NO_OVERLAP:
658  return Tpetra::createUniformContigMapWithNode<LO,GO, Node>(num_global_elements, comm);
659  case GLOBALLY_REPLICATED:
660  return Tpetra::createLocalMapWithNode<LO,GO, Node>(num_global_elements, comm);
661  case ROOTED:
662  {
663  int rank = Teuchos::rank(*comm);
664  size_t my_num_elems = Teuchos::OrdinalTraits<size_t>::zero();
665  if( rank == 0 ) { my_num_elems = num_global_elements; }
666 
667  return rcp(new Tpetra::Map<LO,GO, Node>(num_global_elements,
668  my_num_elems, indexBase, comm));
669  }
670  case CONTIGUOUS_AND_ROOTED:
671  {
672  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> > gathermap
673  = getGatherMap<LO,GO,GS,Node>( map ); //getMap must be the map returned, NOT rowmap or colmap
674  return gathermap;
675  }
676  default:
677  TEUCHOS_TEST_FOR_EXCEPTION( true,
678  std::logic_error,
679  "Control should never reach this point. "
680  "Please contact the Amesos2 developers." );
681  }
682  }
683 
684 
685 #ifdef HAVE_AMESOS2_EPETRA
686 
687  //#pragma message "include 3"
688  //#include <Epetra_Map.h>
689 
690  template <typename LO, typename GO, typename GS, typename Node>
691  Teuchos::RCP<Tpetra::Map<LO,GO,Node> >
692  epetra_map_to_tpetra_map(const Epetra_BlockMap& map)
693  {
694  using Teuchos::as;
695  using Teuchos::rcp;
696 
697  int num_my_elements = map.NumMyElements();
698  Teuchos::Array<int> my_global_elements(num_my_elements);
699  map.MyGlobalElements(my_global_elements.getRawPtr());
700 
701  Teuchos::Array<GO> my_gbl_inds_buf;
702  Teuchos::ArrayView<GO> my_gbl_inds;
703  if (! std::is_same<int, GO>::value) {
704  my_gbl_inds_buf.resize (num_my_elements);
705  my_gbl_inds = my_gbl_inds_buf ();
706  for (int k = 0; k < num_my_elements; ++k) {
707  my_gbl_inds[k] = static_cast<GO> (my_global_elements[k]);
708  }
709  }
710  else {
711  using Teuchos::av_reinterpret_cast;
712  my_gbl_inds = av_reinterpret_cast<GO> (my_global_elements ());
713  }
714 
715  typedef Tpetra::Map<LO,GO,Node> map_t;
716  RCP<map_t> tmap = rcp(new map_t(Teuchos::OrdinalTraits<GS>::invalid(),
717  my_gbl_inds(),
718  as<GO>(map.IndexBase()),
719  to_teuchos_comm(Teuchos::rcpFromRef(map.Comm()))));
720  return tmap;
721  }
722 
723  template <typename LO, typename GO, typename GS, typename Node>
724  Teuchos::RCP<Epetra_Map>
725  tpetra_map_to_epetra_map(const Tpetra::Map<LO,GO,Node>& map)
726  {
727  using Teuchos::as;
728 
729  Teuchos::Array<GO> elements_tmp;
730  elements_tmp = map.getLocalElementList();
731  int num_my_elements = elements_tmp.size();
732  Teuchos::Array<int> my_global_elements(num_my_elements);
733  for (int i = 0; i < num_my_elements; ++i){
734  my_global_elements[i] = as<int>(elements_tmp[i]);
735  }
736 
737  using Teuchos::rcp;
738  RCP<Epetra_Map> emap = rcp(new Epetra_Map(-1,
739  num_my_elements,
740  my_global_elements.getRawPtr(),
741  as<GO>(map.getIndexBase()),
742  *to_epetra_comm(map.getComm())));
743  return emap;
744  }
745 #endif // HAVE_AMESOS2_EPETRA
746 
747  template <typename Scalar,
748  typename GlobalOrdinal,
749  typename GlobalSizeT>
750  void transpose(Teuchos::ArrayView<Scalar> vals,
751  Teuchos::ArrayView<GlobalOrdinal> indices,
752  Teuchos::ArrayView<GlobalSizeT> ptr,
753  Teuchos::ArrayView<Scalar> trans_vals,
754  Teuchos::ArrayView<GlobalOrdinal> trans_indices,
755  Teuchos::ArrayView<GlobalSizeT> trans_ptr)
756  {
757  /* We have a compressed-row storage format of this matrix. We
758  * transform this into a compressed-column format using a
759  * distribution-counting sort algorithm, which is described by
760  * D.E. Knuth in TAOCP Vol 3, 2nd ed pg 78.
761  */
762 
763 #ifdef HAVE_AMESOS2_DEBUG
764  typename Teuchos::ArrayView<GlobalOrdinal>::iterator ind_it, ind_begin, ind_end;
765  ind_begin = indices.begin();
766  ind_end = indices.end();
767  size_t min_trans_ptr_size = *std::max_element(ind_begin, ind_end) + 1;
768  TEUCHOS_TEST_FOR_EXCEPTION( Teuchos::as<size_t>(trans_ptr.size()) < min_trans_ptr_size,
769  std::invalid_argument,
770  "Transpose pointer size not large enough." );
771  TEUCHOS_TEST_FOR_EXCEPTION( trans_vals.size() < vals.size(),
772  std::invalid_argument,
773  "Transpose values array not large enough." );
774  TEUCHOS_TEST_FOR_EXCEPTION( trans_indices.size() < indices.size(),
775  std::invalid_argument,
776  "Transpose indices array not large enough." );
777 #else
778  typename Teuchos::ArrayView<GlobalOrdinal>::iterator ind_it, ind_end;
779 #endif
780  // Count the number of entries in each column
781  Teuchos::Array<GlobalSizeT> count(trans_ptr.size(), 0);
782  ind_end = indices.end();
783  for( ind_it = indices.begin(); ind_it != ind_end; ++ind_it ){
784  ++(count[(*ind_it) + 1]);
785  }
786  // Accumulate
787  typename Teuchos::Array<GlobalSizeT>::iterator cnt_it, cnt_end;
788  cnt_end = count.end();
789  for( cnt_it = count.begin() + 1; cnt_it != cnt_end; ++cnt_it ){
790  *cnt_it = *cnt_it + *(cnt_it - 1);
791  }
792  // This becomes the array of column pointers
793  trans_ptr.assign(count);
794 
795  /* Move the nonzero values into their final place in nzval, based on the
796  * counts found previously.
797  *
798  * This sequence deviates from Knuth's algorithm a bit, following more
799  * closely the description presented in Gustavson, Fred G. "Two Fast
800  * Algorithms for Sparse Matrices: Multiplication and Permuted
801  * Transposition" ACM Trans. Math. Softw. volume 4, number 3, 1978, pages
802  * 250--269, http://doi.acm.org/10.1145/355791.355796.
803  *
804  * The output indices end up in sorted order
805  */
806 
807  GlobalSizeT size = ptr.size();
808  for( GlobalSizeT i = 0; i < size - 1; ++i ){
809  GlobalOrdinal u = ptr[i];
810  GlobalOrdinal v = ptr[i + 1];
811  for( GlobalOrdinal j = u; j < v; ++j ){
812  GlobalOrdinal k = count[indices[j]];
813  trans_vals[k] = vals[j];
814  trans_indices[k] = i;
815  ++(count[indices[j]]);
816  }
817  }
818  }
819 
820 
821  template <typename Scalar1, typename Scalar2>
822  void
823  scale(Teuchos::ArrayView<Scalar1> vals, size_t l,
824  size_t ld, Teuchos::ArrayView<Scalar2> s)
825  {
826  size_t vals_size = vals.size();
827 #ifdef HAVE_AMESOS2_DEBUG
828  size_t s_size = s.size();
829  TEUCHOS_TEST_FOR_EXCEPTION( s_size < l,
830  std::invalid_argument,
831  "Scale vector must have length at least that of the vector" );
832 #endif
833  size_t i, s_i;
834  for( i = 0, s_i = 0; i < vals_size; ++i, ++s_i ){
835  if( s_i == l ){
836  // bring i to the next multiple of ld
837  i += ld - s_i;
838  s_i = 0;
839  }
840  vals[i] *= s[s_i];
841  }
842  }
843 
844  template <typename Scalar1, typename Scalar2, class BinaryOp>
845  void
846  scale(Teuchos::ArrayView<Scalar1> vals, size_t l,
847  size_t ld, Teuchos::ArrayView<Scalar2> s,
848  BinaryOp binary_op)
849  {
850  size_t vals_size = vals.size();
851 #ifdef HAVE_AMESOS2_DEBUG
852  size_t s_size = s.size();
853  TEUCHOS_TEST_FOR_EXCEPTION( s_size < l,
854  std::invalid_argument,
855  "Scale vector must have length at least that of the vector" );
856 #endif
857  size_t i, s_i;
858  for( i = 0, s_i = 0; i < vals_size; ++i, ++s_i ){
859  if( s_i == l ){
860  // bring i to the next multiple of ld
861  i += ld - s_i;
862  s_i = 0;
863  }
864  vals[i] = binary_op(vals[i], s[s_i]);
865  }
866  }
867 
868  template<class row_ptr_view_t, class cols_view_t, class per_view_t>
869  void
870  reorder(row_ptr_view_t & row_ptr, cols_view_t & cols,
871  per_view_t & perm, per_view_t & peri, size_t & nnz,
872  bool permute_matrix)
873  {
874  #ifndef HAVE_AMESOS2_METIS
875  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
876  "Cannot reorder for cuSolver because no METIS is available.");
877  #else
878  typedef typename cols_view_t::value_type ordinal_type;
879  typedef typename row_ptr_view_t::value_type size_type;
880 
881  // begin on host where we'll run metis reorder
882  auto host_row_ptr = Kokkos::create_mirror_view(row_ptr);
883  auto host_cols = Kokkos::create_mirror_view(cols);
884  Kokkos::deep_copy(host_row_ptr, row_ptr);
885  Kokkos::deep_copy(host_cols, cols);
886 
887  // strip out the diagonals - metis will just crash with them included.
888  // make space for the stripped version
889  typedef Kokkos::View<idx_t*, Kokkos::HostSpace> host_metis_array;
890  const ordinal_type size = row_ptr.size() - 1;
891  size_type max_nnz = host_row_ptr(size);
892  host_metis_array host_strip_diag_row_ptr(
893  Kokkos::ViewAllocateWithoutInitializing("host_strip_diag_row_ptr"),
894  size+1);
895  host_metis_array host_strip_diag_cols(
896  Kokkos::ViewAllocateWithoutInitializing("host_strip_diag_cols"),
897  max_nnz);
898 
899  size_type new_nnz = 0;
900  for(ordinal_type i = 0; i < size; ++i) {
901  host_strip_diag_row_ptr(i) = new_nnz;
902  for(size_type j = host_row_ptr(i); j < host_row_ptr(i+1); ++j) {
903  if (i != host_cols(j)) {
904  host_strip_diag_cols(new_nnz++) = host_cols(j);
905  }
906  }
907  }
908  host_strip_diag_row_ptr(size) = new_nnz;
909 
910  // we'll get original permutations on host
911  host_metis_array host_perm(
912  Kokkos::ViewAllocateWithoutInitializing("host_perm"), size);
913  host_metis_array host_peri(
914  Kokkos::ViewAllocateWithoutInitializing("host_peri"), size);
915 
916  // If we want to remove metis.h included in this header we can move this
917  // to the cpp, but we need to decide how to handle the idx_t declaration.
918  idx_t metis_size = size;
919  int err = METIS_NodeND(&metis_size, host_strip_diag_row_ptr.data(), host_strip_diag_cols.data(),
920  NULL, NULL, host_perm.data(), host_peri.data());
921 
922  TEUCHOS_TEST_FOR_EXCEPTION(err != METIS_OK, std::runtime_error,
923  "METIS_NodeND failed to sort matrix.");
924 
925  // put the permutations on our saved device ptrs
926  // these will be used to permute x and b when we solve
927  typedef typename cols_view_t::execution_space exec_space_t;
928  auto device_perm = Kokkos::create_mirror_view(exec_space_t(), host_perm);
929  auto device_peri = Kokkos::create_mirror_view(exec_space_t(), host_peri);
930  deep_copy(device_perm, host_perm);
931  deep_copy(device_peri, host_peri);
932 
933  // also set the permutation which may need to convert the type from
934  // metis to the native ordinal_type
935  deep_copy_or_assign_view(perm, device_perm);
936  deep_copy_or_assign_view(peri, device_peri);
937 
938  if (permute_matrix) {
939  // we'll permute matrix on device to a new set of arrays
940  row_ptr_view_t new_row_ptr(
941  Kokkos::ViewAllocateWithoutInitializing("new_row_ptr"), row_ptr.size());
942  cols_view_t new_cols(
943  Kokkos::ViewAllocateWithoutInitializing("new_cols"), cols.size() - new_nnz/2);
944 
945  // permute row indices
946  Kokkos::RangePolicy<exec_space_t> policy_row(0, row_ptr.size());
947  Kokkos::parallel_scan(policy_row, KOKKOS_LAMBDA(
948  ordinal_type i, size_type & update, const bool &final) {
949  if(final) {
950  new_row_ptr(i) = update;
951  }
952  if(i < size) {
953  ordinal_type count = 0;
954  const ordinal_type row = device_perm(i);
955  for(ordinal_type k = row_ptr(row); k < row_ptr(row + 1); ++k) {
956  const ordinal_type j = device_peri(cols(k));
957  count += (i >= j);
958  }
959  update += count;
960  }
961  });
962 
963  // permute col indices
964  Kokkos::RangePolicy<exec_space_t> policy_col(0, size);
965  Kokkos::parallel_for(policy_col, KOKKOS_LAMBDA(ordinal_type i) {
966  const ordinal_type kbeg = new_row_ptr(i);
967  const ordinal_type row = device_perm(i);
968  const ordinal_type col_beg = row_ptr(row);
969  const ordinal_type col_end = row_ptr(row + 1);
970  const ordinal_type nk = col_end - col_beg;
971  for(ordinal_type k = 0, t = 0; k < nk; ++k) {
972  const ordinal_type tk = kbeg + t;
973  const ordinal_type sk = col_beg + k;
974  const ordinal_type j = device_peri(cols(sk));
975  if(i >= j) {
976  new_cols(tk) = j;
977  ++t;
978  }
979  }
980  });
981 
982  // finally set the inputs to the new sorted arrays
983  row_ptr = new_row_ptr;
984  cols = new_cols;
985  }
986 
987  nnz = new_nnz;
988  #endif // HAVE_AMESOS2_METIS
989  }
990 
991  template<class values_view_t, class row_ptr_view_t,
992  class cols_view_t, class per_view_t>
993  void
994  reorder_values(values_view_t & values, const row_ptr_view_t & orig_row_ptr,
995  const row_ptr_view_t & new_row_ptr,
996  const cols_view_t & orig_cols, const per_view_t & perm, const per_view_t & peri,
997  size_t nnz)
998  {
999  typedef typename cols_view_t::value_type ordinal_type;
1000  typedef typename cols_view_t::execution_space exec_space_t;
1001 
1002  auto device_perm = Kokkos::create_mirror_view(exec_space_t(), perm);
1003  auto device_peri = Kokkos::create_mirror_view(exec_space_t(), peri);
1004  deep_copy(device_perm, perm);
1005  deep_copy(device_peri, peri);
1006 
1007  const ordinal_type size = orig_row_ptr.size() - 1;
1008 
1009  auto host_orig_row_ptr = Kokkos::create_mirror_view(orig_row_ptr);
1010  auto new_nnz = host_orig_row_ptr(size); // TODO: Maybe optimize this by caching
1011 
1012  values_view_t new_values(
1013  Kokkos::ViewAllocateWithoutInitializing("new_values"), values.size() - new_nnz/2);
1014 
1015  // permute col indices
1016  Kokkos::RangePolicy<exec_space_t> policy_col(0, size);
1017  Kokkos::parallel_for(policy_col, KOKKOS_LAMBDA(ordinal_type i) {
1018  const ordinal_type kbeg = new_row_ptr(i);
1019  const ordinal_type row = device_perm(i);
1020  const ordinal_type col_beg = orig_row_ptr(row);
1021  const ordinal_type col_end = orig_row_ptr(row + 1);
1022  const ordinal_type nk = col_end - col_beg;
1023  for(ordinal_type k = 0, t = 0; k < nk; ++k) {
1024  const ordinal_type tk = kbeg + t;
1025  const ordinal_type sk = col_beg + k;
1026  const ordinal_type j = device_peri(orig_cols(sk));
1027  if(i >= j) {
1028  new_values(tk) = values(sk);
1029  ++t;
1030  }
1031  }
1032  });
1033 
1034  values = new_values;
1035  }
1036 
1037  template<class array_view_t, class per_view_t>
1038  void
1039  apply_reorder_permutation(const array_view_t & array,
1040  array_view_t & permuted_array, const per_view_t & permutation) {
1041  if(permuted_array.extent(0) != array.extent(0) || permuted_array.extent(1) != array.extent(1)) {
1042  permuted_array = array_view_t(
1043  Kokkos::ViewAllocateWithoutInitializing("permuted_array"),
1044  array.extent(0), array.extent(1));
1045  }
1046  typedef typename array_view_t::execution_space exec_space_t;
1047  Kokkos::RangePolicy<exec_space_t> policy(0, array.extent(0));
1048  Kokkos::parallel_for(policy, KOKKOS_LAMBDA(size_t i) {
1049  for(size_t j = 0; j < array.extent(1); ++j) {
1050  permuted_array(i, j) = array(permutation(i), j);
1051  }
1052  });
1053  }
1054 
1057  } // end namespace Util
1058 
1059 } // end namespace Amesos2
1060 
1061 #endif // #ifndef AMESOS2_UTIL_HPP
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:614
const int size
Definition: klu2_simple.cpp:50
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.
A generic base class for the CRS and CCS helpers.
Definition: Amesos2_Util.hpp:233
Provides some simple meta-programming utilities for Amesos2.
EStorage_Ordering
Definition: Amesos2_TypeDecl.hpp:107
void transpose(ArrayView< Scalar > vals, ArrayView< GlobalOrdinal > indices, ArrayView< GlobalSizeT > ptr, ArrayView< Scalar > trans_vals, ArrayView< GlobalOrdinal > trans_indices, ArrayView< GlobalSizeT > trans_ptr)
Copy or assign views based on memory spaces.
const RCP< const Teuchos::Comm< int > > to_teuchos_comm(RCP< const Epetra_Comm > c)
Transform an Epetra_Comm object into a Teuchos::Comm object.
void printLine(Teuchos::FancyOStream &out)
Prints a line of 70 &quot;-&quot;s on std::cout.
Definition: Amesos2_Util.cpp:85
const RCP< const Epetra_Comm > to_epetra_comm(RCP< const Teuchos::Comm< int > > c)
Transfrom a Teuchos::Comm object into an Epetra_Comm object.
const Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > getGatherMap(const Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > &map)
Gets a Tpetra::Map described by the EDistribution.
Definition: Amesos2_Util.hpp:637
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition: Amesos2_Util.hpp:625
RCP< Epetra_Map > tpetra_map_to_epetra_map(const Tpetra::Map< LO, GO, Node > &map)
Transform a Tpetra::Map object into an Epetra_Map.
Definition: Amesos2_Util.hpp:725
Enum and other types declarations for Amesos2.
RCP< Tpetra::Map< LO, GO, Node > > epetra_map_to_tpetra_map(const Epetra_BlockMap &map)
Transform an Epetra_Map object into a Tpetra::Map.
Definition: Amesos2_Util.hpp:692
EDistribution
Definition: Amesos2_TypeDecl.hpp:89