Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Util.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Amesos2: Templated Direct Sparse Solver Package
6 // Copyright 2011 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //
42 // @HEADER
43 
52 #ifndef AMESOS2_UTIL_HPP
53 #define AMESOS2_UTIL_HPP
54 
55 #include "Amesos2_config.h"
56 
57 #include "Teuchos_RCP.hpp"
58 #include "Teuchos_BLAS_types.hpp"
59 #include "Teuchos_Array.hpp"
60 #include "Teuchos_ArrayView.hpp"
61 #include "Teuchos_FancyOStream.hpp"
62 
63 #include <Tpetra_Map.hpp>
64 #include <Tpetra_DistObject_decl.hpp>
65 #include <Tpetra_ComputeGatherMap.hpp> // added for gather map... where is the best place??
66 
67 #include "Amesos2_TypeDecl.hpp"
68 #include "Amesos2_Meta.hpp"
70 
71 #ifdef HAVE_AMESOS2_EPETRA
72 #include <Epetra_Map.h>
73 #endif
74 
75 #ifdef HAVE_AMESOS2_METIS
76 #include "metis.h" // to discuss, remove from header?
77 #endif
78 
79 namespace Amesos2 {
80 
81  namespace Util {
82 
89  using Teuchos::RCP;
90  using Teuchos::ArrayView;
91 
108  template <typename LO, typename GO, typename GS, typename Node>
109  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
110  getGatherMap( const Teuchos::RCP< const Tpetra::Map<LO,GO,Node> > &map );
111 
112 
113  template <typename LO, typename GO, typename GS, typename Node>
114  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
115  getDistributionMap(EDistribution distribution,
116  GS num_global_elements,
117  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
118  GO indexBase = 0,
119  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >& map = Teuchos::null);
120 
121 
122 #ifdef HAVE_AMESOS2_EPETRA
123 
129  template <typename LO, typename GO, typename GS, typename Node>
130  RCP<Tpetra::Map<LO,GO,Node> >
131  epetra_map_to_tpetra_map(const Epetra_BlockMap& map);
132 
138  template <typename LO, typename GO, typename GS, typename Node>
139  RCP<Epetra_Map>
140  tpetra_map_to_epetra_map(const Tpetra::Map<LO,GO,Node>& map);
141 
147  const RCP<const Teuchos::Comm<int> > to_teuchos_comm(RCP<const Epetra_Comm> c);
148 
154  const RCP<const Epetra_Comm> to_epetra_comm(RCP<const Teuchos::Comm<int> > c);
155 #endif // HAVE_AMESOS2_EPETRA
156 
162  template <typename Scalar,
163  typename GlobalOrdinal,
164  typename GlobalSizeT>
165  void transpose(ArrayView<Scalar> vals,
166  ArrayView<GlobalOrdinal> indices,
167  ArrayView<GlobalSizeT> ptr,
168  ArrayView<Scalar> trans_vals,
169  ArrayView<GlobalOrdinal> trans_indices,
170  ArrayView<GlobalSizeT> trans_ptr);
171 
185  template <typename Scalar1, typename Scalar2>
186  void scale(ArrayView<Scalar1> vals, size_t l,
187  size_t ld, ArrayView<Scalar2> s);
188 
207  template <typename Scalar1, typename Scalar2, class BinaryOp>
208  void scale(ArrayView<Scalar1> vals, size_t l,
209  size_t ld, ArrayView<Scalar2> s, BinaryOp binary_op);
210 
211 
213  void printLine( Teuchos::FancyOStream &out );
214 
215  // Helper function used to convert Kokkos::complex pointer
216  // to std::complex pointer; needed for optimized code path
217  // when retrieving the CRS raw pointers
218  template < class T0, class T1 >
219  struct getStdCplxType
220  {
221  using common_type = typename std::common_type<T0,T1>::type;
222  using type = common_type;
223  };
224 
225  template < class T0, class T1 >
226  struct getStdCplxType< T0, T1* >
227  {
228  using common_type = typename std::common_type<T0,T1>::type;
229  using type = common_type;
230  };
231 
232 #if defined(HAVE_TEUCHOS_COMPLEX) && defined(HAVE_AMESOS2_KOKKOS)
233  template < class T0 >
234  struct getStdCplxType< T0, Kokkos::complex<T0>* >
235  {
236  using type = std::complex<T0>;
237  };
238 
239  template < class T0 , class T1 >
240  struct getStdCplxType< T0, Kokkos::complex<T1>* >
241  {
242  using common_type = typename std::common_type<T0,T1>::type;
243  using type = std::complex<common_type>;
244  };
245 #endif
246 
248  // Matrix/MultiVector Utilities //
250 
251 
252 
266  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
268  {
269  static void do_get(const Teuchos::Ptr<const M> mat,
270  KV_S& nzvals,
271  KV_GO& indices,
272  KV_GS& pointers,
273  typename KV_GS::value_type& nnz,
274  const Teuchos::Ptr<
275  const Tpetra::Map<typename M::local_ordinal_t,
276  typename M::global_ordinal_t,
277  typename M::node_t> > map,
278  EDistribution distribution,
279  EStorage_Ordering ordering)
280  {
281  Op::template apply_kokkos_view<KV_S, KV_GO, KV_GS>(mat, nzvals,
282  indices, pointers, nnz, map, distribution, ordering);
283  }
284  };
285 
286  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
287  struct diff_gs_helper_kokkos_view
288  {
289  static void do_get(const Teuchos::Ptr<const M> mat,
290  KV_S& nzvals,
291  KV_GO& indices,
292  KV_GS& pointers,
293  typename KV_GS::value_type& nnz,
294  const Teuchos::Ptr<
295  const Tpetra::Map<typename M::local_ordinal_t,
296  typename M::global_ordinal_t,
297  typename M::node_t> > map,
298  EDistribution distribution,
299  EStorage_Ordering ordering)
300  {
301  typedef typename M::global_size_t mat_gs_t;
302  typedef typename Kokkos::View<mat_gs_t*, Kokkos::HostSpace> KV_TMP;
303  size_t i, size = pointers.extent(0);
304  KV_TMP pointers_tmp(Kokkos::ViewAllocateWithoutInitializing("pointers_tmp"), size);
305 
306  mat_gs_t nnz_tmp = 0;
307  Op::template apply_kokkos_view<KV_S, KV_GO, KV_TMP>(mat, nzvals,
308  indices, pointers_tmp, nnz_tmp, Teuchos::ptrInArg(*map), distribution, ordering);
309  nnz = Teuchos::as<typename KV_GS::value_type>(nnz_tmp);
310 
311  typedef typename KV_GS::value_type view_gs_t;
312  for (i = 0; i < size; ++i){
313  pointers(i) = Teuchos::as<view_gs_t>(pointers_tmp(i));
314  }
315  nnz = Teuchos::as<view_gs_t>(nnz_tmp);
316  }
317  };
318 
319  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
320  struct same_go_helper_kokkos_view
321  {
322  static void do_get(const Teuchos::Ptr<const M> mat,
323  KV_S& nzvals,
324  KV_GO& indices,
325  KV_GS& pointers,
326  typename KV_GS::value_type& nnz,
327  const Teuchos::Ptr<
328  const Tpetra::Map<typename M::local_ordinal_t,
329  typename M::global_ordinal_t,
330  typename M::node_t> > map,
331  EDistribution distribution,
332  EStorage_Ordering ordering)
333  {
334  typedef typename M::global_size_t mat_gs_t;
335  typedef typename KV_GS::value_type view_gs_t;
336  std::conditional_t<std::is_same_v<view_gs_t,mat_gs_t>,
337  same_gs_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op>,
338  diff_gs_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op> >::do_get(mat, nzvals, indices,
339  pointers, nnz, map,
340  distribution, ordering);
341  }
342  };
343 
344  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
345  struct diff_go_helper_kokkos_view
346  {
347  static void do_get(const Teuchos::Ptr<const M> mat,
348  KV_S& nzvals,
349  KV_GO& indices,
350  KV_GS& pointers,
351  typename KV_GS::value_type& nnz,
352  const Teuchos::Ptr<
353  const Tpetra::Map<typename M::local_ordinal_t,
354  typename M::global_ordinal_t,
355  typename M::node_t> > map,
356  EDistribution distribution,
357  EStorage_Ordering ordering)
358  {
359  typedef typename M::global_ordinal_t mat_go_t;
360  typedef typename M::global_size_t mat_gs_t;
361  typedef typename Kokkos::View<mat_go_t*, Kokkos::HostSpace> KV_TMP;
362  size_t i, size = indices.extent(0);
363  KV_TMP indices_tmp(Kokkos::ViewAllocateWithoutInitializing("indices_tmp"), size);
364 
365  typedef typename KV_GO::value_type view_go_t;
366  typedef typename KV_GS::value_type view_gs_t;
367  std::conditional_t<std::is_same_v<view_gs_t,mat_gs_t>,
368  same_gs_helper_kokkos_view<M, KV_S, KV_TMP, KV_GS, Op>,
369  diff_gs_helper_kokkos_view<M, KV_S, KV_TMP, KV_GS, Op> >::do_get(mat, nzvals, indices_tmp,
370  pointers, nnz, map,
371  distribution, ordering);
372  for (i = 0; i < size; ++i){
373  indices(i) = Teuchos::as<view_go_t>(indices_tmp(i));
374  }
375  }
376  };
377 
378  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
379  struct same_scalar_helper_kokkos_view
380  {
381  static void do_get(const Teuchos::Ptr<const M> mat,
382  KV_S& nzvals,
383  KV_GO& indices,
384  KV_GS& pointers,
385  typename KV_GS::value_type& nnz,
386  const Teuchos::Ptr<
387  const Tpetra::Map<typename M::local_ordinal_t,
388  typename M::global_ordinal_t,
389  typename M::node_t> > map,
390  EDistribution distribution,
391  EStorage_Ordering ordering)
392  {
393  typedef typename M::global_ordinal_t mat_go_t;
394  typedef typename KV_GO::value_type view_go_t;
395  std::conditional_t<std::is_same_v<view_go_t, mat_go_t>,
396  same_go_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op>,
397  diff_go_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op> >::do_get(mat, nzvals, indices,
398  pointers, nnz, map,
399  distribution, ordering);
400  }
401  };
402 
403  template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
404  struct diff_scalar_helper_kokkos_view
405  {
406  static void do_get(const Teuchos::Ptr<const M> mat,
407  KV_S& nzvals,
408  KV_GO& indices,
409  KV_GS& pointers,
410  typename KV_GS::value_type& nnz,
411  const Teuchos::Ptr<
412  const Tpetra::Map<typename M::local_ordinal_t,
413  typename M::global_ordinal_t,
414  typename M::node_t> > map,
415  EDistribution distribution,
416  EStorage_Ordering ordering)
417  {
418  typedef typename M::global_ordinal_t mat_go_t;
419  typedef typename Kokkos::ArithTraits<typename M::scalar_t>::val_type mat_scalar_t;
420  typedef typename Kokkos::View<mat_scalar_t*, Kokkos::HostSpace> KV_TMP;
421  size_t i, size = nzvals.extent(0);
422  KV_TMP nzvals_tmp(Kokkos::ViewAllocateWithoutInitializing("nzvals_tmp"), size);
423 
424  typedef typename KV_S::value_type view_scalar_t;
425  typedef typename KV_GO::value_type view_go_t;
426  std::conditional_t<std::is_same_v<view_go_t, mat_go_t>,
427  same_go_helper_kokkos_view<M, KV_TMP, KV_GO, KV_GS, Op>,
428  diff_go_helper_kokkos_view<M, KV_TMP, KV_GO, KV_GS, Op> >::do_get(mat, nzvals_tmp, indices,
429  pointers, nnz, map,
430  distribution, ordering);
431 
432  for (i = 0; i < size; ++i){
433  nzvals(i) = Teuchos::as<view_scalar_t>(nzvals_tmp(i));
434  }
435  }
436  };
437 
438 
439  template<class Matrix, typename KV_S, typename KV_GO, typename KV_GS, class Op>
440  struct get_cxs_helper_kokkos_view
441  {
442  static void do_get(const Teuchos::Ptr<const Matrix> mat,
443  KV_S& nzvals,
444  KV_GO& indices,
445  KV_GS& pointers,
446  typename KV_GS::value_type& nnz,
447  EDistribution distribution,
448  EStorage_Ordering ordering=ARBITRARY,
449  typename KV_GO::value_type indexBase = 0)
450  {
451  typedef typename Matrix::local_ordinal_t lo_t;
452  typedef typename Matrix::global_ordinal_t go_t;
453  typedef typename Matrix::global_size_t gs_t;
454  typedef typename Matrix::node_t node_t;
455 
456  const Teuchos::RCP<const Tpetra::Map<lo_t,go_t,node_t> > map
457  = getDistributionMap<lo_t,go_t,gs_t,node_t>(distribution,
458  Op::get_dimension(mat),
459  mat->getComm(),
460  indexBase,
461  Op::getMapFromMatrix(mat) //getMap must be the map returned, NOT rowmap or colmap
462  );
463  do_get(mat, nzvals, indices, pointers, nnz, Teuchos::ptrInArg(*map), distribution, ordering);
464  }
465 
470  static void do_get(const Teuchos::Ptr<const Matrix> mat,
471  KV_S& nzvals,
472  KV_GO& indices,
473  KV_GS& pointers,
474  typename KV_GS::value_type& nnz,
475  EDistribution distribution, // Does this one need a distribution argument??
476  EStorage_Ordering ordering=ARBITRARY)
477  {
478  const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
479  typename Matrix::global_ordinal_t,
480  typename Matrix::node_t> > map
481  = Op::getMap(mat);
482  do_get(mat, nzvals, indices, pointers, nnz, Teuchos::ptrInArg(*map), distribution, ordering);
483  }
484 
489  static void do_get(const Teuchos::Ptr<const Matrix> mat,
490  KV_S& nzvals,
491  KV_GO& indices,
492  KV_GS& pointers,
493  typename KV_GS::value_type& nnz,
494  const Teuchos::Ptr<
495  const Tpetra::Map<typename Matrix::local_ordinal_t,
496  typename Matrix::global_ordinal_t,
497  typename Matrix::node_t> > map,
498  EDistribution distribution,
499  EStorage_Ordering ordering=ARBITRARY)
500  {
501  typedef typename Matrix::scalar_t mat_scalar;
502  typedef typename KV_S::value_type view_scalar_t;
503 
504  std::conditional_t<std::is_same_v<mat_scalar,view_scalar_t>,
505  same_scalar_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,Op>,
506  diff_scalar_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,Op> >::do_get(mat,
507  nzvals, indices,
508  pointers, nnz,
509  map,
510  distribution, ordering);
511  }
512  };
513 
514 #ifndef DOXYGEN_SHOULD_SKIP_THIS
515  /*
516  * These two function-like classes are meant to be used as the \c
517  * Op template parameter for the \c get_cxs_helper template class.
518  */
519  template<class Matrix>
520  struct get_ccs_func
521  {
522  template<typename KV_S, typename KV_GO, typename KV_GS>
523  static void apply_kokkos_view(const Teuchos::Ptr<const Matrix> mat,
524  KV_S& nzvals,
525  KV_GO& rowind,
526  KV_GS& colptr,
527  typename Matrix::global_size_t& nnz,
528  const Teuchos::Ptr<
529  const Tpetra::Map<typename Matrix::local_ordinal_t,
530  typename Matrix::global_ordinal_t,
531  typename Matrix::node_t> > map,
532  EDistribution distribution,
533  EStorage_Ordering ordering)
534  {
535  mat->getCcs_kokkos_view(nzvals, rowind, colptr, nnz, map, ordering, distribution);
536  }
537 
538  static
539  const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
540  typename Matrix::global_ordinal_t,
541  typename Matrix::node_t> >
542  getMapFromMatrix(const Teuchos::Ptr<const Matrix> mat)
543  {
544  return mat->getMap(); // returns Teuchos::null if mat is Epetra_CrsMatrix
545  }
546 
547  static
548  const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
549  typename Matrix::global_ordinal_t,
550  typename Matrix::node_t> >
551  getMap(const Teuchos::Ptr<const Matrix> mat)
552  {
553  return mat->getColMap();
554  }
555 
556  static
557  typename Matrix::global_size_t
558  get_dimension(const Teuchos::Ptr<const Matrix> mat)
559  {
560  return mat->getGlobalNumCols();
561  }
562  };
563 
564  template<class Matrix>
565  struct get_crs_func
566  {
567  template<typename KV_S, typename KV_GO, typename KV_GS>
568  static void apply_kokkos_view(const Teuchos::Ptr<const Matrix> mat,
569  KV_S& nzvals,
570  KV_GO& colind,
571  KV_GS& rowptr,
572  typename Matrix::global_size_t& nnz,
573  const Teuchos::Ptr<
574  const Tpetra::Map<typename Matrix::local_ordinal_t,
575  typename Matrix::global_ordinal_t,
576  typename Matrix::node_t> > map,
577  EDistribution distribution,
578  EStorage_Ordering ordering)
579  {
580  mat->getCrs_kokkos_view(nzvals, colind, rowptr, nnz, map, ordering, distribution);
581  }
582 
583  static
584  const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
585  typename Matrix::global_ordinal_t,
586  typename Matrix::node_t> >
587  getMapFromMatrix(const Teuchos::Ptr<const Matrix> mat)
588  {
589  return mat->getMap(); // returns Teuchos::null if mat is Epetra_CrsMatrix
590  }
591 
592  static
593  const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
594  typename Matrix::global_ordinal_t,
595  typename Matrix::node_t> >
596  getMap(const Teuchos::Ptr<const Matrix> mat)
597  {
598  return mat->getRowMap();
599  }
600 
601  static
602  typename Matrix::global_size_t
603  get_dimension(const Teuchos::Ptr<const Matrix> mat)
604  {
605  return mat->getGlobalNumRows();
606  }
607  };
608 #endif // DOXYGEN_SHOULD_SKIP_THIS
609 
647  template<class Matrix, typename KV_S, typename KV_GO, typename KV_GS>
648  struct get_ccs_helper_kokkos_view : get_cxs_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,get_ccs_func<Matrix> >
649  {};
650 
658  template<class Matrix, typename KV_S, typename KV_GO, typename KV_GS>
659  struct get_crs_helper_kokkos_view : get_cxs_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,get_crs_func<Matrix> >
660  {};
661  /* End Matrix/MultiVector Utilities */
662 
663 
665  // Definitions //
667 
668 
669  template <typename LO, typename GO, typename GS, typename Node>
670  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
671  getGatherMap( const Teuchos::RCP< const Tpetra::Map<LO,GO,Node> > &map )
672  {
673  //RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream( Teuchos::null ); // may need to pass an osstream to computeGatherMap for debugging cases...
674  Teuchos::RCP< const Tpetra::Map<LO,GO,Node> > gather_map = Tpetra::Details::computeGatherMap(map, Teuchos::null);
675  return gather_map;
676  }
677 
678 
679  template <typename LO, typename GO, typename GS, typename Node>
680  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
681  getDistributionMap(EDistribution distribution,
682  GS num_global_elements,
683  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
684  GO indexBase,
685  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >& map)
686  {
687  // TODO: Need to add indexBase to cases other than ROOTED
688  // We do not support these maps in any solver now.
689  switch( distribution ){
690  case DISTRIBUTED:
691  case DISTRIBUTED_NO_OVERLAP:
692  return Tpetra::createUniformContigMapWithNode<LO,GO, Node>(num_global_elements, comm);
693  case GLOBALLY_REPLICATED:
694  return Tpetra::createLocalMapWithNode<LO,GO, Node>(num_global_elements, comm);
695  case ROOTED:
696  {
697  int rank = Teuchos::rank(*comm);
698  size_t my_num_elems = Teuchos::OrdinalTraits<size_t>::zero();
699  if( rank == 0 ) { my_num_elems = num_global_elements; }
700 
701  return rcp(new Tpetra::Map<LO,GO, Node>(num_global_elements,
702  my_num_elems, indexBase, comm));
703  }
704  case CONTIGUOUS_AND_ROOTED:
705  {
706  const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> > gathermap
707  = getGatherMap<LO,GO,GS,Node>( map ); //getMap must be the map returned, NOT rowmap or colmap
708  return gathermap;
709  }
710  default:
711  TEUCHOS_TEST_FOR_EXCEPTION( true,
712  std::logic_error,
713  "Control should never reach this point. "
714  "Please contact the Amesos2 developers." );
715  }
716  }
717 
718 
719 #ifdef HAVE_AMESOS2_EPETRA
720 
721  //#pragma message "include 3"
722  //#include <Epetra_Map.h>
723 
724  template <typename LO, typename GO, typename GS, typename Node>
725  Teuchos::RCP<Tpetra::Map<LO,GO,Node> >
726  epetra_map_to_tpetra_map(const Epetra_BlockMap& map)
727  {
728  using Teuchos::as;
729  using Teuchos::rcp;
730 
731  int num_my_elements = map.NumMyElements();
732  Teuchos::Array<int> my_global_elements(num_my_elements);
733  map.MyGlobalElements(my_global_elements.getRawPtr());
734 
735  Teuchos::Array<GO> my_gbl_inds_buf;
736  Teuchos::ArrayView<GO> my_gbl_inds;
737  if (! std::is_same<int, GO>::value) {
738  my_gbl_inds_buf.resize (num_my_elements);
739  my_gbl_inds = my_gbl_inds_buf ();
740  for (int k = 0; k < num_my_elements; ++k) {
741  my_gbl_inds[k] = static_cast<GO> (my_global_elements[k]);
742  }
743  }
744  else {
745  using Teuchos::av_reinterpret_cast;
746  my_gbl_inds = av_reinterpret_cast<GO> (my_global_elements ());
747  }
748 
749  typedef Tpetra::Map<LO,GO,Node> map_t;
750  RCP<map_t> tmap = rcp(new map_t(Teuchos::OrdinalTraits<GS>::invalid(),
751  my_gbl_inds(),
752  as<GO>(map.IndexBase()),
753  to_teuchos_comm(Teuchos::rcpFromRef(map.Comm()))));
754  return tmap;
755  }
756 
757  template <typename LO, typename GO, typename GS, typename Node>
758  Teuchos::RCP<Epetra_Map>
759  tpetra_map_to_epetra_map(const Tpetra::Map<LO,GO,Node>& map)
760  {
761  using Teuchos::as;
762 
763  Teuchos::Array<GO> elements_tmp;
764  elements_tmp = map.getLocalElementList();
765  int num_my_elements = elements_tmp.size();
766  Teuchos::Array<int> my_global_elements(num_my_elements);
767  for (int i = 0; i < num_my_elements; ++i){
768  my_global_elements[i] = as<int>(elements_tmp[i]);
769  }
770 
771  using Teuchos::rcp;
772  RCP<Epetra_Map> emap = rcp(new Epetra_Map(-1,
773  num_my_elements,
774  my_global_elements.getRawPtr(),
775  as<GO>(map.getIndexBase()),
776  *to_epetra_comm(map.getComm())));
777  return emap;
778  }
779 #endif // HAVE_AMESOS2_EPETRA
780 
781  template <typename Scalar,
782  typename GlobalOrdinal,
783  typename GlobalSizeT>
784  void transpose(Teuchos::ArrayView<Scalar> vals,
785  Teuchos::ArrayView<GlobalOrdinal> indices,
786  Teuchos::ArrayView<GlobalSizeT> ptr,
787  Teuchos::ArrayView<Scalar> trans_vals,
788  Teuchos::ArrayView<GlobalOrdinal> trans_indices,
789  Teuchos::ArrayView<GlobalSizeT> trans_ptr)
790  {
791  /* We have a compressed-row storage format of this matrix. We
792  * transform this into a compressed-column format using a
793  * distribution-counting sort algorithm, which is described by
794  * D.E. Knuth in TAOCP Vol 3, 2nd ed pg 78.
795  */
796 
797 #ifdef HAVE_AMESOS2_DEBUG
798  typename Teuchos::ArrayView<GlobalOrdinal>::iterator ind_it, ind_begin, ind_end;
799  ind_begin = indices.begin();
800  ind_end = indices.end();
801  size_t min_trans_ptr_size = *std::max_element(ind_begin, ind_end) + 1;
802  TEUCHOS_TEST_FOR_EXCEPTION( Teuchos::as<size_t>(trans_ptr.size()) < min_trans_ptr_size,
803  std::invalid_argument,
804  "Transpose pointer size not large enough." );
805  TEUCHOS_TEST_FOR_EXCEPTION( trans_vals.size() < vals.size(),
806  std::invalid_argument,
807  "Transpose values array not large enough." );
808  TEUCHOS_TEST_FOR_EXCEPTION( trans_indices.size() < indices.size(),
809  std::invalid_argument,
810  "Transpose indices array not large enough." );
811 #else
812  typename Teuchos::ArrayView<GlobalOrdinal>::iterator ind_it, ind_end;
813 #endif
814  // Count the number of entries in each column
815  Teuchos::Array<GlobalSizeT> count(trans_ptr.size(), 0);
816  ind_end = indices.end();
817  for( ind_it = indices.begin(); ind_it != ind_end; ++ind_it ){
818  ++(count[(*ind_it) + 1]);
819  }
820  // Accumulate
821  typename Teuchos::Array<GlobalSizeT>::iterator cnt_it, cnt_end;
822  cnt_end = count.end();
823  for( cnt_it = count.begin() + 1; cnt_it != cnt_end; ++cnt_it ){
824  *cnt_it = *cnt_it + *(cnt_it - 1);
825  }
826  // This becomes the array of column pointers
827  trans_ptr.assign(count);
828 
829  /* Move the nonzero values into their final place in nzval, based on the
830  * counts found previously.
831  *
832  * This sequence deviates from Knuth's algorithm a bit, following more
833  * closely the description presented in Gustavson, Fred G. "Two Fast
834  * Algorithms for Sparse Matrices: Multiplication and Permuted
835  * Transposition" ACM Trans. Math. Softw. volume 4, number 3, 1978, pages
836  * 250--269, http://doi.acm.org/10.1145/355791.355796.
837  *
838  * The output indices end up in sorted order
839  */
840 
841  GlobalSizeT size = ptr.size();
842  for( GlobalSizeT i = 0; i < size - 1; ++i ){
843  GlobalOrdinal u = ptr[i];
844  GlobalOrdinal v = ptr[i + 1];
845  for( GlobalOrdinal j = u; j < v; ++j ){
846  GlobalOrdinal k = count[indices[j]];
847  trans_vals[k] = vals[j];
848  trans_indices[k] = i;
849  ++(count[indices[j]]);
850  }
851  }
852  }
853 
854 
855  template <typename Scalar1, typename Scalar2>
856  void
857  scale(Teuchos::ArrayView<Scalar1> vals, size_t l,
858  size_t ld, Teuchos::ArrayView<Scalar2> s)
859  {
860  size_t vals_size = vals.size();
861 #ifdef HAVE_AMESOS2_DEBUG
862  size_t s_size = s.size();
863  TEUCHOS_TEST_FOR_EXCEPTION( s_size < l,
864  std::invalid_argument,
865  "Scale vector must have length at least that of the vector" );
866 #endif
867  size_t i, s_i;
868  for( i = 0, s_i = 0; i < vals_size; ++i, ++s_i ){
869  if( s_i == l ){
870  // bring i to the next multiple of ld
871  i += ld - s_i;
872  s_i = 0;
873  }
874  vals[i] *= s[s_i];
875  }
876  }
877 
878  template <typename Scalar1, typename Scalar2, class BinaryOp>
879  void
880  scale(Teuchos::ArrayView<Scalar1> vals, size_t l,
881  size_t ld, Teuchos::ArrayView<Scalar2> s,
882  BinaryOp binary_op)
883  {
884  size_t vals_size = vals.size();
885 #ifdef HAVE_AMESOS2_DEBUG
886  size_t s_size = s.size();
887  TEUCHOS_TEST_FOR_EXCEPTION( s_size < l,
888  std::invalid_argument,
889  "Scale vector must have length at least that of the vector" );
890 #endif
891  size_t i, s_i;
892  for( i = 0, s_i = 0; i < vals_size; ++i, ++s_i ){
893  if( s_i == l ){
894  // bring i to the next multiple of ld
895  i += ld - s_i;
896  s_i = 0;
897  }
898  vals[i] = binary_op(vals[i], s[s_i]);
899  }
900  }
901 
902  template<class row_ptr_view_t, class cols_view_t, class per_view_t>
903  void
904  reorder(row_ptr_view_t & row_ptr, cols_view_t & cols,
905  per_view_t & perm, per_view_t & peri, size_t & nnz,
906  bool permute_matrix)
907  {
908  #ifndef HAVE_AMESOS2_METIS
909  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
910  "Cannot reorder for cuSolver because no METIS is available.");
911  #else
912  typedef typename cols_view_t::value_type ordinal_type;
913  typedef typename row_ptr_view_t::value_type size_type;
914 
915  // begin on host where we'll run metis reorder
916  auto host_row_ptr = Kokkos::create_mirror_view(row_ptr);
917  auto host_cols = Kokkos::create_mirror_view(cols);
918  Kokkos::deep_copy(host_row_ptr, row_ptr);
919  Kokkos::deep_copy(host_cols, cols);
920 
921  // strip out the diagonals - metis will just crash with them included.
922  // make space for the stripped version
923  typedef Kokkos::View<idx_t*, Kokkos::HostSpace> host_metis_array;
924  const ordinal_type size = row_ptr.size() - 1;
925  size_type max_nnz = host_row_ptr(size);
926  host_metis_array host_strip_diag_row_ptr(
927  Kokkos::ViewAllocateWithoutInitializing("host_strip_diag_row_ptr"),
928  size+1);
929  host_metis_array host_strip_diag_cols(
930  Kokkos::ViewAllocateWithoutInitializing("host_strip_diag_cols"),
931  max_nnz);
932 
933  size_type new_nnz = 0;
934  for(ordinal_type i = 0; i < size; ++i) {
935  host_strip_diag_row_ptr(i) = new_nnz;
936  for(size_type j = host_row_ptr(i); j < host_row_ptr(i+1); ++j) {
937  if (i != host_cols(j)) {
938  host_strip_diag_cols(new_nnz++) = host_cols(j);
939  }
940  }
941  }
942  host_strip_diag_row_ptr(size) = new_nnz;
943 
944  // we'll get original permutations on host
945  host_metis_array host_perm(
946  Kokkos::ViewAllocateWithoutInitializing("host_perm"), size);
947  host_metis_array host_peri(
948  Kokkos::ViewAllocateWithoutInitializing("host_peri"), size);
949 
950  // If we want to remove metis.h included in this header we can move this
951  // to the cpp, but we need to decide how to handle the idx_t declaration.
952  idx_t metis_size = size;
953  int err = METIS_NodeND(&metis_size, host_strip_diag_row_ptr.data(), host_strip_diag_cols.data(),
954  NULL, NULL, host_perm.data(), host_peri.data());
955 
956  TEUCHOS_TEST_FOR_EXCEPTION(err != METIS_OK, std::runtime_error,
957  "METIS_NodeND failed to sort matrix.");
958 
959  // put the permutations on our saved device ptrs
960  // these will be used to permute x and b when we solve
961  typedef typename cols_view_t::execution_space exec_space_t;
962  auto device_perm = Kokkos::create_mirror_view(exec_space_t(), host_perm);
963  auto device_peri = Kokkos::create_mirror_view(exec_space_t(), host_peri);
964  deep_copy(device_perm, host_perm);
965  deep_copy(device_peri, host_peri);
966 
967  // also set the permutation which may need to convert the type from
968  // metis to the native ordinal_type
969  deep_copy_or_assign_view(perm, device_perm);
970  deep_copy_or_assign_view(peri, device_peri);
971 
972  if (permute_matrix) {
973  // we'll permute matrix on device to a new set of arrays
974  row_ptr_view_t new_row_ptr(
975  Kokkos::ViewAllocateWithoutInitializing("new_row_ptr"), row_ptr.size());
976  cols_view_t new_cols(
977  Kokkos::ViewAllocateWithoutInitializing("new_cols"), cols.size() - new_nnz/2);
978 
979  // permute row indices
980  Kokkos::RangePolicy<exec_space_t> policy_row(0, row_ptr.size());
981  Kokkos::parallel_scan(policy_row, KOKKOS_LAMBDA(
982  ordinal_type i, size_type & update, const bool &final) {
983  if(final) {
984  new_row_ptr(i) = update;
985  }
986  if(i < size) {
987  ordinal_type count = 0;
988  const ordinal_type row = device_perm(i);
989  for(ordinal_type k = row_ptr(row); k < row_ptr(row + 1); ++k) {
990  const ordinal_type j = device_peri(cols(k));
991  count += (i >= j);
992  }
993  update += count;
994  }
995  });
996 
997  // permute col indices
998  Kokkos::RangePolicy<exec_space_t> policy_col(0, size);
999  Kokkos::parallel_for(policy_col, KOKKOS_LAMBDA(ordinal_type i) {
1000  const ordinal_type kbeg = new_row_ptr(i);
1001  const ordinal_type row = device_perm(i);
1002  const ordinal_type col_beg = row_ptr(row);
1003  const ordinal_type col_end = row_ptr(row + 1);
1004  const ordinal_type nk = col_end - col_beg;
1005  for(ordinal_type k = 0, t = 0; k < nk; ++k) {
1006  const ordinal_type tk = kbeg + t;
1007  const ordinal_type sk = col_beg + k;
1008  const ordinal_type j = device_peri(cols(sk));
1009  if(i >= j) {
1010  new_cols(tk) = j;
1011  ++t;
1012  }
1013  }
1014  });
1015 
1016  // finally set the inputs to the new sorted arrays
1017  row_ptr = new_row_ptr;
1018  cols = new_cols;
1019  }
1020 
1021  nnz = new_nnz;
1022  #endif // HAVE_AMESOS2_METIS
1023  }
1024 
1025  template<class values_view_t, class row_ptr_view_t,
1026  class cols_view_t, class per_view_t>
1027  void
1028  reorder_values(values_view_t & values, const row_ptr_view_t & orig_row_ptr,
1029  const row_ptr_view_t & new_row_ptr,
1030  const cols_view_t & orig_cols, const per_view_t & perm, const per_view_t & peri,
1031  size_t nnz)
1032  {
1033  typedef typename cols_view_t::value_type ordinal_type;
1034  typedef typename cols_view_t::execution_space exec_space_t;
1035 
1036  auto device_perm = Kokkos::create_mirror_view(exec_space_t(), perm);
1037  auto device_peri = Kokkos::create_mirror_view(exec_space_t(), peri);
1038  deep_copy(device_perm, perm);
1039  deep_copy(device_peri, peri);
1040 
1041  const ordinal_type size = orig_row_ptr.size() - 1;
1042 
1043  auto host_orig_row_ptr = Kokkos::create_mirror_view(orig_row_ptr);
1044  auto new_nnz = host_orig_row_ptr(size); // TODO: Maybe optimize this by caching
1045 
1046  values_view_t new_values(
1047  Kokkos::ViewAllocateWithoutInitializing("new_values"), values.size() - new_nnz/2);
1048 
1049  // permute col indices
1050  Kokkos::RangePolicy<exec_space_t> policy_col(0, size);
1051  Kokkos::parallel_for(policy_col, KOKKOS_LAMBDA(ordinal_type i) {
1052  const ordinal_type kbeg = new_row_ptr(i);
1053  const ordinal_type row = device_perm(i);
1054  const ordinal_type col_beg = orig_row_ptr(row);
1055  const ordinal_type col_end = orig_row_ptr(row + 1);
1056  const ordinal_type nk = col_end - col_beg;
1057  for(ordinal_type k = 0, t = 0; k < nk; ++k) {
1058  const ordinal_type tk = kbeg + t;
1059  const ordinal_type sk = col_beg + k;
1060  const ordinal_type j = device_peri(orig_cols(sk));
1061  if(i >= j) {
1062  new_values(tk) = values(sk);
1063  ++t;
1064  }
1065  }
1066  });
1067 
1068  values = new_values;
1069  }
1070 
1071  template<class array_view_t, class per_view_t>
1072  void
1073  apply_reorder_permutation(const array_view_t & array,
1074  array_view_t & permuted_array, const per_view_t & permutation) {
1075  if(permuted_array.extent(0) != array.extent(0) || permuted_array.extent(1) != array.extent(1)) {
1076  permuted_array = array_view_t(
1077  Kokkos::ViewAllocateWithoutInitializing("permuted_array"),
1078  array.extent(0), array.extent(1));
1079  }
1080  typedef typename array_view_t::execution_space exec_space_t;
1081  Kokkos::RangePolicy<exec_space_t> policy(0, array.extent(0));
1082  Kokkos::parallel_for(policy, KOKKOS_LAMBDA(size_t i) {
1083  for(size_t j = 0; j < array.extent(1); ++j) {
1084  permuted_array(i, j) = array(permutation(i), j);
1085  }
1086  });
1087  }
1088 
1091  } // end namespace Util
1092 
1093 } // end namespace Amesos2
1094 
1095 #endif // #ifndef AMESOS2_UTIL_HPP
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:648
const int size
Definition: klu2_simple.cpp:74
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.
A generic base class for the CRS and CCS helpers.
Definition: Amesos2_Util.hpp:267
Provides some simple meta-programming utilities for Amesos2.
EStorage_Ordering
Definition: Amesos2_TypeDecl.hpp:141
void transpose(ArrayView< Scalar > vals, ArrayView< GlobalOrdinal > indices, ArrayView< GlobalSizeT > ptr, ArrayView< Scalar > trans_vals, ArrayView< GlobalOrdinal > trans_indices, ArrayView< GlobalSizeT > trans_ptr)
Copy or assign views based on memory spaces.
const RCP< const Teuchos::Comm< int > > to_teuchos_comm(RCP< const Epetra_Comm > c)
Transform an Epetra_Comm object into a Teuchos::Comm object.
void printLine(Teuchos::FancyOStream &out)
Prints a line of 70 &quot;-&quot;s on std::cout.
Definition: Amesos2_Util.cpp:119
const RCP< const Epetra_Comm > to_epetra_comm(RCP< const Teuchos::Comm< int > > c)
Transfrom a Teuchos::Comm object into an Epetra_Comm object.
const Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > getGatherMap(const Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > &map)
Gets a Tpetra::Map described by the EDistribution.
Definition: Amesos2_Util.hpp:671
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition: Amesos2_Util.hpp:659
RCP< Epetra_Map > tpetra_map_to_epetra_map(const Tpetra::Map< LO, GO, Node > &map)
Transform a Tpetra::Map object into an Epetra_Map.
Definition: Amesos2_Util.hpp:759
Enum and other types declarations for Amesos2.
RCP< Tpetra::Map< LO, GO, Node > > epetra_map_to_tpetra_map(const Epetra_BlockMap &map)
Transform an Epetra_Map object into a Tpetra::Map.
Definition: Amesos2_Util.hpp:726
EDistribution
Definition: Amesos2_TypeDecl.hpp:123