1 #ifndef _ZOLTAN2_2GHOSTLAYER_HPP_
2 #define _ZOLTAN2_2GHOSTLAYER_HPP_
5 #include <unordered_map>
22 #include "Tpetra_Core.hpp"
23 #include "Teuchos_RCP.hpp"
24 #include "Tpetra_Import.hpp"
25 #include "Tpetra_FEMultiVector.hpp"
27 #include "KokkosKernels_Handle.hpp"
28 #include "KokkosKernels_IOUtils.hpp"
29 #include "KokkosGraph_Distance1Color.hpp"
30 #include "KokkosGraph_Distance1ColorHandle.hpp"
40 template <
typename Adapter>
// Tpetra map parameterized on this class's local (lno_t) and global (gno_t)
// ordinal types.
50 using map_t = Tpetra::Map<lno_t,gno_t>;
// Finite-element multivector type used throughout this class; femv_scalar_t
// is declared outside the lines visible here.
52 using femv_t = Tpetra::FEMultiVector<femv_scalar_t, lno_t, gno_t>;
// Execution space of femv_t's host view.
56 using host_exec =
typename femv_t::host_view_type::device_type::execution_space;
// Memory space of femv_t's host view.
57 using host_mem =
typename femv_t::host_view_type::device_type::memory_space;
61 gettimeofday(&tp, NULL);
62 return ((
double) (tp.tv_sec) + 1e-6 * tp.tv_usec);
// Pure virtual hook: coloring routine that operates on device-side
// Kokkos views. Implementations live outside this excerpt.
//   nVtx             - vertex count; the call visible in this file passes n_local.
//   adjs_view        - adjacency entries (lno_t).
//   offset_view      - per-vertex offsets (offset_t); presumably indexes adjs_view.
//   femv             - multivector handed to the implementation; elsewhere in
//                      this file column 0 of femv is read as the color array.
//   vertex_list      - vertex list view; vertex_list_size defaults to 0.
//   recolor          - defaults to false.
// NOTE(review): the call site visible in this file passes a flag named
// use_vbbit (global_max_degree < 6000) in the `recolor` position -- confirm
// what implementations expect this argument to mean.
83 virtual void colorInterior(
const size_t nVtx,
84 Kokkos::View<lno_t*, device_type > adjs_view,
85 Kokkos::View<offset_t*,device_type > offset_view,
86 Teuchos::RCP<femv_t> femv,
87 Kokkos::View<lno_t*, device_type> vertex_list,
88 size_t vertex_list_size = 0,
89 bool recolor=
false) = 0;
// Pure virtual hook with the same parameter list as colorInterior, except
// that every view argument is the HostMirror of the corresponding
// device view type. Implementations live outside this excerpt.
//   nVtx             - vertex count.
//   adjs_view        - adjacency entries (lno_t), host mirror.
//   offset_view      - per-vertex offsets (offset_t), host mirror.
//   femv             - multivector handed to the implementation.
//   vertex_list      - vertex list, host mirror; vertex_list_size defaults to 0.
//   recolor          - defaults to false.
92 virtual void colorInterior_serial(
const size_t nVtx,
93 typename Kokkos::View<lno_t*, device_type >::HostMirror adjs_view,
94 typename Kokkos::View<offset_t*,device_type >::HostMirror offset_view,
95 Teuchos::RCP<femv_t> femv,
96 typename Kokkos::View<lno_t*, device_type>::HostMirror vertex_list,
97 size_t vertex_list_size = 0,
98 bool recolor=
false) = 0;
191 Kokkos::View<offset_t*, device_type > dist_offsets_dev,
192 Kokkos::View<lno_t*, device_type > dist_adjs_dev,
193 Kokkos::View<int*,device_type > femv_colors,
194 Kokkos::View<lno_t*, device_type > boundary_verts_view,
199 Kokkos::MemoryTraits<Kokkos::Atomic>> verts_to_recolor_size_atomic,
202 Kokkos::View<
size_t*,
204 Kokkos::MemoryTraits<Kokkos::Atomic>> verts_to_send_size_atomic,
205 Kokkos::View<size_t*, device_type> recoloringSize,
212 bool recolor_degrees) = 0;
216 typename Kokkos::View<offset_t*, device_type >::HostMirror dist_offsets_host,
217 typename Kokkos::View<lno_t*, device_type >::HostMirror dist_adjs_host,
218 typename Kokkos::View<int*,device_type >::HostMirror femv_colors,
219 typename Kokkos::View<lno_t*, device_type >::HostMirror boundary_verts_view,
220 typename Kokkos::View<lno_t*,device_type>::HostMirror verts_to_recolor_view,
221 typename Kokkos::View<int*,device_type>::HostMirror verts_to_recolor_size_atomic,
222 typename Kokkos::View<lno_t*,device_type>::HostMirror verts_to_send_view,
223 typename Kokkos::View<size_t*,device_type>::HostMirror verts_to_send_size_atomic,
224 typename Kokkos::View<size_t*, device_type>::HostMirror recoloringSize,
225 typename Kokkos::View<int*, device_type>::HostMirror rand,
226 typename Kokkos::View<gno_t*,device_type>::HostMirror gid,
227 typename Kokkos::View<gno_t*,device_type>::HostMirror ghost_degrees,
228 bool recolor_degrees) = 0;
257 Kokkos::View<offset_t*, device_type> dist_offsets_dev,
258 Kokkos::View<lno_t*, device_type> dist_adjs_dev,
259 typename Kokkos::View<offset_t*, device_type>::HostMirror dist_offsets_host,
260 typename Kokkos::View<lno_t*, device_type>::HostMirror dist_adjs_host,
261 Kokkos::View<lno_t*, device_type>& boundary_verts,
264 Kokkos::View<
size_t*,
266 Kokkos::MemoryTraits<Kokkos::Atomic>> verts_to_send_size_atomic) = 0;
// Parameter list; the constructor visible in this file reads the boolean
// options "verbose" and "timing" from it, and "recolor_degrees" is read later.
270 RCP<Teuchos::ParameterList>
pl;
// Communicator used for every rank/size query and collective in this class.
272 RCP<const Teuchos::Comm<int> >
comm;
// Fetches, for every non-owned vertex in ownedPlusGhosts, its degree and
// adjacency list from the rank that owns it. Three Zoltan2::AlltoAllv
// exchanges are used: GIDs are sent to their owners, degrees are sent back,
// and adjacencies are sent back in one or more rounds so that no per-rank
// message exceeds INT_MAX bytes.
//
//   ownedPlusGhosts - in/out: GIDs; entries from index offsets.size()-1 onward
//                     are overwritten with the GIDs in the order their
//                     adjacencies are received.
//   owners          - owning rank for each entry of ownedPlusGhosts; entries
//                     equal to this rank or to -1 are not requested.
//   adjs, offsets   - local graph; offsets[lid+1]-offsets[lid] is used as the
//                     degree of an owned vertex.
//   mapOwned        - used to translate received GIDs to local ids.
//   adjs_2GL        - out: received adjacencies are appended here.
//   offsets_2GL     - out: the prefix sum of the received degrees is appended.
//
// NOTE(review): this excerpt omits several lines of the body (declarations of
// rounds, max_rounds, curr_round, curr_idx, some loop bodies and all closing
// braces), so the comments below describe only the statements that are visible.
345 void constructSecondGhostLayer(std::vector<gno_t>& ownedPlusGhosts,
346 const std::vector<int>& owners,
347 ArrayView<const gno_t> adjs,
348 ArrayView<const offset_t> offsets,
349 RCP<const map_t> mapOwned,
350 std::vector< gno_t>& adjs_2GL,
351 std::vector< offset_t>& offsets_2GL) {
// Per-destination-rank request counts and their exclusive prefix sum.
355 std::vector<int> sendcounts(
comm->getSize(),0);
356 std::vector<size_t> sdispls(
comm->getSize()+1,0);
359 if(
verbose) std::cout<<
comm->getRank()<<
": building sendcounts\n";
// Count one request per vertex owned by another rank (unknown owners, -1, are skipped).
360 for(
size_t i = 0; i < owners.size(); i++){
361 if(owners[i] !=
comm->getRank()&& owners[i] !=-1) sendcounts[owners[i]]++;
364 if(
verbose) std::cout<<
comm->getRank()<<
": building sdispls\n";
// sdispls[i] is where rank i's requests start in sendbuf; sendcount is the total.
365 size_t sendcount = 0;
366 for(
int i = 1; i <
comm->getSize()+1; i++){
367 sdispls[i] = sdispls[i-1] + sendcounts[i-1];
368 sendcount += sendcounts[i-1];
371 if(
verbose) std::cout<<
comm->getRank()<<
": building idx\n";
// Per-rank write cursor into sendbuf (the loop body is not visible in this excerpt).
372 std::vector<gno_t> idx(
comm->getSize(),0);
373 for(
int i = 0; i <
comm->getSize(); i++){
377 if(
verbose) std::cout<<
comm->getRank()<<
": building sendbuf\n";
// Pack the requested GIDs grouped by owner. The scan starts at
// offsets.size()-1, i.e. past the entries that have a row in offsets.
379 std::vector<gno_t> sendbuf(sendcount,0);
380 for(
size_t i = offsets.size()-1; i < owners.size(); i++){
381 if(owners[i] !=
comm->getRank() && owners[i] != -1){
382 sendbuf[idx[owners[i]]++] = ownedPlusGhosts[i];
387 if(
verbose) std::cout<<
comm->getRank()<<
": requesting GIDs from owners\n";
// Exchange 1: send the requested GIDs; recvbuf holds the GIDs other ranks
// are requesting from this rank, recvcounts how many came from each rank.
388 Teuchos::ArrayView<int> sendcounts_view = Teuchos::arrayViewFromVector(sendcounts);
389 Teuchos::ArrayView<gno_t> sendbuf_view = Teuchos::arrayViewFromVector(sendbuf);
390 Teuchos::ArrayRCP<gno_t> recvbuf;
391 std::vector<int> recvcounts(
comm->getSize(),0);
392 Teuchos::ArrayView<int> recvcounts_view = Teuchos::arrayViewFromVector(recvcounts);
393 Zoltan2::AlltoAllv<gno_t>(*
comm, *
env, sendbuf_view, sendcounts_view, recvbuf, recvcounts_view);
395 if(
verbose) std::cout<<comm->getRank()<<
": done communicating\n";
398 if(
verbose) std::cout<<comm->getRank()<<
": building rdispls\n";
// Prefix sum of recvcounts: rdispls[i]..rdispls[i+1] is rank i's slice of recvbuf.
399 gno_t recvcounttotal = 0;
400 std::vector<int> rdispls(comm->getSize()+1,0);
401 for(
size_t i = 1; i<recvcounts.size()+1; i++){
402 rdispls[i] = rdispls[i-1] + recvcounts[i-1];
403 recvcounttotal += recvcounts[i-1];
// Degree of every requested vertex, in the same order as recvbuf.
407 std::vector<offset_t> sendDegrees(recvcounttotal,0);
// Total number of adjacency entries owed to each requesting rank.
409 std::vector<int> adjsendcounts(comm->getSize(),0);
410 if(
verbose) std::cout<<comm->getRank()<<
": building adjacency counts\n";
411 for(
int i = 0; i < comm->getSize(); i++){
412 adjsendcounts[i] = 0;
413 for(
int j = rdispls[i]; j < rdispls[i+1]; j++){
414 lno_t lid = mapOwned->getLocalElement(recvbuf[j]);
415 offset_t degree = offsets[lid+1] - offsets[lid];
416 sendDegrees[j] = degree;
418 adjsendcounts[i] += degree;
422 if(
verbose) std::cout<<comm->getRank()<<
": sending degrees back to requestors\n";
// Exchange 2: return one degree per requested GID, reusing recvcounts as
// the send counts; recvDegrees lines up with sendbuf.
423 Teuchos::ArrayView<offset_t> sendDegrees_view = Teuchos::arrayViewFromVector(sendDegrees);
424 Teuchos::ArrayRCP<offset_t> recvDegrees;
425 std::vector<int> recvDegreesCount(comm->getSize(),0);
426 Teuchos::ArrayView<int> recvDegreesCount_view = Teuchos::arrayViewFromVector(recvDegreesCount);
427 Zoltan2::AlltoAllv<offset_t>(*
comm, *
env, sendDegrees_view, recvcounts_view, recvDegrees, recvDegreesCount_view);
431 if(
verbose) std::cout<<comm->getRank()<<
": determining number of rounds necessary\n";
// Local round estimate: byte size of the largest per-rank adjacency message
// divided by INT_MAX, plus one.
433 for(
int i = 0; i < comm->getSize(); i++){
434 if(adjsendcounts[i]*
sizeof(
gno_t)/ INT_MAX > (size_t)rounds){
435 rounds = (adjsendcounts[i]*
sizeof(
gno_t)/INT_MAX)+1;
// All ranks agree on the maximum round count.
441 Teuchos::reduceAll<int>(*
comm, Teuchos::REDUCE_MAX, 1, &rounds, &max_rounds);
443 if(
verbose) std::cout<<comm->getRank()<<
": building per_proc sums\n";
// For each round and each rank: how many adjacency entries and how many
// vertices this rank expects to receive (sized max_rounds+1 rows).
445 std::vector<std::vector<uint64_t>> per_proc_round_adj_sums(max_rounds+1,std::vector<uint64_t>(comm->getSize(),0));
446 std::vector<std::vector<uint64_t>> per_proc_round_vtx_sums(max_rounds+1,std::vector<uint64_t>(comm->getSize(),0));
448 if(
verbose) std::cout<<comm->getRank()<<
": filling per_proc sums\n";
// Walk the requests per owner and check whether adding the next vertex
// would push the round's bytes past INT_MAX (the branch body is not visible
// in this excerpt).
// NOTE(review): this receive-side test uses `> INT_MAX` while the send-side
// test further down uses `>= INT_MAX`; confirm both sides split rounds at
// the same vertex.
450 for(
int proc_to_send = 0; proc_to_send < comm->getSize(); proc_to_send++){
452 for(
size_t j = sdispls[proc_to_send]; j < sdispls[proc_to_send+1]; j++){
453 if((per_proc_round_adj_sums[curr_round][proc_to_send] + recvDegrees[j])*
sizeof(
gno_t) > INT_MAX){
456 per_proc_round_adj_sums[curr_round][proc_to_send] += recvDegrees[j];
457 per_proc_round_vtx_sums[curr_round][proc_to_send]++;
461 if(
verbose) std::cout<<comm->getRank()<<
": building recv GID schedule\n";
// recv_GID_per_proc_per_round[round][rank] lists indices into sendbuf in
// the order their adjacencies will arrive.
467 std::vector<std::vector<std::vector<gno_t>>> recv_GID_per_proc_per_round(
468 max_rounds+1,std::vector<std::vector<gno_t>>(
469 comm->getSize(),std::vector<gno_t>(0)));
470 for(
int i = 0; i < max_rounds; i++){
471 for(
int j = 0; j < comm->getSize(); j++){
472 recv_GID_per_proc_per_round[i][j] = std::vector<gno_t>(sendcounts[j],0);
476 if(
verbose) std::cout<<comm->getRank()<<
": filling out recv GID schedule\n";
// NOTE(review): the comparison below is `curr_idx > ...vtx_sums...`; the
// statements it guards are not visible here. Confirm that `>` (rather than
// `>=`) assigns exactly per_proc_round_vtx_sums[round][i] entries per round.
477 for(
int i = 0; i < comm->getSize(); i++){
480 for(
size_t j = sdispls[i]; j < sdispls[i+1]; j++){
481 if(curr_idx > per_proc_round_vtx_sums[curr_round][i]){
485 recv_GID_per_proc_per_round[curr_round][i][curr_idx++] = j;
489 if(
verbose) std::cout<<comm->getRank()<<
": reordering gids and degrees in the order they'll be received\n";
// Flatten the schedule: round-major, then rank, then position in the round.
504 std::vector<gno_t> final_gid_vec(sendcount, 0);
505 std::vector<offset_t> final_degree_vec(sendcount,0);
506 gno_t reorder_idx = 0;
507 for(
int i = 0; i < max_rounds; i++){
508 for(
int j = 0; j < comm->getSize(); j++){
509 for(
size_t k = 0; k < per_proc_round_vtx_sums[i][j]; k++){
510 final_gid_vec[reorder_idx] = sendbuf[recv_GID_per_proc_per_round[i][j][k]];
511 final_degree_vec[reorder_idx++] = recvDegrees[recv_GID_per_proc_per_round[i][j][k]];
// Sanity check: detect whether the reordering changed the GID order and
// print a diagnostic when that disagrees with the number of rounds.
519 bool reorganized =
false;
520 for(
size_t i = 0; i < sendcount; i++){
521 if(final_gid_vec[i] != sendbuf[i]) reorganized =
true;
527 if(!reorganized && (max_rounds > 1)) std::cout<<comm->getRank()<<
": did not reorgainze GIDs, but probably should have\n";
528 if(reorganized && (max_rounds == 1)) std::cout<<comm->getRank()<<
": reorganized GIDs, but probably should not have\n";
// Overwrite the tail of ownedPlusGhosts with the GIDs in receive order.
540 for (
size_t i = 0; i < sendcount; i++){
541 ownedPlusGhosts[i+offsets.size()-1] = final_gid_vec[i];
546 std::cout<<comm->getRank()<<
": done remapping\n";
547 std::cout<<comm->getRank()<<
": building ghost offsets\n";
// Prefix sum of the reordered degrees.
550 std::vector<offset_t> ghost_offsets(sendcount+1,0);
551 for(
size_t i = 1; i < sendcount+1; i++){
552 ghost_offsets[i] = ghost_offsets[i-1] + final_degree_vec[i-1];
556 if(
verbose) std::cout<<comm->getRank()<<
": going through the sending rounds\n";
// Per requesting rank: next position in recvbuf whose adjacencies have not
// been sent yet; persists across rounds.
558 std::vector<uint64_t> curr_idx_per_proc(comm->getSize(),0);
559 for(
int i = 0; i < comm->getSize(); i++) curr_idx_per_proc[i] = rdispls[i];
560 for(
int round = 0; round < max_rounds; round++){
// Adjacency payload for this round and the per-rank entry counts.
562 std::vector<gno_t> send_adj;
563 std::vector<int> send_adj_counts(comm->getSize(),0);
564 if(
verbose) std::cout<<comm->getRank()<<
": round "<<round<<
", constructing send_adj\n";
566 for(
int curr_proc = 0; curr_proc < comm->getSize(); curr_proc++){
567 uint64_t curr_adj_sum = 0;
// Consume requested vertices for curr_proc; the byte-limit branch body is
// not visible in this excerpt.
569 while( curr_idx_per_proc[curr_proc] < (
size_t)rdispls[curr_proc+1]){
570 lno_t lid = mapOwned->getLocalElement(recvbuf[curr_idx_per_proc[curr_proc]++]);
574 if((curr_adj_sum + (offsets[lid+1]-offsets[lid]))*
sizeof(
gno_t) >= INT_MAX){
579 curr_adj_sum += (offsets[lid+1] - offsets[lid]);
580 for(
offset_t j = offsets[lid]; j < offsets[lid+1]; j++){
581 send_adj.push_back(adjs[j]);
585 send_adj_counts[curr_proc] = curr_adj_sum;
587 if(
verbose) std::cout<<comm->getRank()<<
": round "<<round<<
", sending...\n";
// Exchange 3 (once per round): ship the adjacency entries to the requestors.
589 Teuchos::ArrayView<gno_t> send_adjs_view = Teuchos::arrayViewFromVector(send_adj);
590 Teuchos::ArrayView<int> adjsendcounts_view = Teuchos::arrayViewFromVector(send_adj_counts);
591 Teuchos::ArrayRCP<gno_t> ghost_adjs;
592 std::vector<int> adjrecvcounts(comm->getSize(),0);
593 Teuchos::ArrayView<int> adjsrecvcounts_view = Teuchos::arrayViewFromVector(adjrecvcounts);
594 Zoltan2::AlltoAllv<gno_t>(*
comm, *
env, send_adjs_view, adjsendcounts_view, ghost_adjs, adjsrecvcounts_view);
// Append what was received this round to the output adjacency list (the
// enclosing loop header is not visible in this excerpt).
598 adjs_2GL.push_back(ghost_adjs[i]);
601 if(
verbose) std::cout<<comm->getRank()<<
": constructing offsets\n";
// Append the ghost offsets to the output.
603 for(
size_t i = 0; i < sendcount+1; i++){
604 offsets_2GL.push_back(ghost_offsets[i]);
606 if(
verbose) std::cout<<comm->getRank()<<
": done building 2nd ghost layer\n";
// Sends (GID, color) pairs for the vertices listed in verts_to_send to every
// rank recorded for that vertex in procs_to_send, then applies the pairs
// received from other ranks to the local color array.
//
//   mapOwnedPlusGhosts - translates between local ids and GIDs on both the
//                        packing and the unpacking side.
//   verts_to_send      - local ids of vertices to send.
//   verts_to_send_size - element 0 holds the number of valid entries in
//                        verts_to_send.
//   femv               - column 0 of its host view is read and written as the
//                        color array.
//   procs_to_send      - local id -> list of destination ranks; accessed
//                        with .at(), so a missing key throws.
//
// Returns a double; the call visible in this file stores it as the
// communication time. NOTE(review): part of the parameter list, the
// declarations of sendsize/recvsize/total_sent/total_recvd and the return
// statement are not visible in this excerpt.
636 double doOwnedToGhosts(RCP<const map_t> mapOwnedPlusGhosts,
638 typename Kokkos::View<lno_t*,device_type>::HostMirror verts_to_send,
639 typename Kokkos::View<size_t*,device_type>::HostMirror verts_to_send_size,
640 Teuchos::RCP<femv_t> femv,
641 const std::unordered_map<
lno_t, std::vector<int>>& procs_to_send,
// Host view of the multivector; `colors` is its first column.
644 auto femvColors = femv->getLocalViewHost(Tpetra::Access::ReadWrite);
645 auto colors = subview(femvColors, Kokkos::ALL, 0);
647 int nprocs =
comm->getSize();
648 std::vector<int> sendcnts(
comm->getSize(), 0);
649 std::vector<gno_t> sdispls(
comm->getSize()+1, 0);
// Two buffer slots (GID and color) per (vertex, destination rank) pair.
652 for(
size_t i = 0; i < verts_to_send_size(0); i++){
653 for(
size_t j = 0; j < procs_to_send.at(verts_to_send(i)).size(); j++){
654 sendcnts[procs_to_send.at(verts_to_send(i))[j]] += 2;
// Slots already written for each destination rank.
660 std::vector<int> sentcount(nprocs, 0);
// Prefix sum of sendcnts gives each rank's start offset in sendbuf.
662 for(
int i = 1; i <
comm->getSize()+1; i++){
663 sdispls[i] = sdispls[i-1] + sendcnts[i-1];
664 sendsize += sendcnts[i-1];
666 total_sent = sendsize;
667 std::vector<gno_t> sendbuf(sendsize,0);
// Pack each pair as [GID, color] into the destination rank's slice.
672 for(
size_t i = 0; i < verts_to_send_size(0); i++){
673 std::vector<int> procs = procs_to_send.at(verts_to_send(i));
674 for(
size_t j = 0; j < procs.size(); j++){
675 size_t idx = sdispls[procs[j]] + sentcount[procs[j]];
676 sentcount[procs[j]] += 2;
677 sendbuf[idx++] = mapOwnedPlusGhosts->getGlobalElement(verts_to_send(i));
678 sendbuf[idx] = colors(verts_to_send(i));
682 Teuchos::ArrayView<int> sendcnts_view = Teuchos::arrayViewFromVector(sendcnts);
683 Teuchos::ArrayView<gno_t> sendbuf_view = Teuchos::arrayViewFromVector(sendbuf);
684 Teuchos::ArrayRCP<gno_t> recvbuf;
685 std::vector<int> recvcnts(
comm->getSize(), 0);
686 Teuchos::ArrayView<int> recvcnts_view = Teuchos::arrayViewFromVector(recvcnts);
// Only the AlltoAllv call itself is timed.
690 double comm_total = 0.0;
691 double comm_temp =
timer();
693 Zoltan2::AlltoAllv<gno_t>(*
comm, *
env, sendbuf_view, sendcnts_view, recvbuf, recvcnts_view);
694 comm_total +=
timer() - comm_temp;
// Total number of received slots across all ranks.
698 for(
int i = 0; i < recvcnts_view.size(); i++){
699 recvsize += recvcnts_view[i];
701 total_recvd = recvsize;
// Unpack [GID, color] pairs and store each color at the GID's local id.
703 for(
int i = 0; i < recvsize; i+=2){
704 size_t lid = mapOwnedPlusGhosts->getLocalElement(recvbuf[i]);
705 colors(lid) = recvbuf[i+1];
714 const RCP<const base_adapter_t> &adapter_,
715 const RCP<Teuchos::ParameterList> &pl_,
716 const RCP<Environment> &env_,
717 const RCP<
const Teuchos::Comm<int> > &comm_)
718 :
adapter(adapter_),
pl(pl_), env(env_), comm(comm_){
719 verbose =
pl->get<
bool>(
"verbose",
false);
720 timing =
pl->get<
bool>(
"timing",
false);
725 ArrayView<const gno_t> vtxIDs;
726 ArrayView<StridedData<lno_t, scalar_t> > vwgts;
732 size_t nVtx = model->getVertexList(vtxIDs, vwgts);
736 ArrayView<const gno_t> adjs;
738 ArrayView<const offset_t> offsets;
739 ArrayView<StridedData<lno_t, scalar_t> > ewgts;
740 model->getEdgeList(adjs, offsets, ewgts);
743 std::unordered_map<gno_t,lno_t> globalToLocal;
755 std::vector<gno_t> ownedPlusGhosts;
763 std::vector<int> owners;
766 for(
int i = 0; i < vtxIDs.size(); i++){
767 globalToLocal[vtxIDs[i]] = i;
768 ownedPlusGhosts.push_back(vtxIDs[i]);
769 owners.push_back(comm->getRank());
777 std::vector<lno_t> local_adjs;
778 for(
int i = 0; i < adjs.size(); i++){
779 if(globalToLocal.count(adjs[i])==0){
780 ownedPlusGhosts.push_back(adjs[i]);
781 globalToLocal[adjs[i]] = vtxIDs.size()+nGhosts;
784 local_adjs.push_back(globalToLocal[adjs[i]]);
790 RCP<const map_t> mapOwned = rcp(
new map_t(dummy, vtxIDs, 0, comm));
793 std::vector<gno_t> ghosts;
794 std::vector<int> ghostowners;
795 for(
size_t i = nVtx; i < nVtx+nGhosts; i++){
796 ghosts.push_back(ownedPlusGhosts[i]);
797 ghostowners.push_back(-1);
801 ArrayView<int> owningProcs = Teuchos::arrayViewFromVector(ghostowners);
802 ArrayView<const gno_t> gids = Teuchos::arrayViewFromVector(ghosts);
803 mapOwned->getRemoteIndexList(gids, owningProcs);
806 for(
size_t i = 0; i < ghostowners.size(); i++){
807 owners.push_back(ghostowners[i]);
828 std::vector< gno_t> first_layer_ghost_adjs;
829 std::vector< offset_t> first_layer_ghost_offsets;
830 constructSecondGhostLayer(ownedPlusGhosts,owners, adjs, offsets, mapOwned,
831 first_layer_ghost_adjs, first_layer_ghost_offsets);
835 globalToLocal.clear();
836 for(
size_t i = 0; i < ownedPlusGhosts.size(); i++){
837 globalToLocal[ownedPlusGhosts[i]] = i;
842 for(
int i = 0 ; i < adjs.size(); i++){
843 local_adjs[i] = globalToLocal[adjs[i]];
853 std::vector<lno_t> local_ghost_adjs;
854 for(
size_t i = 0; i< first_layer_ghost_adjs.size(); i++ ){
855 if(globalToLocal.count(first_layer_ghost_adjs[i]) == 0){
856 ownedPlusGhosts.push_back(first_layer_ghost_adjs[i]);
857 globalToLocal[first_layer_ghost_adjs[i]] = vtxIDs.size() + nGhosts + n2Ghosts;
860 local_ghost_adjs.push_back(globalToLocal[first_layer_ghost_adjs[i]]);
864 if(
verbose) std::cout<<comm->getRank()<<
": constructing Tpetra map with copies\n";
865 dummy = Teuchos::OrdinalTraits <Tpetra::global_size_t>::invalid();
866 RCP<const map_t> mapWithCopies = rcp(
new map_t(dummy,
867 Teuchos::arrayViewFromVector(ownedPlusGhosts),
869 if(
verbose) std::cout<<comm->getRank()<<
": done constructing map with copies\n";
871 using import_t = Tpetra::Import<lno_t, gno_t>;
872 Teuchos::RCP<import_t> importer = rcp(
new import_t(mapOwned,
874 if(
verbose) std::cout<<comm->getRank()<<
": done constructing importer\n";
875 Teuchos::RCP<femv_t> femv = rcp(
new femv_t(mapOwned,
877 if(
verbose) std::cout<<comm->getRank()<<
": done constructing femv\n";
881 std::vector<int> rand(ownedPlusGhosts.size());
882 for(
size_t i = 0; i < rand.size(); i++){
883 std::srand(ownedPlusGhosts[i]);
884 rand[i] = std::rand();
888 std::vector<int> ghostOwners2(ownedPlusGhosts.size() -nVtx);
889 std::vector<gno_t> ghosts2(ownedPlusGhosts.size() - nVtx);
890 for(
size_t i = nVtx; i < ownedPlusGhosts.size(); i++) ghosts2[i-nVtx] = ownedPlusGhosts[i];
891 Teuchos::ArrayView<int> owners2 = Teuchos::arrayViewFromVector(ghostOwners2);
892 Teuchos::ArrayView<const gno_t> ghostGIDs = Teuchos::arrayViewFromVector(ghosts2);
893 mapOwned->getRemoteIndexList(ghostGIDs,owners2);
894 if(
verbose) std::cout<<comm->getRank()<<
": done getting ghost owners\n";
899 std::cout<<comm->getRank()<<
": calculating 2GL stats...\n";
901 std::vector<int> sendcounts(comm->getSize(),0);
902 std::vector<gno_t> sdispls(comm->getSize()+1,0);
904 for(
int i = nGhosts; i < ghostGIDs.size(); i++){
905 if(owners2[i] != comm->getRank()&& owners2[i] !=-1) sendcounts[owners2[i]]++;
909 for(
int i = 1; i < comm->getSize()+1; i++){
910 sdispls[i] = sdispls[i-1] + sendcounts[i-1];
911 sendcount += sendcounts[i-1];
913 std::vector<gno_t> idx(comm->getSize(),0);
914 for(
int i = 0; i < comm->getSize(); i++){
918 std::vector<gno_t> sendbuf(sendcount,0);
919 for(
size_t i = nGhosts; i < (size_t)ghostGIDs.size(); i++){
920 if(owners2[i] != comm->getRank() && owners2[i] != -1){
921 sendbuf[idx[owners2[i]]++] = ghostGIDs[i];
925 Teuchos::ArrayView<int> sendcounts_view = Teuchos::arrayViewFromVector(sendcounts);
926 Teuchos::ArrayView<gno_t> sendbuf_view = Teuchos::arrayViewFromVector(sendbuf);
927 Teuchos::ArrayRCP<gno_t> recvbuf;
928 std::vector<int> recvcounts(comm->getSize(),0);
929 Teuchos::ArrayView<int> recvcounts_view = Teuchos::arrayViewFromVector(recvcounts);
930 Zoltan2::AlltoAllv<gno_t>(*
comm, *
env, sendbuf_view, sendcounts_view, recvbuf, recvcounts_view);
931 std::vector<int> is_bndry_send(recvbuf.size(),0);
934 for(
int i = 0; i < recvbuf.size(); i++){
935 size_t lid = mapWithCopies->getLocalElement(recvbuf[i]);
936 is_bndry_send[i] = 0;
938 for(
offset_t j = offsets[lid]; j < offsets[lid+1]; j++){
939 if((
size_t)local_adjs[j] >= nVtx) is_bndry_send[i] = 1;
942 for(
offset_t j = first_layer_ghost_offsets[lid]; j < first_layer_ghost_offsets[lid+1]; j++){
943 if((
size_t)local_ghost_adjs[j] >= nVtx) is_bndry_send[i] = 1;
949 Teuchos::ArrayView<int> is_bndry_send_view = Teuchos::arrayViewFromVector(is_bndry_send);
950 Teuchos::ArrayRCP<int> is_bndry_recv;
951 std::vector<int> bndry_recvcounts(comm->getSize(),0);
952 Teuchos::ArrayView<int> bndry_recvcounts_view = Teuchos::arrayViewFromVector(bndry_recvcounts);
953 Zoltan2::AlltoAllv<int> (*
comm, *
env, is_bndry_send_view, recvcounts_view, is_bndry_recv, bndry_recvcounts_view);
956 int boundaryverts = 0;
957 for(
int i = 0; i < is_bndry_recv.size(); i++){
958 boundaryverts += is_bndry_recv[i];
961 std::cout<<comm->getRank()<<
": "<<boundaryverts<<
" boundary verts out of "<<n2Ghosts<<
" verts in 2GL\n";
966 Teuchos::ArrayView<const lno_t> local_adjs_view = Teuchos::arrayViewFromVector(local_adjs);
970 Teuchos::ArrayView<const offset_t> ghost_offsets = Teuchos::arrayViewFromVector(first_layer_ghost_offsets);
971 Teuchos::ArrayView<const lno_t> ghost_adjacencies = Teuchos::arrayViewFromVector(local_ghost_adjs);
972 Teuchos::ArrayView<const int> rand_view = Teuchos::arrayViewFromVector(rand);
973 Teuchos::ArrayView<const gno_t> gid_view = Teuchos::arrayViewFromVector(ownedPlusGhosts);
977 Teuchos::ArrayView<const lno_t> exportLIDs = importer->getExportLIDs();
978 Teuchos::ArrayView<const int> exportPIDs = importer->getExportPIDs();
982 std::unordered_map<lno_t, std::vector<int>> procs_to_send;
983 for(
int i = 0; i < exportLIDs.size(); i++){
984 procs_to_send[exportLIDs[i]].push_back(exportPIDs[i]);
988 twoGhostLayer(nVtx, nVtx+nGhosts, local_adjs_view, offsets, ghost_adjacencies, ghost_offsets,
989 femv, gid_view, rand_view, owners2, mapWithCopies, procs_to_send);
992 ArrayRCP<int> colors = solution->getColorsRCP();
993 auto femvdata = femv->getData(0);
994 for(
size_t i=0; i<nVtx; i++){
995 colors[i] = femvdata[i];
1055 const Teuchos::ArrayView<const lno_t>& adjs,
1056 const Teuchos::ArrayView<const offset_t>& offsets,
1057 const Teuchos::ArrayView<const lno_t>& ghost_adjs,
1058 const Teuchos::ArrayView<const offset_t>& ghost_offsets,
1059 const Teuchos::RCP<femv_t>& femv,
1060 const Teuchos::ArrayView<const gno_t>& gids,
1061 const Teuchos::ArrayView<const int>& rand,
1062 const Teuchos::ArrayView<const int>& ghost_owners,
1063 RCP<const map_t> mapOwnedPlusGhosts,
1064 const std::unordered_map<
lno_t, std::vector<int>>& procs_to_send){
1066 double total_time = 0.0;
1067 double interior_time = 0.0;
1068 double comm_time = 0.0;
1069 double comp_time = 0.0;
1070 double recoloring_time=0.0;
1071 double conflict_detection = 0.0;
1075 const int numStatisticRecordingRounds = 100;
1078 const size_t n_ghosts = rand.size() - n_local;
1084 std::vector<int> deg_send_cnts(comm->getSize(),0);
1085 std::vector<gno_t> deg_sdispls(comm->getSize()+1,0);
1086 for(
int i = 0; i < ghost_owners.size(); i++){
1087 deg_send_cnts[ghost_owners[i]]++;
1090 gno_t deg_sendsize = 0;
1091 std::vector<int> deg_sentcount(comm->getSize(),0);
1092 for(
int i = 1; i < comm->getSize()+1; i++){
1093 deg_sdispls[i] = deg_sdispls[i-1] + deg_send_cnts[i-1];
1094 deg_sendsize += deg_send_cnts[i-1];
1096 std::vector<gno_t> deg_sendbuf(deg_sendsize,0);
1097 for(
int i = 0; i < ghost_owners.size(); i++){
1098 size_t idx = deg_sdispls[ghost_owners[i]] + deg_sentcount[ghost_owners[i]];
1099 deg_sentcount[ghost_owners[i]]++;
1100 deg_sendbuf[idx] = mapOwnedPlusGhosts->getGlobalElement(i+n_local);
1102 Teuchos::ArrayView<int> deg_send_cnts_view = Teuchos::arrayViewFromVector(deg_send_cnts);
1103 Teuchos::ArrayView<gno_t> deg_sendbuf_view = Teuchos::arrayViewFromVector(deg_sendbuf);
1104 Teuchos::ArrayRCP<gno_t> deg_recvbuf;
1105 std::vector<int> deg_recvcnts(comm->getSize(),0);
1106 Teuchos::ArrayView<int> deg_recvcnts_view = Teuchos::arrayViewFromVector(deg_recvcnts);
1107 Zoltan2::AlltoAllv<gno_t>(*
comm, *
env, deg_sendbuf_view, deg_send_cnts_view, deg_recvbuf, deg_recvcnts_view);
1112 for(
int i = 0; i < deg_recvbuf.size(); i++){
1113 lno_t lid = mapOwnedPlusGhosts->getLocalElement(deg_recvbuf[i]);
1114 deg_recvbuf[i] = offsets[lid+1] - offsets[lid];
1117 ArrayRCP<gno_t> ghost_degrees;
1118 Zoltan2::AlltoAllv<gno_t>(*
comm, *
env, deg_recvbuf(), deg_recvcnts_view, ghost_degrees, deg_send_cnts_view);
1121 Kokkos::View<gno_t*, device_type> ghost_degrees_dev(
"ghost degree view",ghost_degrees.size());
1122 typename Kokkos::View<gno_t*, device_type>::HostMirror ghost_degrees_host = Kokkos::create_mirror(ghost_degrees_dev);
1123 for(
int i = 0; i < ghost_degrees.size(); i++){
1124 lno_t lid = mapOwnedPlusGhosts->getLocalElement(deg_sendbuf[i]);
1125 ghost_degrees_host(lid-n_local) = ghost_degrees[i];
1127 Kokkos::deep_copy(ghost_degrees_dev, ghost_degrees_host);
1130 gno_t recvPerRound[numStatisticRecordingRounds];
1131 gno_t sentPerRound[numStatisticRecordingRounds];
1138 for(
size_t i = 0; i < n_local; i++){
1139 offset_t curr_degree = offsets[i+1] - offsets[i];
1140 if(curr_degree > local_max_degree){
1141 local_max_degree = curr_degree;
1144 Teuchos::reduceAll<int, offset_t>(*
comm, Teuchos::REDUCE_MAX,1, &local_max_degree, &global_max_degree);
1145 if(comm->getRank() == 0 &&
verbose) std::cout<<
"Input has max degree "<<global_max_degree<<
"\n";
1147 if(
verbose) std::cout<<comm->getRank()<<
": constructing Kokkos Views for initial coloring\n";
1151 Kokkos::View<offset_t*, device_type> offsets_dev(
"Host Offset View", offsets.size());
1152 typename Kokkos::View<offset_t*, device_type>::HostMirror offsets_host = Kokkos::create_mirror(offsets_dev);
1153 Kokkos::View<lno_t*, device_type> adjs_dev(
"Host Adjacencies View", adjs.size());
1154 typename Kokkos::View<lno_t*, device_type>::HostMirror adjs_host = Kokkos::create_mirror(adjs_dev);
1155 for(Teuchos_Ordinal i = 0; i < offsets.size(); i++) offsets_host(i) = offsets[i];
1156 for(Teuchos_Ordinal i = 0; i < adjs.size(); i++) adjs_host(i) = adjs[i];
1157 Kokkos::deep_copy(offsets_dev,offsets_host);
1158 Kokkos::deep_copy(adjs_dev, adjs_host);
1162 if(
verbose) std::cout<<comm->getRank()<<
": constructing Kokkos Views for recoloring\n";
1166 Kokkos::View<offset_t*, device_type> dist_degrees_dev(
"Owned+Ghost degree view",rand.size());
1167 typename Kokkos::View<offset_t*, device_type>::HostMirror dist_degrees_host = Kokkos::create_mirror(dist_degrees_dev);
1172 for(Teuchos_Ordinal i = 0; i < offsets.size()-1; i++) dist_degrees_host(i) = offsets[i+1] - offsets[i];
1174 for(Teuchos_Ordinal i = 0; i < ghost_offsets.size()-1; i++) dist_degrees_host(i+n_local) = ghost_offsets[i+1] - ghost_offsets[i];
1176 for(Teuchos_Ordinal i = 0; i < ghost_adjs.size(); i++){
1178 if((
size_t)ghost_adjs[i] >= n_total ){
1179 dist_degrees_host(ghost_adjs[i])++;
1207 Kokkos::View<offset_t*, device_type> dist_offsets_dev(
"Owned+Ghost Offset view", rand.size()+1);
1208 typename Kokkos::View<offset_t*, device_type>::HostMirror dist_offsets_host = Kokkos::create_mirror(dist_offsets_dev);
1211 dist_offsets_host(0) = 0;
1213 for(Teuchos_Ordinal i = 1; i < rand.size()+1; i++){
1214 dist_offsets_host(i) = dist_degrees_host(i-1) + dist_offsets_host(i-1);
1215 total_adjs += dist_degrees_host(i-1);
1217 Kokkos::View<lno_t*, device_type> dist_adjs_dev(
"Owned+Ghost adjacency view", total_adjs);
1218 typename Kokkos::View<lno_t*, device_type>::HostMirror dist_adjs_host = Kokkos::create_mirror(dist_adjs_dev);
1223 for(Teuchos_Ordinal i = 0; i < rand.size(); i++){
1224 dist_degrees_host(i) = 0;
1227 for(Teuchos_Ordinal i = 0; i < adjs.size(); i++) dist_adjs_host(i) = adjs[i];
1228 for(Teuchos_Ordinal i = adjs.size(); i < adjs.size() + ghost_adjs.size(); i++) dist_adjs_host(i) = ghost_adjs[i-adjs.size()];
1232 for(Teuchos_Ordinal i = 0; i < ghost_offsets.size()-1; i++){
1234 for(
offset_t j = ghost_offsets[i]; j < ghost_offsets[i+1]; j++){
1236 if((
size_t)ghost_adjs[j] >= n_total){
1238 dist_adjs_host(dist_offsets_host(ghost_adjs[j]) + dist_degrees_host(ghost_adjs[j])) = i + n_local;
1241 dist_degrees_host(ghost_adjs[j])++;
1246 Kokkos::deep_copy(dist_degrees_dev,dist_degrees_host);
1247 Kokkos::deep_copy(dist_offsets_dev,dist_offsets_host);
1248 Kokkos::deep_copy(dist_adjs_dev, dist_adjs_host);
1251 Kokkos::View<size_t*, device_type> recoloringSize(
"Recoloring Queue Size",1);
1252 typename Kokkos::View<size_t*, device_type>::HostMirror recoloringSize_host = Kokkos::create_mirror(recoloringSize);
1253 recoloringSize_host(0) = 0;
1254 Kokkos::deep_copy(recoloringSize, recoloringSize_host);
1257 if(
verbose) std::cout<<comm->getRank()<<
": constructing rand and GIDs views\n";
1258 Kokkos::View<int*, device_type> rand_dev(
"Random View", rand.size());
1259 typename Kokkos::View<int*, device_type>::HostMirror rand_host = Kokkos::create_mirror(rand_dev);
1260 for(Teuchos_Ordinal i = 0; i < rand.size(); i ++) rand_host(i) = rand[i];
1261 Kokkos::deep_copy(rand_dev,rand_host);
1264 Kokkos::View<gno_t*, device_type> gid_dev(
"GIDs", gids.size());
1265 typename Kokkos::View<gno_t*, device_type>::HostMirror gid_host = Kokkos::create_mirror(gid_dev);
1266 for(Teuchos_Ordinal i = 0; i < gids.size(); i++) gid_host(i) = gids[i];
1267 Kokkos::deep_copy(gid_dev,gid_host);
1273 Kokkos::View<lno_t*, device_type> boundary_verts_dev;
1275 if(
verbose) std::cout<<comm->getRank()<<
": constructing communication and recoloring lists\n";
1281 Kokkos::View<lno_t*, device_type> verts_to_recolor_view(
"verts to recolor", rand.size());
1282 typename Kokkos::View<lno_t*, device_type>::HostMirror verts_to_recolor_host = create_mirror(verts_to_recolor_view);
1285 Kokkos::View<int*, device_type> verts_to_recolor_size(
"verts to recolor size",1);
1286 Kokkos::View<int*, device_type, Kokkos::MemoryTraits<Kokkos::Atomic>> verts_to_recolor_size_atomic = verts_to_recolor_size;
1287 typename Kokkos::View<int*, device_type>::HostMirror verts_to_recolor_size_host = create_mirror(verts_to_recolor_size);
1290 verts_to_recolor_size_host(0) = 0;
1292 Kokkos::deep_copy(verts_to_recolor_size, verts_to_recolor_size_host);
1299 Kokkos::View<lno_t*, device_type> verts_to_send_view(
"verts to send", n_local);
1300 typename Kokkos::View<lno_t*, device_type>::HostMirror verts_to_send_host = create_mirror(verts_to_send_view);
1303 Kokkos::View<size_t*, device_type> verts_to_send_size(
"verts to send size",1);
1304 Kokkos::View<size_t*, device_type, Kokkos::MemoryTraits<Kokkos::Atomic>> verts_to_send_size_atomic = verts_to_send_size;
1305 typename Kokkos::View<size_t*, device_type>::HostMirror verts_to_send_size_host = create_mirror(verts_to_send_size);
1307 verts_to_send_size_host(0) = 0;
1308 Kokkos::deep_copy(verts_to_send_size, verts_to_send_size_host);
1310 if(
verbose) std::cout<<comm->getRank()<<
": Constructing the boundary\n";
1315 constructBoundary(n_local, dist_offsets_dev, dist_adjs_dev, dist_offsets_host, dist_adjs_host, boundary_verts_dev,
1316 verts_to_send_view, verts_to_send_size_atomic);
1321 bool use_vbbit = (global_max_degree < 6000);
1325 if(
timing) comm->barrier();
1326 interior_time =
timer();
1327 total_time =
timer();
1329 this->colorInterior(n_local, adjs_dev, offsets_dev, femv,adjs_dev,0,use_vbbit);
1330 interior_time =
timer() - interior_time;
1331 comp_time = interior_time;
1337 Kokkos::View<int*,device_type> ghost_colors(
"ghost color backups", n_ghosts);
1340 if(
verbose) std::cout<<comm->getRank()<<
": communicating\n";
1345 Kokkos::deep_copy(verts_to_send_host, verts_to_send_view);
1346 Kokkos::deep_copy(verts_to_send_size_host, verts_to_send_size);
1348 comm_time = doOwnedToGhosts(mapOwnedPlusGhosts,n_local,verts_to_send_host,verts_to_send_size_host,femv,procs_to_send,sent,recv);
1349 sentPerRound[0] = sent;
1350 recvPerRound[0] = recv;
1356 auto femvColors = femv->getLocalViewDevice(Tpetra::Access::ReadWrite);
1357 auto femv_colors = subview(femvColors, Kokkos::ALL, 0);
1358 Kokkos::parallel_for(
"get femv colors",
1359 Kokkos::RangePolicy<execution_space, int>(0,n_ghosts),
1360 KOKKOS_LAMBDA(
const int& i){
1361 ghost_colors(i) = femv_colors(i+n_local);
1365 double temp =
timer();
1367 bool recolor_degrees = this->
pl->template get<bool>(
"recolor_degrees",
false);
1368 if(
verbose) std::cout<<comm->getRank()<<
": detecting conflicts\n";
1371 verts_to_send_size_host(0) = 0;
1372 verts_to_recolor_size_host(0) = 0;
1373 recoloringSize_host(0) = 0;
1374 Kokkos::deep_copy(verts_to_send_size, verts_to_send_size_host);
1375 Kokkos::deep_copy(verts_to_recolor_size, verts_to_recolor_size_host);
1376 Kokkos::deep_copy(recoloringSize, recoloringSize_host);
1378 detectConflicts(n_local, dist_offsets_dev, dist_adjs_dev, femv_colors, boundary_verts_dev,
1379 verts_to_recolor_view, verts_to_recolor_size_atomic, verts_to_send_view, verts_to_send_size_atomic,
1380 recoloringSize, rand_dev, gid_dev, ghost_degrees_dev, recolor_degrees);
1384 Kokkos::deep_copy(verts_to_send_host, verts_to_send_view);
1385 Kokkos::deep_copy(verts_to_send_size_host, verts_to_send_size);
1386 Kokkos::deep_copy(recoloringSize_host, recoloringSize);
1387 Kokkos::deep_copy(verts_to_recolor_size_host, verts_to_recolor_size);
1389 if(comm->getSize() > 1){
1390 conflict_detection =
timer() - temp;
1391 comp_time += conflict_detection;
1394 if(
verbose) std::cout<<comm->getRank()<<
": starting to recolor\n";
1396 double totalPerRound[numStatisticRecordingRounds];
1397 double commPerRound[numStatisticRecordingRounds];
1398 double compPerRound[numStatisticRecordingRounds];
1399 double recoloringPerRound[numStatisticRecordingRounds];
1400 double conflictDetectionPerRound[numStatisticRecordingRounds];
1401 uint64_t vertsPerRound[numStatisticRecordingRounds];
1402 uint64_t incorrectGhostsPerRound[numStatisticRecordingRounds];
1403 int distributedRounds = 1;
1404 totalPerRound[0] = interior_time + comm_time + conflict_detection;
1405 recoloringPerRound[0] = 0;
1406 commPerRound[0] = comm_time;
1407 compPerRound[0] = interior_time + conflict_detection;
1408 conflictDetectionPerRound[0] = conflict_detection;
1409 recoloringPerRound[0] = 0;
1410 vertsPerRound[0] = 0;
1411 incorrectGhostsPerRound[0]=0;
1412 typename Kokkos::View<int*, device_type>::HostMirror ghost_colors_host;
1413 typename Kokkos::View<lno_t*, device_type>::HostMirror boundary_verts_host;
1414 size_t serial_threshold = this->
pl->template get<int>(
"serial_threshold",0);
1416 size_t totalConflicts = 0;
1417 size_t localConflicts = recoloringSize_host(0);
1418 Teuchos::reduceAll<int,size_t>(*
comm, Teuchos::REDUCE_SUM, 1, &localConflicts, &totalConflicts);
1419 bool done = !totalConflicts;
1420 if(comm->getSize()==1) done =
true;
1427 if(recoloringSize_host(0) < serial_threshold)
break;
1428 if(distributedRounds < numStatisticRecordingRounds) {
1429 vertsPerRound[distributedRounds] = verts_to_recolor_size_host(0);
1432 if(
timing) comm->barrier();
1433 double recolor_temp =
timer();
1435 if(verts_to_recolor_size_host(0) > 0){
1436 this->colorInterior(femv_colors.size(), dist_adjs_dev, dist_offsets_dev,femv,verts_to_recolor_view,verts_to_recolor_size_host(0),use_vbbit);
1439 if(distributedRounds < numStatisticRecordingRounds){
1440 recoloringPerRound[distributedRounds] =
timer() - recolor_temp;
1441 recoloring_time += recoloringPerRound[distributedRounds];
1442 comp_time += recoloringPerRound[distributedRounds];
1443 compPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
1444 totalPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
1446 double recoloring_round_time =
timer() - recolor_temp;
1447 recoloring_time += recoloring_round_time;
1448 comp_time += recoloring_round_time;
1455 Kokkos::parallel_for(
"set femv colors",
1456 Kokkos::RangePolicy<execution_space, int>(0,n_ghosts),
1457 KOKKOS_LAMBDA(
const int& i){
1458 femv_colors(i+n_local) = ghost_colors(i);
1466 femvColors = decltype(femvColors)();
1467 femv_colors = decltype(femv_colors)();
1468 double curr_comm_time = doOwnedToGhosts(mapOwnedPlusGhosts,n_local,verts_to_send_host,verts_to_send_size_host,femv,procs_to_send,sent,recv);
1469 comm_time += curr_comm_time;
1471 if(distributedRounds < numStatisticRecordingRounds){
1472 commPerRound[distributedRounds] = curr_comm_time;
1473 recvPerRound[distributedRounds] = recv;
1474 sentPerRound[distributedRounds] = sent;
1475 totalPerRound[distributedRounds] += commPerRound[distributedRounds];
1482 femvColors = femv->getLocalViewDevice(Tpetra::Access::ReadWrite);
1483 femv_colors = subview(femvColors, Kokkos::ALL, 0);
1484 Kokkos::parallel_for(
"get femv colors 2",
1485 Kokkos::RangePolicy<execution_space, int>(0,n_ghosts),
1486 KOKKOS_LAMBDA(
const int& i){
1487 ghost_colors(i) = femv_colors(i+n_local);
1494 verts_to_send_size_host(0) = 0;
1495 verts_to_recolor_size_host(0) = 0;
1496 recoloringSize_host(0) = 0;
1497 Kokkos::deep_copy(verts_to_send_size, verts_to_send_size_host);
1498 Kokkos::deep_copy(verts_to_recolor_size, verts_to_recolor_size_host);
1499 Kokkos::deep_copy(recoloringSize, recoloringSize_host);
1502 double detection_temp =
timer();
1504 detectConflicts(n_local, dist_offsets_dev, dist_adjs_dev,femv_colors, boundary_verts_dev,
1505 verts_to_recolor_view, verts_to_recolor_size_atomic, verts_to_send_view, verts_to_send_size_atomic,
1506 recoloringSize, rand_dev, gid_dev, ghost_degrees_dev, recolor_degrees);
1509 Kokkos::deep_copy(verts_to_send_host, verts_to_send_view);
1510 Kokkos::deep_copy(verts_to_send_size_host, verts_to_send_size);
1512 Kokkos::deep_copy(verts_to_recolor_size_host, verts_to_recolor_size);
1513 Kokkos::deep_copy(recoloringSize_host, recoloringSize);
1515 if(distributedRounds < numStatisticRecordingRounds){
1516 conflictDetectionPerRound[distributedRounds] =
timer() - detection_temp;
1517 conflict_detection += conflictDetectionPerRound[distributedRounds];
1518 compPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
1519 totalPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
1520 comp_time += conflictDetectionPerRound[distributedRounds];
1522 double conflict_detection_round_time =
timer() - detection_temp;
1523 conflict_detection += conflict_detection_round_time;
1524 comp_time += conflict_detection_round_time;
1527 distributedRounds++;
1528 size_t localDone = recoloringSize_host(0);
1529 size_t globalDone = 0;
1530 Teuchos::reduceAll<int,size_t>(*
comm, Teuchos::REDUCE_SUM, 1, &localDone, &globalDone);
1540 if(recoloringSize_host(0) > 0 || !done){
1541 ghost_colors_host = Kokkos::create_mirror_view_and_copy(
host_mem(),ghost_colors,
"ghost_colors host mirror");
1542 boundary_verts_host = Kokkos::create_mirror_view_and_copy(
host_mem(),boundary_verts_dev,
"boundary_verts host mirror");
1548 femvColors = decltype(femvColors)();
1549 femv_colors = decltype(femv_colors)();
1550 while(recoloringSize_host(0) > 0 || !done){
1551 auto femvColors_host = femv->getLocalViewHost(Tpetra::Access::ReadWrite);
1552 auto colors_host = subview(femvColors_host, Kokkos::ALL, 0);
1553 if(distributedRounds < numStatisticRecordingRounds){
1554 vertsPerRound[distributedRounds] = recoloringSize_host(0);
1556 if(
verbose) std::cout<<comm->getRank()<<
": starting to recolor, serial\n";
1557 if(
timing) comm->barrier();
1559 double recolor_temp =
timer();
1560 if(verts_to_recolor_size_host(0) > 0){
1561 this->colorInterior_serial(colors_host.size(), dist_adjs_host, dist_offsets_host, femv,
1562 verts_to_recolor_host, verts_to_recolor_size_host(0),
true);
1564 if(distributedRounds < numStatisticRecordingRounds){
1565 recoloringPerRound[distributedRounds] =
timer() - recolor_temp;
1566 recoloring_time += recoloringPerRound[distributedRounds];
1567 comp_time += recoloringPerRound[distributedRounds];
1568 compPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
1569 totalPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
1571 double recoloring_serial_round_time =
timer() - recolor_temp;
1572 recoloring_time += recoloring_serial_round_time;
1573 comp_time += recoloring_serial_round_time;
1578 for(
size_t i = 0; i < n_ghosts; i++){
1579 colors_host(i+n_local) = ghost_colors_host(i);
1582 double curr_comm_time = doOwnedToGhosts(mapOwnedPlusGhosts, n_local,verts_to_send_host, verts_to_send_size_host, femv, procs_to_send, sent,recv);
1583 comm_time += curr_comm_time;
1585 if(distributedRounds < numStatisticRecordingRounds){
1586 commPerRound[distributedRounds] = curr_comm_time;
1587 recvPerRound[distributedRounds] = recv;
1588 sentPerRound[distributedRounds] = sent;
1589 totalPerRound[distributedRounds] += commPerRound[distributedRounds];
1594 for(
size_t i = 0; i < n_ghosts; i++){
1595 ghost_colors_host(i) = colors_host(i+n_local);
1598 if(
timing) comm->barrier();
1599 double detection_temp =
timer();
1602 verts_to_recolor_size_host(0) = 0;
1603 verts_to_send_size_host(0) = 0;
1604 recoloringSize_host(0) = 0;
1607 verts_to_recolor_host, verts_to_recolor_size_host, verts_to_send_host, verts_to_send_size_host,
1608 recoloringSize_host, rand_host, gid_host, ghost_degrees_host, recolor_degrees);
1612 if(distributedRounds < numStatisticRecordingRounds){
1613 conflictDetectionPerRound[distributedRounds] =
timer() - detection_temp;
1614 conflict_detection += conflictDetectionPerRound[distributedRounds];
1615 compPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
1616 totalPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
1617 comp_time += conflictDetectionPerRound[distributedRounds];
1619 double conflict_detection_serial_round_time =
timer() - detection_temp;
1620 conflict_detection += conflict_detection_serial_round_time;
1621 comp_time += conflict_detection_serial_round_time;
1624 size_t globalDone = 0;
1625 size_t localDone = recoloringSize_host(0);
1626 Teuchos::reduceAll<int,size_t>(*
comm, Teuchos::REDUCE_SUM, 1, &localDone, &globalDone);
1627 distributedRounds++;
1631 total_time =
timer() - total_time;
1634 uint64_t localBoundaryVertices = 0;
1635 for(
size_t i = 0; i < n_local; i++){
1636 for(
offset_t j = offsets[i]; j < offsets[i+1]; j++){
1637 if((
size_t)adjs[j] >= n_local){
1638 localBoundaryVertices++;
1645 uint64_t totalVertsPerRound[numStatisticRecordingRounds];
1646 uint64_t totalBoundarySize = 0;
1647 uint64_t totalIncorrectGhostsPerRound[numStatisticRecordingRounds];
1648 double finalTotalPerRound[numStatisticRecordingRounds];
1649 double maxRecoloringPerRound[numStatisticRecordingRounds];
1650 double minRecoloringPerRound[numStatisticRecordingRounds];
1651 double finalCommPerRound[numStatisticRecordingRounds];
1652 double finalCompPerRound[numStatisticRecordingRounds];
1653 double finalConflictDetectionPerRound[numStatisticRecordingRounds];
1654 gno_t finalRecvPerRound[numStatisticRecordingRounds];
1655 gno_t finalSentPerRound[numStatisticRecordingRounds];
1656 for(
int i = 0; i < numStatisticRecordingRounds; i++){
1657 totalVertsPerRound[i] = 0;
1658 finalTotalPerRound[i] = 0.0;
1659 maxRecoloringPerRound[i] = 0.0;
1660 minRecoloringPerRound[i] = 0.0;
1661 finalCommPerRound[i] = 0.0;
1662 finalCompPerRound[i] = 0.0;
1663 finalConflictDetectionPerRound[i] = 0.0;
1664 finalRecvPerRound[i] = 0;
1665 finalSentPerRound[i] = 0;
1667 Teuchos::reduceAll<int,uint64_t>(*
comm, Teuchos::REDUCE_SUM,1,&localBoundaryVertices, &totalBoundarySize);
1668 Teuchos::reduceAll<int,uint64_t>(*
comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,vertsPerRound,totalVertsPerRound);
1669 Teuchos::reduceAll<int,uint64_t>(*
comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,incorrectGhostsPerRound,totalIncorrectGhostsPerRound);
1670 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,totalPerRound, finalTotalPerRound);
1671 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,recoloringPerRound,maxRecoloringPerRound);
1672 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MIN,numStatisticRecordingRounds,recoloringPerRound,minRecoloringPerRound);
1673 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,commPerRound,finalCommPerRound);
1674 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,compPerRound,finalCompPerRound);
1675 Teuchos::reduceAll<int,double>(*
comm,
1676 Teuchos::REDUCE_MAX,numStatisticRecordingRounds,conflictDetectionPerRound,finalConflictDetectionPerRound);
1677 Teuchos::reduceAll<int,gno_t> (*
comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,recvPerRound,finalRecvPerRound);
1678 Teuchos::reduceAll<int,gno_t> (*
comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,sentPerRound,finalSentPerRound);
1679 std::cout <<
"Rank " << comm->getRank()
1680 <<
": boundary size: " << localBoundaryVertices << std::endl;
1681 if(comm->getRank() == 0)
1682 std::cout <<
"Total boundary size: " << totalBoundarySize << std::endl;
1683 for(
int i = 0; i < std::min((
int)distributedRounds,numStatisticRecordingRounds); i++){
1684 std::cout <<
"Rank " << comm->getRank()
1685 <<
": recolor " << vertsPerRound[i]
1686 <<
" vertices in round " << i << std::endl;
1687 std::cout <<
"Rank " << comm->getRank()
1688 <<
" sentbuf had " << sentPerRound[i]
1689 <<
" entries in round " << i << std::endl;
1690 if(comm->getRank()==0){
1691 std::cout <<
"recolored " << totalVertsPerRound[i]
1692 <<
" vertices in round " << i << std::endl;
1693 std::cout << totalIncorrectGhostsPerRound[i]
1694 <<
" inconsistent ghosts in round " << i << std::endl;
1695 std::cout <<
"total time in round " << i
1696 <<
": " << finalTotalPerRound[i] << std::endl;
1697 std::cout <<
"recoloring time in round " << i
1698 <<
": " << maxRecoloringPerRound[i] << std::endl;
1699 std::cout <<
"min recoloring time in round " << i
1700 <<
": " << minRecoloringPerRound[i] << std::endl;
1701 std::cout <<
"conflict detection time in round " << i
1702 <<
": " << finalConflictDetectionPerRound[i] << std::endl;
1703 std::cout <<
"comm time in round " << i
1704 <<
": " << finalCommPerRound[i] << std::endl;
1705 std::cout <<
"recvbuf size in round " << i
1706 <<
": " << finalRecvPerRound[i] << std::endl;
1707 std::cout <<
"sendbuf size in round " << i
1708 <<
": " << finalSentPerRound[i] << std::endl;
1709 std::cout <<
"comp time in round " << i
1710 <<
": " << finalCompPerRound[i] << std::endl;
1714 double global_total_time = 0.0;
1715 double global_recoloring_time = 0.0;
1716 double global_min_recoloring_time = 0.0;
1717 double global_conflict_detection=0.0;
1718 double global_comm_time=0.0;
1719 double global_comp_time=0.0;
1720 double global_interior_time=0.0;
1721 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MAX,1,&total_time,&global_total_time);
1722 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MAX,1,&recoloring_time,&global_recoloring_time);
1723 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MIN,1,&recoloring_time,&global_min_recoloring_time);
1724 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MAX,1,&conflict_detection,&global_conflict_detection);
1725 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MAX,1,&comm_time,&global_comm_time);
1726 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MAX,1,&comp_time,&global_comp_time);
1727 Teuchos::reduceAll<int,double>(*
comm, Teuchos::REDUCE_MAX,1,&interior_time,&global_interior_time);
1730 if(comm->getRank()==0){
1731 std::cout <<
"Total Time: " << global_total_time << std::endl;
1732 std::cout <<
"Interior Time: " << global_interior_time << std::endl;
1733 std::cout <<
"Recoloring Time: " << global_recoloring_time << std::endl;
1734 std::cout <<
"Min Recoloring Time: " << global_min_recoloring_time << std::endl;
1735 std::cout <<
"Conflict Detection Time: " << global_conflict_detection << std::endl;
1736 std::cout <<
"Comm Time: " << global_comm_time << std::endl;
1737 std::cout <<
"Comp Time: " << global_comp_time << std::endl;
Zoltan2::BaseAdapter< userTypes_t > base_adapter_t
Tpetra::FEMultiVector< femv_scalar_t, lno_t, gno_t > femv_t
typename Adapter::gno_t gno_t
virtual void detectConflicts(const size_t n_local, Kokkos::View< offset_t *, device_type > dist_offsets_dev, Kokkos::View< lno_t *, device_type > dist_adjs_dev, Kokkos::View< int *, device_type > femv_colors, Kokkos::View< lno_t *, device_type > boundary_verts_view, Kokkos::View< lno_t *, device_type > verts_to_recolor_view, Kokkos::View< int *, device_type, Kokkos::MemoryTraits< Kokkos::Atomic >> verts_to_recolor_size_atomic, Kokkos::View< lno_t *, device_type > verts_to_send_view, Kokkos::View< size_t *, device_type, Kokkos::MemoryTraits< Kokkos::Atomic >> verts_to_send_size_atomic, Kokkos::View< size_t *, device_type > recoloringSize, Kokkos::View< int *, device_type > rand, Kokkos::View< gno_t *, device_type > gid, Kokkos::View< gno_t *, device_type > ghost_degrees, bool recolor_degrees)=0
typename Adapter::lno_t lno_t
std::bitset< NUM_MODEL_FLAGS > modelFlag_t
map_t::global_ordinal_type gno_t
typename Adapter::offset_t offset_t
AlgTwoGhostLayer(const RCP< const base_adapter_t > &adapter_, const RCP< Teuchos::ParameterList > &pl_, const RCP< Environment > &env_, const RCP< const Teuchos::Comm< int > > &comm_)
virtual void constructBoundary(const size_t n_local, Kokkos::View< offset_t *, device_type > dist_offsets_dev, Kokkos::View< lno_t *, device_type > dist_adjs_dev, typename Kokkos::View< offset_t *, device_type >::HostMirror dist_offsets_host, typename Kokkos::View< lno_t *, device_type >::HostMirror dist_adjs_host, Kokkos::View< lno_t *, device_type > &boundary_verts, Kokkos::View< lno_t *, device_type > verts_to_send_view, Kokkos::View< size_t *, device_type, Kokkos::MemoryTraits< Kokkos::Atomic >> verts_to_send_size_atomic)=0
algorithm requires no self edges
typename femv_t::host_view_type::device_type::memory_space host_mem
void color(const RCP< ColoringSolution< Adapter > > &solution)
Coloring method.
RCP< Teuchos::ParameterList > pl
Algorithm defines the base class for all algorithms.
map_t::local_ordinal_type lno_t
typename femv_t::device_type device_type
void twoGhostLayer(const size_t n_local, const size_t n_total, const Teuchos::ArrayView< const lno_t > &adjs, const Teuchos::ArrayView< const offset_t > &offsets, const Teuchos::ArrayView< const lno_t > &ghost_adjs, const Teuchos::ArrayView< const offset_t > &ghost_offsets, const Teuchos::RCP< femv_t > &femv, const Teuchos::ArrayView< const gno_t > &gids, const Teuchos::ArrayView< const int > &rand, const Teuchos::ArrayView< const int > &ghost_owners, RCP< const map_t > mapOwnedPlusGhosts, const std::unordered_map< lno_t, std::vector< int >> &procs_to_send)
Traits class to handle conversions between gno_t/lno_t and TPL data types (e.g., ParMETIS's idx_t...
typename femv_t::host_view_type::device_type::execution_space host_exec
RCP< const base_adapter_t > adapter
GraphModel defines the interface required for graph models.
RCP< const Teuchos::Comm< int > > comm
Defines the ColoringSolution class.
Tpetra::global_size_t global_size_t
virtual void detectConflicts_serial(const size_t n_local, typename Kokkos::View< offset_t *, device_type >::HostMirror dist_offsets_host, typename Kokkos::View< lno_t *, device_type >::HostMirror dist_adjs_host, typename Kokkos::View< int *, device_type >::HostMirror femv_colors, typename Kokkos::View< lno_t *, device_type >::HostMirror boundary_verts_view, typename Kokkos::View< lno_t *, device_type >::HostMirror verts_to_recolor_view, typename Kokkos::View< int *, device_type >::HostMirror verts_to_recolor_size_atomic, typename Kokkos::View< lno_t *, device_type >::HostMirror verts_to_send_view, typename Kokkos::View< size_t *, device_type >::HostMirror verts_to_send_size_atomic, typename Kokkos::View< size_t *, device_type >::HostMirror recoloringSize, typename Kokkos::View< int *, device_type >::HostMirror rand, typename Kokkos::View< gno_t *, device_type >::HostMirror gid, typename Kokkos::View< gno_t *, device_type >::HostMirror ghost_degrees, bool recolor_degrees)=0
Defines the GraphModel interface.
A gathering of useful namespace methods.
typename Adapter::base_adapter_t base_adapter_t
typename device_type::execution_space execution_space
Tpetra::Map< lno_t, gno_t > map_t
typename Adapter::scalar_t scalar_t
typename device_type::memory_space memory_space
AlltoAll communication methods.
The class containing coloring solution.