60 for (
int i = 0; i < from->
nrecvs; i++) {
87 #define PRINT_VECTOR(v) \
88 if(v != Teuchos::null) { \
89 std::cout << " " << #v << " "; \
90 for(Teuchos::ArrayRCP<int>::size_type n = 0; n < v.size(); ++n) { \
91 std::cout << v[n] << " "; \
93 std::cout << std::endl; \
96 #define PRINT_VAL(val) std::cout << " " << #val << ": " << val << std::endl;
98 for(
int proc = 0; proc <
comm->getSize(); ++proc) {
100 if(proc ==
comm->getRank()) {
102 std::cout <<
"Rank " << proc <<
" " << headerMessage << std::endl;
135 const Teuchos::ArrayRCP<int> &assign,
136 Teuchos::RCP<
const Teuchos::Comm<int> > comm,
141 if (comm == Teuchos::null){
142 throw std::logic_error(
"Invalid communicator: MPI_COMM_NULL.");
145 int my_proc = comm->getRank();
146 int nprocs = comm->getSize();
153 Teuchos::ArrayRCP<int> starts(
new int[nprocs + 1], 0, nprocs + 1,
true);
154 for(
int n = 0; n < starts.size(); ++n) {
163 int no_send_buff = 1;
165 int prev_proc = nprocs;
167 for (
int i = 0; i < nvals; i++) {
168 int proc = assign[i];
169 if (no_send_buff && proc != prev_proc) {
170 if (proc >= 0 && (starts[proc] || prev_proc < 0)) {
183 int self_msg = (starts[my_proc] != 0);
185 Teuchos::ArrayRCP<int> lengths_to;
186 Teuchos::ArrayRCP<int> procs_to;
187 Teuchos::ArrayRCP<int> starts_to;
188 Teuchos::ArrayRCP<int> indices_to;
190 int max_send_size = 0;
197 for (
int i = 0; i < nprocs; i++) {
198 if (starts[i] != 0) ++nsends;
201 lengths_to.resize(nsends);
202 starts_to.resize(nsends);
203 procs_to.resize(nsends);
207 for (
int i = 0; i < nsends; i++) {
208 starts_to[i] = index;
209 int proc = assign[index];
211 index += starts[proc];
217 sort_ints(procs_to, starts_to);
220 for (
int i = 0; i < nsends; i++) {
221 int proc = procs_to[i];
222 lengths_to[i] = starts[proc];
223 if (proc != my_proc && lengths_to[i] > max_send_size) {
224 max_send_size = lengths_to[i];
230 nsends = (starts[0] != 0);
231 for (
int i = 1; i < nprocs; i++) {
234 starts[i] += starts[i - 1];
237 for (
int i = nprocs - 1; i; i--) {
238 starts[i] = starts[i - 1];
243 indices_to = (nactive > 0) ?
244 Teuchos::arcp(
new int[nactive], 0, nactive,
true) : Teuchos::null;
246 for (
int i = 0; i < nvals; i++) {
247 int proc = assign[i];
249 indices_to[starts[proc]] = i;
256 for (
int i = nprocs - 1; i; i--) {
257 starts[i] = starts[i - 1];
260 starts[nprocs] = nactive;
264 lengths_to.resize(nsends);
265 starts_to.resize(nsends);
266 procs_to.resize(nsends);
270 for (
int i = 0; i < nprocs; i++) {
271 if (starts[i + 1] != starts[i]) {
272 starts_to[j] = starts[i];
273 lengths_to[j] = starts[i + 1] - starts[i];
274 if (i != my_proc && lengths_to[j] > max_send_size) {
275 max_send_size = lengths_to[j];
287 Teuchos::ArrayRCP<int> lengths_from;
288 Teuchos::ArrayRCP<int> procs_from;
291 int comm_flag = invert_map(lengths_to, procs_to, nsends, self_msg,
292 lengths_from, procs_from, &nrecvs, my_proc, nprocs,
293 out_of_mem, tag, comm);
296 Teuchos::ArrayRCP<int> starts_from(
297 new int[nrecvs + self_msg], 0, nrecvs + self_msg,
true);
299 for (
int i = 0; i < nrecvs + self_msg; i++) {
301 j += lengths_from[i];
304 if (comm_flag != 0) {
305 throw std::logic_error(
"Failed to construct Zoltan2_Directory_Comm");
308 int total_recv_size = 0;
309 for (
int i = 0; i < nrecvs + self_msg; i++) {
310 total_recv_size += lengths_from[i];
321 plan_forward->
nvals = nvals;
323 plan_forward->
nrecvs = nrecvs;
324 plan_forward->
nsends = nsends;
329 plan_forward->
comm = comm;
333 throw std::logic_error(
"UNTESTED COMM 2");
340 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, 1, &nrecvs, &global_nrecvs);
354 nrec = total_recv_size;
362 int Zoltan2_Directory_Comm::invert_map(
363 const Teuchos::ArrayRCP<int> &lengths_to,
364 const Teuchos::ArrayRCP<int> &procs_to,
367 Teuchos::ArrayRCP<int> &lengths_from,
368 Teuchos::ArrayRCP<int> &procs_from,
374 Teuchos::RCP<
const Teuchos::Comm<int> > comm)
376 Teuchos::ArrayRCP<int> msg_count(
new int[nprocs], 0, nprocs,
true);
377 Teuchos::ArrayRCP<int> counts(
new int[nprocs], 0, nprocs,
true);
378 for(
int i = 0; i < nprocs; ++i) {
383 for (
int i = 0; i < nsends + self_msg; i++) {
384 msg_count[procs_to[i]] = 1;
397 Teuchos::reduceAll<int>(*comm, Teuchos::REDUCE_SUM, nprocs,
398 msg_count.getRawPtr(), counts.getRawPtr());
403 MPI_Scatter(&(counts[0]), 1, MPI_INT, &nrecvs, 1, MPI_INT, 0, getRawComm());
407 for (
int i=0; i < nprocs; i++) {
408 if (counts[i] > max_nrecvs) {
409 max_nrecvs = counts[i];
416 MPI_Bcast(&max_nrecvs, 1, MPI_INT, 0, getRawComm());
419 lengths_from.resize(nrecvs);
420 procs_from.resize(nrecvs);
425 std::vector<MPI_Request> req(nrecvs);
432 for (
int i=0; i < nrecvs; i++) {
433 MPI_Irecv(&(lengths_from[0]) + i, 1, MPI_INT, MPI_ANY_SOURCE,
434 tag, getRawComm(), &(req[i]));
438 for (
int i=0; i < nsends+self_msg; i++) {
442 MPI_Send(const_cast<int*>(&lengths_to[i]), 1, MPI_INT, procs_to[i], tag,
446 for (
int i=0; i < nrecvs; i++) {
448 MPI_Wait(&(req[i]), &status);
449 procs_from[i] = status.MPI_SOURCE;
454 throw std::logic_error(
"UNTESTED COMM 3");
456 Teuchos::ArrayRCP<int> sendbuf(
new int[nprocs], 0, nprocs,
true);
457 Teuchos::ArrayRCP<int> recvbuf(
new int[nprocs], 0, nprocs,
true);
459 for (
int i=0; i < nsends + self_msg; i++) {
460 sendbuf[procs_to[i]] = lengths_to[i];
464 MPI_Alltoall(&(sendbuf[0]), 1, MPI_INT, &(recvbuf[0]), 1, MPI_INT,
467 for (
int i=0, j=0; i < nprocs; i++) {
469 lengths_from[j] = recvbuf[i];
479 sort_ints(procs_from, lengths_from);
481 *pnrecvs = nrecvs - self_msg;
486 int Zoltan2_Directory_Comm::sort_ints(
487 Teuchos::ArrayRCP<int> &vals_sort,
488 Teuchos::ArrayRCP<int> &vals_other)
491 if (vals_sort == Teuchos::null || vals_sort.size() == 0) {
494 if (vals_other == Teuchos::null || vals_other.size() == 0) {
497 if (vals_sort == Teuchos::null || vals_sort.size() == 1) {
502 int already_sorted = 1;
505 int top = vals_sort[0];
506 for (Teuchos::ArrayRCP<int>::size_type i = 1; i < vals_sort.size(); i++) {
507 if (vals_sort[i-1] > vals_sort[i]) {
510 if (top < vals_sort[i]) {
515 if (already_sorted) {
519 Teuchos::ArrayRCP<int> store(
new int[top+2], 0, top+2,
true);
520 for(
int n = 0; n < store.size(); ++n) {
524 Teuchos::ArrayRCP<int> copy_sort(
new int[vals_sort.size()], 0, vals_sort.size(),
true);
525 for(Teuchos::ArrayRCP<int>::size_type n = 0; n < copy_sort.size(); ++n) {
526 copy_sort[n] = vals_sort[n];
529 Teuchos::ArrayRCP<int> copy_other(
new int[vals_other.size()], 0, vals_other.size(),
true);
530 for(Teuchos::ArrayRCP<int>::size_type n = 0; n < copy_other.size(); ++n) {
531 copy_other[n] = vals_other[n];
536 int *p = &(store[1]);
537 for (Teuchos::ArrayRCP<int>::size_type i = 0; i < vals_sort.size(); i++) {
541 for (
int i = 1; i < top+1; i++) {
546 for (Teuchos::ArrayRCP<int>::size_type i = 0; i < vals_sort.size(); i++) {
547 vals_sort[p[copy_sort[i]]] = copy_sort[i];
548 vals_other[p[copy_sort[i]]] = copy_other[i];
557 const Teuchos::ArrayRCP<char> &send_data,
559 Teuchos::ArrayRCP<char> &recv_data)
564 status = do_post (plan_forward, tag, send_data, nbytes, recv_data);
566 status = do_wait (plan_forward, tag, send_data, nbytes, recv_data);
570 status = do_all_to_all(plan_forward, send_data, nbytes, recv_data);
576 int Zoltan2_Directory_Comm::do_post(
579 const Teuchos::ArrayRCP<char> &send_data,
581 Teuchos::ArrayRCP<char> &recv_data)
585 throw std::logic_error(
"Communication plan = NULL");
590 throw std::logic_error(
"UNTESTED COMM 4");
591 return do_all_to_all(plan, send_data, nbytes, recv_data);
594 int my_proc = plan->
comm->getRank();
596 throw std::logic_error(
"UNTESTED COMM 5");
604 throw std::logic_error(
"nsends not zero, but send_data = NULL");
607 if ((plan->
nrecvs + plan->
self_msg) && recv_data == Teuchos::null) {
608 throw std::logic_error(
"UNTESTED COMM 6");
614 throw std::logic_error(
"nrecvs not zero, but recv_data = NULL");
625 plan->
recv_buff = Teuchos::arcp(
new char[rsize], 0, rsize,
true);
628 size_t self_recv_address = 0;
637 (MPI_Datatype) MPI_BYTE, plan->
procs_from[i], tag,
638 getRawComm(), &plan->
request[k]);
642 self_recv_address = (size_t)(plan->
starts_from[i]) * (size_t)nbytes;
657 tag, getRawComm(), &plan->
request[k]);
660 plan->
request[k] = MPI_REQUEST_NULL;
670 Teuchos::ArrayRCP<char> send_buff;
684 int global_out_of_mem;
685 Teuchos::reduceAll(*plan->
comm, Teuchos::REDUCE_SUM, 1, &out_of_mem,
695 while (proc_index < nblocks && plan->procs_to[proc_index] < my_proc) {
698 if (proc_index == nblocks) {
705 for (
int i = proc_index, j = 0; j < nblocks; j++) {
709 (
void *) &send_data[(
size_t)(plan->
starts_to[i])*(
size_t)nbytes],
710 plan->
lengths_to[i] * nbytes, (MPI_Datatype) MPI_BYTE,
711 plan->
procs_to[i], tag, getRawComm());
716 if (++i == nblocks) {
730 send_data.getRawPtr()+(size_t)(plan->
starts_to[self_num])*(size_t)nbytes,
731 (
size_t) (plan->
lengths_to[self_num]) * (
size_t) nbytes);
737 for (
int i = proc_index, jj = 0; jj < nblocks; jj++) {
742 for (
int k = 0; k < plan->
lengths_to[i]; k++) {
743 memcpy(&send_buff[offset],
744 &send_data[(
size_t)(plan->
indices_to[j++]) * (
size_t)nbytes], nbytes);
748 MPI_Rsend((
void *) &(send_buff[0]), plan->
lengths_to[i] * nbytes,
749 (MPI_Datatype) MPI_BYTE, plan->
procs_to[i], tag,
761 for (
int k = 0; k < plan->
lengths_to[self_num]; k++) {
764 (
size_t)(plan->
indices_to[self_index++]) * (
size_t)nbytes],
766 self_recv_address += nbytes;
774 for (
int i = proc_index, j = 0; j < nblocks; j++) {
778 MPI_Rsend((
void *) &send_data[(
size_t)(
781 (MPI_Datatype) MPI_BYTE, plan->
procs_to[i],
793 char* lrecv = &plan->
getRecvBuff().getRawPtr()[self_recv_address];
795 &send_data.getRawPtr()[(size_t)(plan->
starts_to_ptr[self_num]) * (size_t)nbytes];
796 int sindex = plan->
sizes_to[self_num], idx;
797 for (idx=0; idx<nbytes; idx++) {
798 memcpy(lrecv, lsend, sindex);
807 for (
int i = proc_index, jj = 0; jj < nblocks; jj++) {
811 for (
int k = 0; k < plan->
lengths_to[i]; k++) {
813 memcpy(&send_buff[offset],
823 MPI_Rsend((
void *) &(send_buff[0]), plan->
sizes_to[i] * nbytes,
824 (MPI_Datatype) MPI_BYTE, plan->
procs_to[i], tag, getRawComm());
836 for (
int k = 0; k < plan->
lengths_to[self_num]; k++) {
838 char* lrecv = &(plan->
getRecvBuff())[self_recv_address];
839 size_t send_idx = (size_t)kk * (
size_t)nbytes;
840 const char* lsend = &send_data[send_idx];
842 for (idx=0; idx<nbytes; idx++) {
843 memcpy(lrecv, lsend, sindex);
859 int Zoltan2_Directory_Comm::do_wait(
860 Zoltan2_Directory_Plan *plan,
862 const Teuchos::ArrayRCP<char> &,
864 Teuchos::ArrayRCP<char> &recv_data)
867 if (plan->maxed_recvs){
872 int my_proc = plan->comm->getRank();
876 if (plan->indices_from == Teuchos::null) {
877 if (plan->nrecvs > 0) {
879 MPI_Waitall(plan->nrecvs, &plan->request[0], &plan->status[0]);
885 size_t offsetDst = 0;
886 if (plan->self_msg) {
887 for (self_num = 0; self_num < plan->nrecvs + plan->self_msg;
889 if (plan->procs_from[self_num] == my_proc) {
894 if(plan->sizes_from.size()) {
901 memcpy(&recv_data[offsetDst * (
size_t)nbytes],
902 &(plan->getRecvBuff())[plan->starts_from_ptr[self_num] * (
size_t)nbytes],
903 plan->sizes_from[self_num] * (size_t)nbytes);
904 offsetDst += plan->sizes_from[self_num];
907 int k = plan->starts_from[self_num];
908 for (
int j = plan->lengths_from[self_num]; j; j--) {
909 memcpy(&recv_data[(
size_t)(plan->indices_from[k]) * (
size_t)nbytes],
910 &(plan->getRecvBuff())[(
size_t)k * (
size_t)nbytes], nbytes);
916 self_num = plan->nrecvs;
919 for (
int jj = 0; jj < plan->nrecvs; jj++) {
922 MPI_Waitany(plan->nrecvs, &plan->request[0], &index, &status);
925 if (index == MPI_UNDEFINED) {
929 if (index >= self_num) {
933 if(plan->sizes_from.size()) {
940 memcpy(&recv_data[offsetDst * (
size_t)nbytes],
941 &plan->getRecvBuff().getRawPtr()[plan->starts_from_ptr[index] * (size_t)nbytes],
942 plan->sizes_from[index] * (
size_t)nbytes);
943 offsetDst += plan->sizes_from[index];
946 int k = plan->starts_from[index];
947 for (
int j = plan->lengths_from[index]; j; j--) {
948 memcpy(&recv_data.getRawPtr()[(size_t)(plan->indices_from[k]) * (size_t)nbytes],
949 &plan->getRecvBuff().getRawPtr()[(size_t)k * (
size_t)nbytes], nbytes);
964 int Zoltan2_Directory_Comm::do_all_to_all(
965 Zoltan2_Directory_Plan *plan,
966 const Teuchos::ArrayRCP<char> &send_data,
968 Teuchos::ArrayRCP<char> &recv_data)
970 throw std::logic_error(
"UNTESTED COMM 10");
972 int sm = (plan->self_msg > 0) ? 1 : 0;
974 int nSendMsgs = plan->nsends + sm;
975 int nRecvMsgs = plan->nrecvs + sm;
978 for (
int i=0; i <nSendMsgs; i++) {
979 nSendItems += plan->lengths_to[i];
982 for (
int i=0; i <nRecvMsgs; i++) {
983 nRecvItems += plan->lengths_from[i];
986 int nprocs = plan->comm->getSize();
988 Teuchos::ArrayRCP<int> outbufCounts(
new int[nprocs], 0, nprocs,
true);
989 Teuchos::ArrayRCP<int> outbufOffsets(
new int[nprocs], 0, nprocs,
true);
990 Teuchos::ArrayRCP<int> inbufCounts(
new int[nprocs], 0, nprocs,
true);
991 Teuchos::ArrayRCP<int> inbufOffsets(
new int[nprocs], 0, nprocs,
true);
1003 if (plan->indices_to == Teuchos::null){
1005 for (
int i=1; i< nSendMsgs; i++){
1006 if (plan->starts_to[i] < plan->starts_to[i-1]){
1013 Teuchos::ArrayRCP<char> outbuf;
1014 Teuchos::ArrayRCP<char> inbuf;
1015 Teuchos::ArrayRCP<char> buf;
1017 if (plan->sizes_to.size()){
1024 for (
int i = 0; i < nSendMsgs; i++){
1025 outbufLen += plan->sizes_to[i];
1028 if (plan->indices_to != Teuchos::null) {
1032 buf.resize(outbufLen*nbytes);
1033 outbuf.resize(outbufLen*nbytes);
1034 char * pBufPtr = &(outbuf[0]);
1037 for (
int p = 0; p < nprocs; p++) {
1042 if (plan->procs_to[i] == p){
1044 for (
int j=0; j < plan->lengths_to[i]; j++,k++){
1045 int itemSize = plan->sizes[plan->indices_to[k]] * nbytes;
1046 int offset = plan->indices_to_ptr[k] * nbytes;
1048 memcpy(pBufPtr, &(send_data[0]) + offset, itemSize);
1050 pBufPtr += itemSize;
1057 outbufCounts[p] = length;
1059 outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
1068 if (!sorted || (plan->nvals > nSendItems) ){
1069 buf.resize(outbufLen*nbytes);
1070 outbuf.resize(outbufLen*nbytes);
1077 for(
int n = 0; n < outbufLen*nbytes; ++n) {
1078 outbuf[n] = send_data[n];
1082 char * pBufPtr = &(outbuf[0]);
1085 for (
int p = 0; p < nprocs; p++) {
1090 if (plan->procs_to[i] == p){
1091 length = plan->sizes_to[i] * nbytes;
1092 int offset = plan->starts_to_ptr[i] * nbytes;
1094 if ((!sorted || (plan->nvals > nSendItems)) && length){
1095 memcpy(pBufPtr, &(send_data[0]) + offset, length);
1102 outbufCounts[p] = length;
1104 outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
1109 else if (plan->indices_to != Teuchos::null) {
1114 buf.resize(nSendItems*nbytes);
1115 outbuf.resize(nSendItems*nbytes);
1116 char * pBufPtr = &(outbuf[0]);
1119 for (
int p = 0; p < nprocs; p++){
1124 if (plan->procs_to[i] == p){
1125 for (
int j=0; j < plan->lengths_to[i]; j++,k++) {
1126 int offset = plan->indices_to[k] * nbytes;
1127 memcpy(pBufPtr, &(send_data[0]) + offset, nbytes);
1130 length = plan->lengths_to[i] * nbytes;
1135 outbufCounts[p] = length;
1137 outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
1147 if (!sorted || (plan->nvals > nSendItems)){
1148 buf.resize(nSendItems*nbytes);
1149 outbuf.resize(nSendItems*nbytes);
1160 char * pBufPtr = &(outbuf[0]);
1163 for (
int p=0; p < nprocs; p++) {
1168 if (plan->procs_to[i] == p){
1169 int offset = plan->starts_to[i] * nbytes;
1170 length = plan->lengths_to[i] * nbytes;
1172 if ((!sorted || (plan->nvals > nSendItems)) && length){
1173 memcpy(pBufPtr, &(send_data[0]) + offset, length);
1180 outbufCounts[p] = length;
1182 outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
1191 if (plan->indices_from == Teuchos::null) {
1193 for (i=1; i< nRecvMsgs; i++) {
1194 if (plan->starts_from[i] < plan->starts_from[i-1]){
1211 inbuf.resize(plan->total_recv_size * nbytes);
1214 for (
int p = 0; p < nprocs; p++) {
1218 if (plan->procs_from[i] == p){
1220 if (!plan->using_sizes){
1221 length = plan->lengths_from[i] * nbytes;
1224 length = plan->sizes_from[i] * nbytes;
1230 inbufCounts[p] = length;
1232 inbufOffsets[p] = inbufOffsets[p-1] + inbufCounts[p-1];
1237 MPI_Alltoallv(&(outbuf[0]), &(outbufCounts[0]), &(outbufOffsets[0]), MPI_BYTE,
1238 &(inbuf[0]), &(inbufCounts[0]), &(inbufOffsets[0]), MPI_BYTE, getRawComm());
1249 char * pBufPtr = &(inbuf[0]);
1251 if (!plan->using_sizes){
1255 if (plan->indices_from == Teuchos::null) {
1256 for (i=0; i < nRecvMsgs; i++){
1257 int offset = plan->starts_from[i] * nbytes;
1258 int length = plan->lengths_from[i] * nbytes;
1259 memcpy(&(recv_data[0]) + offset, pBufPtr, length);
1265 for (i=0; i < nRecvMsgs; i++) {
1267 for (
int j=0; j < plan->lengths_from[i]; j++,k++){
1268 int offset = plan->indices_from[k] * nbytes;
1269 memcpy(&(recv_data[0]) + offset, pBufPtr, nbytes);
1279 for (i=0; i < nRecvMsgs; i++){
1280 int offset = plan->starts_from_ptr[i] * nbytes;
1281 int length = plan->sizes_from[i] * nbytes;
1282 memcpy(&(recv_data[0]) + offset, pBufPtr, length);
1293 const Teuchos::ArrayRCP<char> &send_data,
1295 const Teuchos::ArrayRCP<int> &sizes,
1296 Teuchos::ArrayRCP<char> &recv_data)
1300 int status = create_reverse_plan(tag, sizes);
1311 recv_data = Teuchos::arcp(
new char[new_size], 0, new_size,
true);
1318 throw std::logic_error(
"UNTESTED COMM 11");
1324 status = do_all_to_all(plan_forward->
plan_reverse, send_data,
1330 status = do_post(plan_forward->
plan_reverse, tag, send_data,
1334 status = do_wait (plan_forward->
plan_reverse, tag, send_data,
1340 free_reverse_plan(plan_forward);
1348 throw std::logic_error(
"Plan is NULL!");
1354 int Zoltan2_Directory_Comm::create_reverse_plan(
1356 const Teuchos::ArrayRCP<int> &sizes)
1360 throw std::logic_error(
"memory error");
1364 plan_forward->
plan_reverse =
new Zoltan2_Directory_Plan;
1373 Teuchos::reduceAll<int>(*plan_forward->
comm, Teuchos::REDUCE_SUM, 1,
1374 &plan_forward->
nsends, &global_nsends);
1390 sizes, tag, &sum_recv_sizes);
1392 if (comm_flag != 0) {
1405 const Teuchos::ArrayRCP<int> &sizes,
1407 int *sum_recv_sizes)
1409 return resize(plan_forward, sizes, tag, sum_recv_sizes);
1414 const Teuchos::ArrayRCP<int> &sizes,
1416 int *sum_recv_sizes)
1420 int my_proc = plan->
comm->getRank();
1421 int has_sizes = (sizes.size() != 0);
1425 MPI_Allreduce(&has_sizes, &var_sizes, 1, MPI_INT, MPI_LOR, getRawComm());
1427 if (var_sizes && plan->
indices_from != Teuchos::null) {
1434 int nsends = plan->
nsends;
1435 int nrecvs = plan->
nrecvs;
1438 Teuchos::ArrayRCP<int> sizes_to;
1439 Teuchos::ArrayRCP<int> sizes_from;
1440 Teuchos::ArrayRCP<int> starts_to_ptr;
1441 Teuchos::ArrayRCP<int> starts_from_ptr;
1442 Teuchos::ArrayRCP<int> indices_to_ptr;
1443 Teuchos::ArrayRCP<int> indices_from_ptr;
1447 for (
int i = 0; i < nrecvs + self_msg; i++) {
1452 for (
int i = 0; i < nsends + self_msg; i++) {
1453 if (plan->
procs_to[i] != my_proc &&
1470 plan->
sizes = sizes;
1473 if(nsends + self_msg > 0) {
1474 sizes_to = Teuchos::arcp(
1475 new int[nsends + self_msg], 0, nsends + self_msg,
true);
1476 for(
int n = 0; n < sizes_to.size(); ++n) {
1480 if(nrecvs + self_msg > 0) {
1481 sizes_from = Teuchos::arcp(
1482 new int[nrecvs + self_msg], 0, nrecvs + self_msg,
true);
1492 if(nsends + self_msg > 0) {
1493 starts_to_ptr = Teuchos::arcp(
1494 new int[nsends + self_msg], 0, nsends + self_msg,
true);
1498 Teuchos::ArrayRCP<int> index;
1499 Teuchos::ArrayRCP<int> sort_val;
1500 if(nsends + self_msg > 0) {
1501 index = Teuchos::arcp(
1502 new int[nsends + self_msg], 0, nsends + self_msg,
true);
1503 sort_val = Teuchos::arcp(
1504 new int[nsends + self_msg], 0, nsends + self_msg,
true);
1506 for (
int i = 0; i < nsends + self_msg; i++) {
1509 for (
int k = 0; k < plan->
lengths_to[i]; k++) {
1510 sizes_to[i] += sizes[j++];
1516 for (
int i = 0; i < nsends + self_msg; i++) {
1520 sort_ints(sort_val, index);
1522 for (
int i = 0; i < nsends + self_msg; i++) {
1523 starts_to_ptr[index[i]] = sum;
1524 sum += sizes_to[index[i]];
1528 Teuchos::ArrayRCP<int> offset;
1529 if(plan->
nvals > 0) {
1530 offset = Teuchos::arcp(
new int[plan->
nvals], 0, plan->
nvals,
true);
1532 indices_to_ptr.resize(plan->
nvals);
1536 for (
int i = 0; i < plan->
nvals; i++) {
1543 for (
int i = 0; i < nsends + self_msg; i++) {
1544 starts_to_ptr[i] = sum;
1546 for (
int k = 0; k < plan->
lengths_to[i]; k++) {
1547 indices_to_ptr[j] = offset[plan->
indices_to[j]];
1560 exchange_sizes(sizes_to, plan->
procs_to, nsends, self_msg,
1564 if(nrecvs + self_msg > 0) {
1565 starts_from_ptr = Teuchos::arcp(
1566 new int[nrecvs + self_msg], 0, nrecvs + self_msg,
true);
1571 Teuchos::ArrayRCP<int> index;
1572 Teuchos::ArrayRCP<int> sort_val;
1573 if(nrecvs + self_msg > 0) {
1574 index = Teuchos::arcp(
1575 new int[nrecvs + self_msg], 0, nrecvs + self_msg,
true);
1576 sort_val = Teuchos::arcp<int>(
1577 new int[nrecvs + self_msg], 0, nrecvs + self_msg,
true);
1580 for (
int i = 0; i < nrecvs + self_msg; i++) {
1584 sort_ints(sort_val, index);
1587 for (
int i = 0; i < nrecvs + self_msg; i++) {
1588 starts_from_ptr[index[i]] = sum;
1589 sum += sizes_from[index[i]];
1609 Teuchos::ArrayRCP<int> index;
1610 Teuchos::ArrayRCP<int> sort_val;
1611 if(nrecvs + self_msg > 0) {
1612 index = Teuchos::arcp(
1613 new int[nrecvs + self_msg], 0, nrecvs + self_msg,
true);
1614 sort_val = Teuchos::arcp(
1615 new int[nrecvs + self_msg], 0, nrecvs + self_msg,
true);
1618 for (
int i = 0; i < nrecvs + self_msg; i++) {
1622 sort_ints(sort_val, index);
1625 for (
int i = 0; i < nrecvs + self_msg; i++) {
1626 starts_from_ptr[index[i]] = sum;
1627 sum += sizes_from[index[i]];
1638 if (sum_recv_sizes) {
1645 int Zoltan2_Directory_Comm::exchange_sizes(
1646 const Teuchos::ArrayRCP<int> &sizes_to,
1647 const Teuchos::ArrayRCP<int> &procs_to,
1650 Teuchos::ArrayRCP<int> &sizes_from,
1651 const Teuchos::ArrayRCP<int> &procs_from,
1653 int *total_recv_size,
1656 Teuchos::RCP<
const Teuchos::Comm<int> > ) {
1659 int self_index_to = -1;
1660 for (
int i = 0; i < nsends + self_msg; i++) {
1661 if (procs_to[i] != my_proc) {
1663 MPI_Send((
void *) &sizes_to[i], 1, MPI_INT, procs_to[i], tag, getRawComm());
1670 *total_recv_size = 0;
1672 for (
int i = 0; i < nrecvs + self_msg; i++) {
1673 if (procs_from[i] != my_proc) {
1676 MPI_Recv((
void *) &(sizes_from[i]), 1, MPI_INT, procs_from[i],
1677 tag, getRawComm(), &status);
1680 sizes_from[i] = sizes_to[self_index_to];
1682 *total_recv_size += sizes_from[i];
Teuchos::ArrayRCP< int > procs_from
Teuchos::ArrayRCP< int > indices_to
Teuchos::ArrayRCP< int > lengths_to
Teuchos::ArrayRCP< char > recv_buff
Teuchos::ArrayRCP< int > indices_from_ptr
Teuchos::ArrayRCP< int > starts_from
void print(const std::string &headerMessage) const
Teuchos::ArrayRCP< int > procs_to
int do_reverse(int tag, const Teuchos::ArrayRCP< char > &send_data, int nbytes, const Teuchos::ArrayRCP< int > &sizes, Teuchos::ArrayRCP< char > &recv_data)
Teuchos::ArrayRCP< char > getRecvBuff() const
std::vector< MPI_Request > request
~Zoltan2_Directory_Comm()
Teuchos::ArrayRCP< int > starts_to
Teuchos::ArrayRCP< int > indices_to_ptr
Teuchos::ArrayRCP< int > indices_from
Teuchos::ArrayRCP< int > starts_from_ptr
Teuchos::ArrayRCP< int > sizes_from
std::vector< MPI_Status > status
Teuchos::ArrayRCP< int > sizes_to
int do_forward(int tag, const Teuchos::ArrayRCP< char > &send_data, int nbytes, Teuchos::ArrayRCP< char > &recv_data)
int resize(const Teuchos::ArrayRCP< int > &sizes, int tag, int *sum_recv_sizes)
Teuchos::ArrayRCP< int > lengths_from
Teuchos::ArrayRCP< int > sizes
Zoltan2_Directory_Plan * plan_reverse
Teuchos::RCP< const Teuchos::Comm< int > > comm
Zoltan2_Directory_Comm(int nvals, const Teuchos::ArrayRCP< int > &assign, Teuchos::RCP< const Teuchos::Comm< int > > comm, int tag)
Teuchos::ArrayRCP< int > starts_to_ptr
void getInvertedValues(Zoltan2_Directory_Plan *from)