43 #include "SubdomainGraph_dh.h"
44 #include "getRow_dh.h"
46 #include "Parser_dh.h"
47 #include "Hash_i_dh.h"
48 #include "mat_dh_private.h"
50 #include "SortedSet_dh.h"
51 #include "shellSort_dh.h"
76 int *interiorNodes,
int *bdryNodes,
77 int *interiorCount,
int *bdryCount);
79 void *A,
int *interiorNodes,
80 int *bdryNodes,
int *interiorCount,
91 #define __FUNC__ "SubdomainGraph_dhCreate"
102 tmp->ptrs = tmp->adj = NULL;
104 tmp->colorVec = NULL;
105 tmp->o2n_sub = tmp->n2o_sub = NULL;
106 tmp->beg_row = tmp->beg_rowP = NULL;
107 tmp->bdry_count = tmp->row_count = NULL;
108 tmp->loNabors = tmp->hiNabors = tmp->allNabors = NULL;
109 tmp->loCount = tmp->hiCount = tmp->allCount = 0;
112 tmp->n2o_row = tmp->o2n_col = NULL;
113 tmp->o2n_ext = tmp->n2o_ext = NULL;
115 tmp->doNotColor = Parser_dhHasSwitch (parser_dh,
"-doNotColor");
116 tmp->debug = Parser_dhHasSwitch (parser_dh,
"-debug_SubGraph");
119 for (i = 0; i < TIMING_BINS_SG; ++i)
120 tmp->timing[i] = 0.0;
125 #define __FUNC__ "SubdomainGraph_dhDestroy"
129 START_FUNC_DH
if (s->ptrs != NULL)
139 if (s->colorVec != NULL)
141 FREE_DH (s->colorVec);
144 if (s->o2n_sub != NULL)
146 FREE_DH (s->o2n_sub);
149 if (s->n2o_sub != NULL)
151 FREE_DH (s->n2o_sub);
155 if (s->beg_row != NULL)
157 FREE_DH (s->beg_row);
160 if (s->beg_rowP != NULL)
162 FREE_DH (s->beg_rowP);
165 if (s->row_count != NULL)
167 FREE_DH (s->row_count);
170 if (s->bdry_count != NULL)
172 FREE_DH (s->bdry_count);
175 if (s->loNabors != NULL)
177 FREE_DH (s->loNabors);
180 if (s->hiNabors != NULL)
182 FREE_DH (s->hiNabors);
185 if (s->allNabors != NULL)
187 FREE_DH (s->allNabors);
191 if (s->n2o_row != NULL)
193 FREE_DH (s->n2o_row);
196 if (s->o2n_col != NULL)
198 FREE_DH (s->o2n_col);
201 if (s->o2n_ext != NULL)
203 Hash_i_dhDestroy (s->o2n_ext);
206 if (s->n2o_ext != NULL)
208 Hash_i_dhDestroy (s->n2o_ext);
216 #define __FUNC__ "SubdomainGraph_dhInit"
220 START_FUNC_DH
double t1 = MPI_Wtime ();
225 if (np_dh == 1 || blocks > 1)
228 init_seq_private (s, blocks, bj, A);
234 init_mpi_private (s, np_dh, bj, A);
238 s->timing[TOTAL_SGT] += (MPI_Wtime () - t1);
243 #define __FUNC__ "SubdomainGraph_dhFindOwner"
247 START_FUNC_DH
int sd;
248 int *beg_row = s->beg_row;
249 int *row_count = s->row_count;
250 int owner = -1, blocks = s->blocks;
253 beg_row = s->beg_rowP;
256 for (sd = 0; sd < blocks; ++sd)
258 if (idx >= beg_row[sd] && idx < beg_row[sd] + row_count[sd])
268 fprintf (stderr,
"@@@ failed to find owner for idx = %i @@@\n", idx);
269 fprintf (stderr,
"blocks= %i\n", blocks);
271 sprintf (msgBuf_dh,
"failed to find owner for idx = %i", idx);
272 SET_ERROR (-1, msgBuf_dh);
275 END_FUNC_VAL (owner)}
279 #define __FUNC__ "SubdomainGraph_dhPrintStatsLong"
283 START_FUNC_DH
int i, j, k;
284 double max = 0, min = INT_MAX;
287 "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
288 fprintf (fp,
"colors used = %i\n", s->colors);
289 fprintf (fp,
"subdomain count = %i\n", s->blocks);
292 fprintf (fp,
"\ninterior/boundary node ratios:\n");
294 for (i = 0; i < s->blocks; ++i)
296 int inNodes = s->row_count[i] - s->bdry_count[i];
297 int bdNodes = s->bdry_count[i];
306 ratio = (double) inNodes / (
double) bdNodes;
309 max = MAX (max, ratio);
310 min = MIN (min, ratio);
312 " P_%i: first= %3i rowCount= %3i interior= %3i bdry= %3i ratio= %0.1f\n",
313 i, 1 + s->beg_row[i], s->row_count[i], inNodes, bdNodes,
318 fprintf (fp,
"\nmax interior/bdry ratio = %.1f\n", max);
319 fprintf (fp,
"min interior/bdry ratio = %.1f\n", min);
327 fprintf (fp,
"\nunpermuted subdomain graph: \n");
328 for (i = 0; i < s->blocks; ++i)
330 fprintf (fp,
"%i :: ", i);
331 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
333 fprintf (fp,
"%i ", s->adj[j]);
343 fprintf (fp,
"\no2n subdomain permutation:\n");
344 for (i = 0; i < s->blocks; ++i)
346 fprintf (fp,
" %i %i\n", i, s->o2n_sub[i]);
356 fprintf (fp,
"\nlocal n2o_row permutation:\n ");
357 for (i = 0; i < s->row_count[myid_dh]; ++i)
359 fprintf (fp,
"%i ", s->n2o_row[i]);
366 fprintf (fp,
"\nlocal o2n_col permutation:\n ");
367 for (i = 0; i < s->row_count[myid_dh]; ++i)
369 fprintf (fp,
"%i ", s->o2n_col[i]);
379 fprintf (fp,
"\nlocal n2o_row permutation:\n");
380 fprintf (fp,
"--------------------------\n");
381 for (k = 0; k < s->blocks; ++k)
383 int beg_row = s->beg_row[k];
384 int end_row = beg_row + s->row_count[k];
386 for (i = beg_row; i < end_row; ++i)
388 fprintf (fp,
"%i ", s->n2o_row[i]);
396 fprintf (fp,
"\nlocal o2n_col permutation:\n");
397 fprintf (fp,
"--------------------------\n");
398 for (k = 0; k < s->blocks; ++k)
400 int beg_row = s->beg_row[k];
401 int end_row = beg_row + s->row_count[k];
403 for (i = beg_row; i < end_row; ++i)
405 fprintf (fp,
"%i ", s->o2n_col[i]);
416 #define __FUNC__ "init_seq_private"
420 START_FUNC_DH
int m, n, beg_row;
428 EuclidGetDimensions (A, &beg_row, &m, &n);
437 allocate_storage_private (s, blocks, m, bj);
448 int rpp = m / blocks;
450 if (rpp * blocks < m)
454 for (i = 1; i < blocks; ++i)
455 s->beg_row[i] = rpp + s->beg_row[i - 1];
456 for (i = 0; i < blocks; ++i)
457 s->row_count[i] = rpp;
458 s->row_count[blocks - 1] = m - rpp * (blocks - 1);
460 memcpy (s->beg_rowP, s->beg_row, blocks * sizeof (
int));
481 find_bdry_nodes_seq_private (s, m, A);
487 for (i = 0; i < m; ++i)
493 s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
507 form_subdomaingraph_seq_private (s, m, A);
512 printf_dh (
"subdomain coloring and reordering is OFF\n");
513 for (i = 0; i < blocks; ++i)
522 SET_INFO (
"subdomain coloring and reordering is ON");
523 color_subdomain_graph_private (s);
532 for (i = 0; i < blocks; ++i)
538 s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
549 adjust_matrix_perms_private (s, m);
574 #define __FUNC__ "partition_metis_private"
578 START_FUNC_DH
if (ignoreMe)
579 SET_V_ERROR (
"not implemented");
584 #define __FUNC__ "allocate_storage_private"
588 START_FUNC_DH
if (!bj)
590 s->ptrs = (
int *) MALLOC_DH ((blocks + 1) *
sizeof (int));
593 s->colorVec = (
int *) MALLOC_DH (blocks *
sizeof (
int));
595 s->loNabors = (
int *) MALLOC_DH (np_dh *
sizeof (
int));
597 s->hiNabors = (
int *) MALLOC_DH (np_dh *
sizeof (
int));
599 s->allNabors = (
int *) MALLOC_DH (np_dh *
sizeof (
int));
603 s->n2o_row = (
int *) MALLOC_DH (m *
sizeof (
int));
605 s->o2n_col = (
int *) MALLOC_DH (m *
sizeof (
int));
610 s->beg_row = (
int *) MALLOC_DH ((blocks) *
sizeof (int));
612 s->beg_rowP = (
int *) MALLOC_DH ((blocks) *
sizeof (int));
614 s->row_count = (
int *) MALLOC_DH (blocks *
sizeof (
int));
616 s->bdry_count = (
int *) MALLOC_DH (blocks *
sizeof (
int));
618 s->o2n_sub = (
int *) MALLOC_DH (blocks *
sizeof (
int));
620 s->n2o_sub = (
int *) MALLOC_DH (blocks *
sizeof (
int));
629 #define __FUNC__ "init_mpi_private"
633 START_FUNC_DH
int m, n, beg_row;
637 symmetric = Parser_dhHasSwitch (parser_dh,
"-sym");
639 if (Parser_dhHasSwitch (parser_dh,
"-makeSymmetric"))
648 EuclidGetDimensions (A, &beg_row, &m, &n);
657 allocate_storage_private (s, blocks, m, bj);
668 MPI_Allgather (&beg_row, 1, MPI_INT, s->beg_row, 1, MPI_INT, comm_dh);
669 MPI_Allgather (&m, 1, MPI_INT, s->row_count, 1, MPI_INT, comm_dh);
670 memcpy (s->beg_rowP, s->beg_row, np_dh * sizeof (
int));
674 s->beg_row[myid_dh] = beg_row;
675 s->beg_rowP[myid_dh] = beg_row;
676 s->row_count[myid_dh] = m;
688 find_all_neighbors_sym_private (s, m, A);
693 find_all_neighbors_unsym_private (s, m, A);
696 s->timing[FIND_NABORS_SGT] += (MPI_Wtime () - t1);
711 int *interiorNodes, *bdryNodes;
712 int interiorCount, bdryCount;
713 int *o2n = s->o2n_col, idx;
716 interiorNodes = (
int *) MALLOC_DH (m *
sizeof (
int));
718 bdryNodes = (
int *) MALLOC_DH (m *
sizeof (
int));
726 find_bdry_nodes_sym_private (s, m, A,
727 interiorNodes, bdryNodes,
728 &interiorCount, &bdryCount);
733 find_bdry_nodes_unsym_private (s, m, A,
734 interiorNodes, bdryNodes,
735 &interiorCount, &bdryCount);
740 MPI_Allgather (&bdryCount, 1, MPI_INT, s->bdry_count, 1, MPI_INT,
745 for (i = 0; i < interiorCount; ++i)
747 o2n[interiorNodes[i]] = idx++;
749 for (i = 0; i < bdryCount; ++i)
751 o2n[bdryNodes[i]] = idx++;
755 invert_perm (m, o2n, s->n2o_row);
758 FREE_DH (interiorNodes);
767 int *o2n = s->o2n_col, *n2o = s->n2o_row;
770 for (i = 0; i < m; ++i)
776 s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
790 form_subdomaingraph_mpi_private (s);
795 printf_dh (
"subdomain coloring and reordering is OFF\n");
796 for (i = 0; i < blocks; ++i)
805 SET_INFO (
"subdomain coloring and reordering is ON");
806 color_subdomain_graph_private (s);
809 s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
819 find_ordered_neighbors_private (s);
832 SubdomainGraph_dhExchangePerms (s);
834 s->timing[EXCHANGE_PERMS_SGT] += (MPI_Wtime () - t1);
842 #define __FUNC__ "SubdomainGraph_dhExchangePerms"
846 START_FUNC_DH MPI_Request * recv_req = NULL, *send_req = NULL;
847 MPI_Status *status = NULL;
848 int *nabors = s->allNabors, naborCount = s->allCount;
849 int i, j, *sendBuf = NULL, *recvBuf = NULL, *naborIdx = NULL, nz;
850 int m = s->row_count[myid_dh];
851 int beg_row = s->beg_row[myid_dh];
852 int beg_rowP = s->beg_rowP[myid_dh];
853 int *bdryNodeCounts = s->bdry_count;
854 int myBdryCount = s->bdry_count[myid_dh];
856 int myFirstBdry = m - myBdryCount;
857 int *n2o_row = s->n2o_row;
860 if (logFile != NULL && s->debug)
866 sendBuf = (
int *) MALLOC_DH (2 * myBdryCount *
sizeof (
int));
873 "\nSUBG myFirstBdry= %i myBdryCount= %i m= %i beg_rowP= %i\n",
874 1 + myFirstBdry, myBdryCount, m, 1 + beg_rowP);
878 for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
880 sendBuf[2 * j] = n2o_row[i] + beg_row;
881 sendBuf[2 * j + 1] = i + beg_rowP;
886 fprintf (logFile,
"\nSUBG SEND_BUF:\n");
887 for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
889 fprintf (logFile,
"SUBG %i, %i\n", 1 + sendBuf[2 * j],
890 1 + sendBuf[2 * j + 1]);
899 naborIdx = (
int *) MALLOC_DH ((1 + naborCount) *
sizeof (int));
903 for (i = 0; i < naborCount; ++i)
905 nz += (2 * bdryNodeCounts[nabors[i]]);
906 naborIdx[i + 1] = nz;
910 recvBuf = (
int *) MALLOC_DH (nz *
sizeof (
int));
917 recv_req = (MPI_Request *) MALLOC_DH (naborCount *
sizeof (MPI_Request));
919 send_req = (MPI_Request *) MALLOC_DH (naborCount *
sizeof (MPI_Request));
921 status = (MPI_Status *) MALLOC_DH (naborCount *
sizeof (MPI_Status));
924 for (i = 0; i < naborCount; ++i)
926 int nabr = nabors[i];
927 int *buf = recvBuf + naborIdx[i];
928 int ct = 2 * bdryNodeCounts[nabr];
931 MPI_Isend (sendBuf, 2 * myBdryCount, MPI_INT, nabr, 444, comm_dh,
936 fprintf (logFile,
"SUBG sending %i elts to %i\n", 2 * myBdryCount,
941 MPI_Irecv (buf, ct, MPI_INT, nabr, 444, comm_dh, &(recv_req[i]));
945 fprintf (logFile,
"SUBG receiving %i elts from %i\n", ct, nabr);
950 MPI_Waitall (naborCount, send_req, status);
951 MPI_Waitall (naborCount, recv_req, status);
953 Hash_i_dhCreate (&n2o_table, nz / 2);
955 Hash_i_dhCreate (&o2n_table, nz / 2);
957 s->n2o_ext = n2o_table;
958 s->o2n_ext = o2n_table;
961 for (i = 0; i < nz; i += 2)
963 int old = recvBuf[i];
964 int new = recvBuf[i + 1];
968 fprintf (logFile,
"SUBG i= %i old= %i new= %i\n", i, old + 1,
973 Hash_i_dhInsert (o2n_table, old,
new);
975 Hash_i_dhInsert (n2o_table,
new, old);
985 if (naborIdx != NULL)
995 if (recv_req != NULL)
1000 if (send_req != NULL)
1016 #define __FUNC__ "form_subdomaingraph_mpi_private"
1020 START_FUNC_DH
int *nabors = s->allNabors, nct = s->allCount;
1022 int i, j, nz, *adj, *ptrs = s->ptrs;
1023 MPI_Request *recvReqs = NULL, sendReq;
1024 MPI_Status *statuses = NULL, status;
1029 idxAll = (
int *) MALLOC_DH (np_dh *
sizeof (
int));
1032 MPI_Gather (&nct, 1, MPI_INT, idxAll, 1, MPI_INT, 0, comm_dh);
1038 for (i = 0; i < np_dh; ++i)
1041 MPI_Bcast (&nz, 1, MPI_INT, 0, comm_dh);
1046 adj = s->adj = (
int *) MALLOC_DH (nz *
sizeof (
int));
1052 recvReqs = (MPI_Request *) MALLOC_DH (np_dh *
sizeof (MPI_Request));
1054 statuses = (MPI_Status *) MALLOC_DH (np_dh *
sizeof (MPI_Status));
1059 for (j = 0; j < np_dh; ++j)
1060 ptrs[j + 1] = ptrs[j] + idxAll[j];
1063 for (j = 0; j < np_dh; ++j)
1067 MPI_Irecv (adj + ptrs[j], ct, MPI_INT, j, 42, comm_dh,
1073 MPI_Isend (nabors, nct, MPI_INT, 0, 42, comm_dh, &sendReq);
1078 MPI_Waitall (np_dh, recvReqs, statuses);
1080 MPI_Wait (&sendReq, &status);
1083 MPI_Bcast (ptrs, 1 + np_dh, MPI_INT, 0, comm_dh);
1084 MPI_Bcast (adj, nz, MPI_INT, 0, comm_dh);
1091 if (recvReqs != NULL)
1096 if (statuses != NULL)
1108 #define __FUNC__ "form_subdomaingraph_seq_private"
1112 START_FUNC_DH
int *dense, i, j, row, blocks = s->blocks;
1113 int *cval, len, *adj;
1114 int idx = 0, *ptrs = s->ptrs;
1121 adj = s->adj = (
int *) MALLOC_DH (blocks * blocks *
sizeof (
int));
1124 dense = (
int *) MALLOC_DH (blocks * blocks *
sizeof (
int));
1126 for (i = 0; i < blocks * blocks; ++i)
1130 for (i = 0; i < blocks; ++i)
1132 int beg_row = s->beg_row[i];
1133 int end_row = beg_row + s->row_count[i];
1135 for (row = beg_row; row < end_row; ++row)
1137 EuclidGetRow (A, row, &len, &cval, NULL);
1139 for (j = 0; j < len; ++j)
1142 if (col < beg_row || col >= end_row)
1144 int owner = SubdomainGraph_dhFindOwner (s, col,
false);
1146 dense[i * blocks + owner] = 1;
1147 dense[owner * blocks + i] = 1;
1150 EuclidRestoreRow (A, row, &len, &cval, NULL);
1159 for (i = 0; i < blocks; ++i)
1161 for (j = 0; j < blocks; ++j)
1163 if (dense[i * blocks + j])
1177 #define __FUNC__ "find_all_neighbors_sym_private"
1181 START_FUNC_DH
int *marker, i, j, beg_row, end_row;
1182 int row, len, *cval, ct = 0;
1183 int *nabors = s->allNabors;
1185 marker = (
int *) MALLOC_DH (m *
sizeof (
int));
1187 for (i = 0; i < m; ++i)
1191 (
"finding nabors in subdomain graph for structurally symmetric matrix");
1192 SET_INFO (
"(if this isn't what you want, use '-sym 0' switch)");
1194 beg_row = s->beg_row[myid_dh];
1195 end_row = beg_row + s->row_count[myid_dh];
1197 for (row = beg_row; row < end_row; ++row)
1199 EuclidGetRow (A, row, &len, &cval, NULL);
1201 for (j = 0; j < len; ++j)
1204 if (col < beg_row || col >= end_row)
1206 int owner = SubdomainGraph_dhFindOwner (s, col,
false);
1211 nabors[ct++] = owner;
1215 EuclidRestoreRow (A, row, &len, &cval, NULL);
1230 #define __FUNC__ "find_all_neighbors_unsym_private"
1234 START_FUNC_DH
int i, j, row, beg_row, end_row;
1236 int *cval, len, idx = 0;
1237 int nz, *nabors = s->allNabors, *myNabors;
1239 myNabors = (
int *) MALLOC_DH (np_dh *
sizeof (
int));
1241 marker = (
int *) MALLOC_DH (np_dh *
sizeof (
int));
1243 for (i = 0; i < np_dh; ++i)
1247 (
"finding nabors in subdomain graph for structurally unsymmetric matrix");
1252 beg_row = s->beg_row[myid_dh];
1253 end_row = beg_row + s->row_count[myid_dh];
1258 for (row = beg_row; row < end_row; ++row)
1260 EuclidGetRow (A, row, &len, &cval, NULL);
1262 for (j = 0; j < len; ++j)
1266 if (col < beg_row || col >= end_row)
1268 int owner = SubdomainGraph_dhFindOwner (s, col,
false);
1276 myNabors[idx++] = owner;
1280 EuclidRestoreRow (A, row, &len, &cval, NULL);
1307 MPI_Alltoall (marker, 1, MPI_INT, nabors, 1, MPI_INT, comm_dh);
1311 for (i = 0; i < idx; ++i)
1312 nabors[myNabors[i]] = 1;
1315 nabors[myid_dh] = 0;
1325 for (i = 0; i < np_dh; ++i)
1331 memcpy (nabors, myNabors, nz *
sizeof (
int));
1338 if (myNabors != NULL)
1348 #define __FUNC__ "find_bdry_nodes_sym_private"
1351 int *interiorNodes,
int *bdryNodes,
1352 int *interiorCount,
int *bdryCount)
1354 START_FUNC_DH
int beg_row = s->beg_row[myid_dh];
1355 int end_row = beg_row + s->row_count[myid_dh];
1356 int row, inCt = 0, bdCt = 0;
1362 for (row = beg_row; row < end_row; ++row)
1364 bool isBdry =
false;
1366 EuclidGetRow (A, row, &len, &cval, NULL);
1369 for (j = 0; j < len; ++j)
1372 if (col < beg_row || col >= end_row)
1378 EuclidRestoreRow (A, row, &len, &cval, NULL);
1383 bdryNodes[bdCt++] = row - beg_row;
1387 interiorNodes[inCt++] = row - beg_row;
1391 *interiorCount = inCt;
1396 #define BDRY_NODE_TAG 42
1399 #define __FUNC__ "find_bdry_nodes_unsym_private"
1402 int *interiorNodes,
int *boundaryNodes,
1403 int *interiorCount,
int *bdryCount)
1405 START_FUNC_DH
int beg_row = s->beg_row[myid_dh];
1406 int end_row = beg_row + s->row_count[myid_dh];
1410 int *rpIN = NULL, *rpOUT = NULL;
1411 int *sendBuf, *recvBuf;
1412 int *marker, inCt, bdCt;
1415 MPI_Request *sendReq, *recvReq;
1419 SortedSet_dhCreate (&ss, m);
1426 for (row = beg_row; row < end_row; ++row)
1428 bool isBdry =
false;
1430 EuclidGetRow (A, row, &len, &cval, NULL);
1433 for (j = 0; j < len; ++j)
1436 if (col < beg_row || col >= end_row)
1439 SortedSet_dhInsert (ss, col);
1444 EuclidRestoreRow (A, row, &len, &cval, NULL);
1449 SortedSet_dhInsert (ss, row);
1458 sendBuf = (
int *) MALLOC_DH (np_dh *
sizeof (
int));
1460 recvBuf = (
int *) MALLOC_DH (np_dh *
sizeof (
int));
1462 rpOUT = (
int *) MALLOC_DH ((np_dh + 1) *
sizeof (int));
1465 for (i = 0; i < np_dh; ++i)
1469 SortedSet_dhGetList (ss, &list, &count);
1472 for (i = 0; i < count; )
1478 owner = SubdomainGraph_dhFindOwner (s, node,
false);
1480 last = s->beg_row[owner] + s->row_count[owner];
1483 while ((i < count) && (list[i] < last))
1487 sendBuf[owner] = rpOUT[sendCt] - rpOUT[sendCt - 1];
1495 MPI_Alltoall (sendBuf, 1, MPI_INT, recvBuf, 1, MPI_INT, comm_dh);
1504 rpIN = (
int *) MALLOC_DH ((np_dh + 1) *
sizeof (int));
1509 for (i = 0; i < np_dh; ++i)
1518 bdryNodes = (
int *) MALLOC_DH (nz *
sizeof (
int));
1520 sendReq = (MPI_Request *) MALLOC_DH (sendCt *
sizeof (MPI_Request));
1522 recvReq = (MPI_Request *) MALLOC_DH (recvCt *
sizeof (MPI_Request));
1524 max = MAX (sendCt, recvCt);
1525 status = (MPI_Status *) MALLOC_DH (max *
sizeof (MPI_Status));
1530 for (i = 0; i < np_dh; ++i)
1534 MPI_Irecv (bdryNodes + rpIN[j], recvBuf[i], MPI_INT,
1535 i, BDRY_NODE_TAG, comm_dh, recvReq + j);
1542 for (i = 0; i < np_dh; ++i)
1546 MPI_Isend (list + rpOUT[j], sendBuf[i], MPI_INT,
1547 i, BDRY_NODE_TAG, comm_dh, sendReq + j);
1553 MPI_Waitall (sendCt, sendReq, status);
1554 MPI_Waitall (recvCt, recvReq, status);
1557 for (i = 0; i < nz; ++i)
1558 bdryNodes[i] -= beg_row;
1564 marker = (
int *) MALLOC_DH (m *
sizeof (
int));
1566 for (i = 0; i < m; ++i)
1568 for (i = 0; i < nz; ++i)
1569 marker[bdryNodes[i]] = 1;
1572 for (i = 0; i < m; ++i)
1576 boundaryNodes[bdCt++] = i;
1580 interiorNodes[inCt++] = i;
1583 *interiorCount = inCt;
1589 SortedSet_dhDestroy (ss);
1601 if (sendBuf != NULL)
1606 if (recvBuf != NULL)
1611 if (bdryNodes != NULL)
1613 FREE_DH (bdryNodes);
1621 if (sendReq != NULL)
1626 if (recvReq != NULL)
1640 #define __FUNC__ "find_ordered_neighbors_private"
1644 START_FUNC_DH
int *loNabors = s->loNabors;
1645 int *hiNabors = s->hiNabors;
1646 int *allNabors = s->allNabors, allCount = s->allCount;
1647 int loCt = 0, hiCt = 0;
1648 int *o2n = s->o2n_sub;
1649 int i, myNewId = o2n[myid_dh];
1651 for (i = 0; i < allCount; ++i)
1653 int nabor = allNabors[i];
1654 if (o2n[nabor] < myNewId)
1656 loNabors[loCt++] = nabor;
1660 hiNabors[hiCt++] = nabor;
1670 #define __FUNC__ "color_subdomain_graph_private"
1674 START_FUNC_DH
int i, n = np_dh;
1675 int *rp = s->ptrs, *cval = s->adj;
1676 int j, *marker, thisNodesColor, *colorCounter;
1677 int *o2n = s->o2n_sub;
1678 int *color = s->colorVec;
1683 marker = (
int *) MALLOC_DH ((n + 1) *
sizeof (int));
1684 colorCounter = (
int *) MALLOC_DH ((n + 1) *
sizeof (int));
1685 for (i = 0; i <= n; ++i)
1688 colorCounter[i] = 0;
1694 for (i = 0; i < n; ++i)
1700 for (j = rp[i]; j < rp[i + 1]; ++j)
1702 int nabor = cval[j];
1705 int naborsColor = color[nabor];
1706 marker[naborsColor] = i;
1711 thisNodesColor = -1;
1712 for (j = 0; j < n; ++j)
1720 color[i] = thisNodesColor;
1721 colorCounter[1 + thisNodesColor] += 1;
1729 for (i = 1; i < n; ++i)
1731 if (colorCounter[i] == 0)
1733 colorCounter[i] += colorCounter[i - 1];
1736 for (i = 0; i < n; ++i)
1738 o2n[i] = colorCounter[color[i]];
1739 colorCounter[color[i]] += 1;
1743 invert_perm (n, s->o2n_sub, s->n2o_sub);
1752 for (j = 0; j < n; ++j)
1754 if (marker[j] == -1)
1767 for (i = 0; i < n; ++i)
1769 int old = s->n2o_sub[i];
1770 s->beg_rowP[old] = sum;
1771 sum += s->row_count[old];
1777 FREE_DH (colorCounter);
1783 #define __FUNC__ "SubdomainGraph_dhDump"
1787 START_FUNC_DH
int i;
1799 fp = openFile_dh (filename,
"w");
1803 fprintf (fp,
"----- colors used\n");
1804 fprintf (fp,
"%i\n", s->colors);
1805 if (s->colorVec == NULL)
1807 fprintf (fp,
"s->colorVec == NULL\n");
1811 fprintf (fp,
"----- colorVec\n");
1812 for (i = 0; i < sCt; ++i)
1814 fprintf (fp,
"%i ", s->colorVec[i]);
1819 if (s->o2n_sub == NULL || s->o2n_sub == NULL)
1821 fprintf (fp,
"s->o2n_sub == NULL || s->o2n_sub == NULL\n");
1825 fprintf (fp,
"----- o2n_sub\n");
1826 for (i = 0; i < sCt; ++i)
1828 fprintf (fp,
"%i ", s->o2n_sub[i]);
1831 fprintf (fp,
"----- n2o_sub\n");
1832 for (i = 0; i < sCt; ++i)
1834 fprintf (fp,
"%i ", s->n2o_sub[i]);
1840 if (s->beg_row == NULL || s->beg_rowP == NULL)
1842 fprintf (fp,
"s->beg_row == NULL || s->beg_rowP == NULL\n");
1846 fprintf (fp,
"----- beg_row\n");
1847 for (i = 0; i < sCt; ++i)
1849 fprintf (fp,
"%i ", 1 + s->beg_row[i]);
1852 fprintf (fp,
"----- beg_rowP\n");
1853 for (i = 0; i < sCt; ++i)
1855 fprintf (fp,
"%i ", 1 + s->beg_rowP[i]);
1861 if (s->row_count == NULL || s->bdry_count == NULL)
1863 fprintf (fp,
"s->row_count == NULL || s->bdry_count == NULL\n");
1867 fprintf (fp,
"----- row_count\n");
1868 for (i = 0; i < sCt; ++i)
1870 fprintf (fp,
"%i ", s->row_count[i]);
1873 fprintf (fp,
"----- bdry_count\n");
1874 for (i = 0; i < sCt; ++i)
1876 fprintf (fp,
"%i ", s->bdry_count[i]);
1883 if (s->ptrs == NULL || s->adj == NULL)
1885 fprintf (fp,
"s->ptrs == NULL || s->adj == NULL\n");
1891 fprintf (fp,
"----- subdomain graph\n");
1892 for (i = 0; i < sCt; ++i)
1894 fprintf (fp,
"%i :: ", i);
1895 ct = s->ptrs[i + 1] - s->ptrs[i];
1898 shellSort_int (ct, s->adj + s->ptrs[i]);
1901 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
1903 fprintf (fp,
"%i ", s->adj[j]);
1915 if (s->beg_rowP == NULL)
1917 SET_V_ERROR (
"s->beg_rowP == NULL; can't continue");
1919 if (s->row_count == NULL)
1921 SET_V_ERROR (
"s->row_count == NULL; can't continue");
1923 if (s->o2n_sub == NULL)
1925 SET_V_ERROR (
"s->o2n_sub == NULL; can't continue");
1931 fp = openFile_dh (filename,
"a");
1935 if (s->n2o_row == NULL || s->o2n_col == NULL)
1937 fprintf (fp,
"s->n2o_row == NULL|| s->o2n_col == NULL\n");
1941 fprintf (fp,
"----- n2o_row\n");
1942 for (i = 0; i < s->m; ++i)
1944 fprintf (fp,
"%i ", 1 + s->n2o_row[i]);
1955 fprintf (fp,
"----- o2n_col\n");
1956 for (i = 0; i < sCt; ++i)
1958 int br = s->beg_row[i];
1959 int er = br + s->row_count[i];
1961 for (j = br; j < er; ++j)
1963 fprintf (fp,
"%i ", 1 + s->o2n_col[j]);
1979 int id = s->n2o_sub[myid_dh];
1983 if (s->beg_row != 0)
1984 beg_row = s->beg_row[myid_dh];
1987 for (pe = 0; pe < np_dh; ++pe)
1989 MPI_Barrier (comm_dh);
1992 fp = openFile_dh (filename,
"a");
1995 fprintf (fp,
"----- n2o_row\n");
1997 for (i = 0; i < m; ++i)
1999 fprintf (fp,
"%i ", 1 + s->n2o_row[i] + beg_row);
2001 if (
id == np_dh - 1)
2011 for (pe = 0; pe < np_dh; ++pe)
2013 MPI_Barrier (comm_dh);
2016 fp = openFile_dh (filename,
"a");
2019 fprintf (fp,
"----- o2n_col\n");
2021 for (i = 0; i < m; ++i)
2023 fprintf (fp,
"%i ", 1 + s->o2n_col[i] + beg_row);
2027 if (myid_dh == np_dh - 1)
2044 #define __FUNC__ "find_bdry_nodes_seq_private"
2048 START_FUNC_DH
int i, j, row, blocks = s->blocks;
2051 tmp = (
int *) MALLOC_DH (m *
sizeof (
int));
2053 for (i = 0; i < m; ++i)
2059 for (i = 0; i < blocks; ++i)
2061 int beg_row = s->beg_row[i];
2062 int end_row = beg_row + s->row_count[i];
2064 for (row = beg_row; row < end_row; ++row)
2066 bool isBdry =
false;
2068 EuclidGetRow (A, row, &len, &cval, NULL);
2071 for (j = 0; j < len; ++j)
2075 if (col < beg_row || col >= end_row)
2083 EuclidRestoreRow (A, row, &len, &cval, NULL);
2091 for (i = 0; i < blocks; ++i)
2093 int beg_row = s->beg_row[i];
2094 int end_row = beg_row + s->row_count[i];
2096 for (row = beg_row; row < end_row; ++row)
2101 s->bdry_count[i] = ct;
2107 for (i = 0; i < blocks; ++i)
2109 int beg_row = s->beg_row[i];
2110 int end_row = beg_row + s->row_count[i];
2111 int interiorIDX = beg_row;
2112 int bdryIDX = end_row - s->bdry_count[i];
2114 for (row = beg_row; row < end_row; ++row)
2118 s->o2n_col[row] = bdryIDX++;
2122 s->o2n_col[row] = interiorIDX++;
2128 invert_perm (m, s->o2n_col, s->n2o_row);
2136 #define __FUNC__ "SubdomainGraph_dhPrintSubdomainGraph"
2140 START_FUNC_DH
if (myid_dh == 0)
2145 "\n-----------------------------------------------------\n");
2146 fprintf (fp,
"SubdomainGraph, and coloring and ordering information\n");
2147 fprintf (fp,
"-----------------------------------------------------\n");
2148 fprintf (fp,
"colors used: %i\n", s->colors);
2150 fprintf (fp,
"o2n ordering vector: ");
2151 for (i = 0; i < s->blocks; ++i)
2152 fprintf (fp,
"%i ", s->o2n_sub[i]);
2154 fprintf (fp,
"\ncoloring vector (node, color): \n");
2155 for (i = 0; i < s->blocks; ++i)
2156 fprintf (fp,
" %i, %i\n", i, s->colorVec[i]);
2159 fprintf (fp,
"Adjacency lists:\n");
2161 for (i = 0; i < s->blocks; ++i)
2163 fprintf (fp,
" P_%i :: ", i);
2164 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
2166 fprintf (fp,
"%i ", s->adj[j]);
2170 fprintf (fp,
"-----------------------------------------------------\n");
2176 #define __FUNC__ "adjust_matrix_perms_private"
2180 START_FUNC_DH
int i, j, blocks = s->blocks;
2181 int *o2n = s->o2n_col;
2183 for (i = 0; i < blocks; ++i)
2185 int beg_row = s->beg_row[i];
2186 int end_row = beg_row + s->row_count[i];
2187 int adjust = s->beg_rowP[i] - s->beg_row[i];
2188 for (j = beg_row; j < end_row; ++j)
2192 invert_perm (m, s->o2n_col, s->n2o_row);
2197 #define __FUNC__ "SubdomainGraph_dhPrintRatios"
2201 START_FUNC_DH
int i;
2213 fprintf (fp,
"Subdomain interior/boundary node ratios\n");
2214 fprintf (fp,
"---------------------------------------\n");
2217 for (i = 0; i < blocks; ++i)
2219 if (s->bdry_count[i] == 0)
2226 (double) (s->row_count[i] -
2227 s->bdry_count[i]) / (double) s->bdry_count[i];
2232 shellSort_float (blocks, ratio);
2238 for (i = 0; i < blocks; ++i)
2240 fprintf (fp,
"%0.2g ", ratio[i]);
2251 fprintf (fp,
"10 smallest ratios: ");
2252 for (i = 0; i < 10; ++i)
2254 fprintf (fp,
"%0.2g ", ratio[i]);
2257 fprintf (fp,
"10 largest ratios: ");
2259 int start = blocks - 6, stop = blocks - 1;
2260 for (i = start; i < stop; ++i)
2262 fprintf (fp,
"%0.2g ", ratio[i]);
2273 #define __FUNC__ "SubdomainGraph_dhPrintStats"
2277 START_FUNC_DH
double *timing = sg->timing;
2279 fprintf_dh (fp,
"\nSubdomainGraph timing report\n");
2280 fprintf_dh (fp,
"-----------------------------\n");
2281 fprintf_dh (fp,
"total setup time: %0.2f\n", timing[TOTAL_SGT]);
2282 fprintf_dh (fp,
" find neighbors in subdomain graph: %0.2f\n",
2283 timing[FIND_NABORS_SGT]);
2284 fprintf_dh (fp,
" locally order interiors and bdry: %0.2f\n",
2285 timing[ORDER_BDRY_SGT]);
2286 fprintf_dh (fp,
" form and color subdomain graph: %0.2f\n",
2287 timing[FORM_GRAPH_SGT]);
2288 fprintf_dh (fp,
" exchange bdry permutations: %0.2f\n",
2289 timing[EXCHANGE_PERMS_SGT]);
2290 fprintf_dh (fp,
" everything else (should be small): %0.2f\n",
2291 timing[TOTAL_SGT] - (timing[FIND_NABORS_SGT] +
2292 timing[ORDER_BDRY_SGT] +
2293 timing[FORM_GRAPH_SGT] +
2294 timing[EXCHANGE_PERMS_SGT]));