43 #include "SubdomainGraph_dh.h" 
   44 #include "getRow_dh.h" 
   46 #include "Parser_dh.h" 
   47 #include "Hash_i_dh.h" 
   48 #include "mat_dh_private.h" 
   50 #include "SortedSet_dh.h" 
   51 #include "shellSort_dh.h" 
   76                      int *interiorNodes, 
int *bdryNodes,
 
   77                      int *interiorCount, 
int *bdryCount);
 
   79                        void *A, 
int *interiorNodes,
 
   80                        int *bdryNodes, 
int *interiorCount,
 
   91 #define __FUNC__ "SubdomainGraph_dhCreate" 
  102   tmp->ptrs = tmp->adj = NULL;
 
  104   tmp->colorVec = NULL;
 
  105   tmp->o2n_sub = tmp->n2o_sub = NULL;
 
  106   tmp->beg_row = tmp->beg_rowP = NULL;
 
  107   tmp->bdry_count = tmp->row_count = NULL;
 
  108   tmp->loNabors = tmp->hiNabors = tmp->allNabors = NULL;
 
  109   tmp->loCount = tmp->hiCount = tmp->allCount = 0;
 
  112   tmp->n2o_row = tmp->o2n_col = NULL;
 
  113   tmp->o2n_ext = tmp->n2o_ext = NULL;
 
  115   tmp->doNotColor = Parser_dhHasSwitch (parser_dh, 
"-doNotColor");
 
  116   tmp->debug = Parser_dhHasSwitch (parser_dh, 
"-debug_SubGraph");
 
  119     for (i = 0; i < TIMING_BINS_SG; ++i)
 
  120       tmp->timing[i] = 0.0;
 
  125 #define __FUNC__ "SubdomainGraph_dhDestroy" 
  129   START_FUNC_DH 
if (s->ptrs != NULL)
 
  139   if (s->colorVec != NULL)
 
  141       FREE_DH (s->colorVec);
 
  144   if (s->o2n_sub != NULL)
 
  146       FREE_DH (s->o2n_sub);
 
  149   if (s->n2o_sub != NULL)
 
  151       FREE_DH (s->n2o_sub);
 
  155   if (s->beg_row != NULL)
 
  157       FREE_DH (s->beg_row);
 
  160   if (s->beg_rowP != NULL)
 
  162       FREE_DH (s->beg_rowP);
 
  165   if (s->row_count != NULL)
 
  167       FREE_DH (s->row_count);
 
  170   if (s->bdry_count != NULL)
 
  172       FREE_DH (s->bdry_count);
 
  175   if (s->loNabors != NULL)
 
  177       FREE_DH (s->loNabors);
 
  180   if (s->hiNabors != NULL)
 
  182       FREE_DH (s->hiNabors);
 
  185   if (s->allNabors != NULL)
 
  187       FREE_DH (s->allNabors);
 
  191   if (s->n2o_row != NULL)
 
  193       FREE_DH (s->n2o_row);
 
  196   if (s->o2n_col != NULL)
 
  198       FREE_DH (s->o2n_col);
 
  201   if (s->o2n_ext != NULL)
 
  203       Hash_i_dhDestroy (s->o2n_ext);
 
  206   if (s->n2o_ext != NULL)
 
  208       Hash_i_dhDestroy (s->n2o_ext);
 
  216 #define __FUNC__ "SubdomainGraph_dhInit" 
  220   START_FUNC_DH 
double t1 = MPI_Wtime ();
 
  225   if (np_dh == 1 || blocks > 1)
 
  228       init_seq_private (s, blocks, bj, A);
 
  234       init_mpi_private (s, np_dh, bj, A);
 
  238   s->timing[TOTAL_SGT] += (MPI_Wtime () - t1);
 
  243 #define __FUNC__ "SubdomainGraph_dhFindOwner" 
  247   START_FUNC_DH 
int sd;
 
  248   int *beg_row = s->beg_row;
 
  249   int *row_count = s->row_count;
 
  250   int owner = -1, blocks = s->blocks;
 
  253     beg_row = s->beg_rowP;
 
  256   for (sd = 0; sd < blocks; ++sd)
 
  258       if (idx >= beg_row[sd] && idx < beg_row[sd] + row_count[sd])
 
  268       fprintf (stderr, 
"@@@ failed to find owner for idx = %i @@@\n", idx);
 
  269       fprintf (stderr, 
"blocks= %i\n", blocks);
 
  271       sprintf (msgBuf_dh, 
"failed to find owner for idx = %i", idx);
 
  272       SET_ERROR (-1, msgBuf_dh);
 
  275 END_FUNC_VAL (owner)}
 
  279 #define __FUNC__ "SubdomainGraph_dhPrintStatsLong" 
  283   START_FUNC_DH 
int i, j, k;
 
  284   double max = 0, min = INT_MAX;
 
  287        "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
 
  288   fprintf (fp, 
"colors used     = %i\n", s->colors);
 
  289   fprintf (fp, 
"subdomain count = %i\n", s->blocks);
 
  292   fprintf (fp, 
"\ninterior/boundary node ratios:\n");
 
  294   for (i = 0; i < s->blocks; ++i)
 
  296       int inNodes = s->row_count[i] - s->bdry_count[i];
 
  297       int bdNodes = s->bdry_count[i];
 
  306       ratio = (double) inNodes / (
double) bdNodes;
 
  309       max = MAX (max, ratio);
 
  310       min = MIN (min, ratio);
 
  312            "   P_%i: first= %3i  rowCount= %3i  interior= %3i  bdry= %3i  ratio= %0.1f\n",
 
  313            i, 1 + s->beg_row[i], s->row_count[i], inNodes, bdNodes,
 
  318   fprintf (fp, 
"\nmax interior/bdry ratio = %.1f\n", max);
 
  319   fprintf (fp, 
"min interior/bdry ratio = %.1f\n", min);
 
  327       fprintf (fp, 
"\nunpermuted subdomain graph: \n");
 
  328       for (i = 0; i < s->blocks; ++i)
 
  330       fprintf (fp, 
"%i :: ", i);
 
  331       for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
 
  333           fprintf (fp, 
"%i  ", s->adj[j]);
 
  343   fprintf (fp, 
"\no2n subdomain permutation:\n");
 
  344   for (i = 0; i < s->blocks; ++i)
 
  346       fprintf (fp, 
"  %i %i\n", i, s->o2n_sub[i]);
 
  356       fprintf (fp, 
"\nlocal n2o_row permutation:\n   ");
 
  357       for (i = 0; i < s->row_count[myid_dh]; ++i)
 
  359       fprintf (fp, 
"%i ", s->n2o_row[i]);
 
  366       fprintf (fp, 
"\nlocal o2n_col permutation:\n   ");
 
  367       for (i = 0; i < s->row_count[myid_dh]; ++i)
 
  369       fprintf (fp, 
"%i ", s->o2n_col[i]);
 
  379       fprintf (fp, 
"\nlocal n2o_row permutation:\n");
 
  380       fprintf (fp, 
"--------------------------\n");
 
  381       for (k = 0; k < s->blocks; ++k)
 
  383       int beg_row = s->beg_row[k];
 
  384       int end_row = beg_row + s->row_count[k];
 
  386       for (i = beg_row; i < end_row; ++i)
 
  388           fprintf (fp, 
"%i ", s->n2o_row[i]);
 
  396       fprintf (fp, 
"\nlocal o2n_col permutation:\n");
 
  397       fprintf (fp, 
"--------------------------\n");
 
  398       for (k = 0; k < s->blocks; ++k)
 
  400       int beg_row = s->beg_row[k];
 
  401       int end_row = beg_row + s->row_count[k];
 
  403       for (i = beg_row; i < end_row; ++i)
 
  405           fprintf (fp, 
"%i ", s->o2n_col[i]);
 
  416 #define __FUNC__ "init_seq_private" 
  420   START_FUNC_DH 
int m, n, beg_row;
 
  428   EuclidGetDimensions (A, &beg_row, &m, &n);
 
  437   allocate_storage_private (s, blocks, m, bj);
 
  448     int rpp = m / blocks;
 
  450     if (rpp * blocks < m)
 
  454     for (i = 1; i < blocks; ++i)
 
  455       s->beg_row[i] = rpp + s->beg_row[i - 1];
 
  456     for (i = 0; i < blocks; ++i)
 
  457       s->row_count[i] = rpp;
 
  458     s->row_count[blocks - 1] = m - rpp * (blocks - 1);
 
  460   memcpy (s->beg_rowP, s->beg_row, blocks * sizeof (
int));
 
  481       find_bdry_nodes_seq_private (s, m, A);
 
  487       for (i = 0; i < m; ++i)
 
  493   s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
 
  507       form_subdomaingraph_seq_private (s, m, A);
 
  512       printf_dh (
"subdomain coloring and reordering is OFF\n");
 
  513       for (i = 0; i < blocks; ++i)
 
  522       SET_INFO (
"subdomain coloring and reordering is ON");
 
  523       color_subdomain_graph_private (s);
 
  532       for (i = 0; i < blocks; ++i)
 
  538   s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
 
  549       adjust_matrix_perms_private (s, m);
 
  574 #define __FUNC__ "partition_metis_private" 
  578   START_FUNC_DH 
if (ignoreMe)
 
  579     SET_V_ERROR (
"not implemented");
 
  584 #define __FUNC__ "allocate_storage_private" 
  588   START_FUNC_DH 
if (!bj)
 
  590       s->ptrs = (
int *) MALLOC_DH ((blocks + 1) * 
sizeof (int));
 
  593       s->colorVec = (
int *) MALLOC_DH (blocks * 
sizeof (
int));
 
  595       s->loNabors = (
int *) MALLOC_DH (np_dh * 
sizeof (
int));
 
  597       s->hiNabors = (
int *) MALLOC_DH (np_dh * 
sizeof (
int));
 
  599       s->allNabors = (
int *) MALLOC_DH (np_dh * 
sizeof (
int));
 
  603   s->n2o_row = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
  605   s->o2n_col = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
  610   s->beg_row = (
int *) MALLOC_DH ((blocks) * 
sizeof (int));
 
  612   s->beg_rowP = (
int *) MALLOC_DH ((blocks) * 
sizeof (int));
 
  614   s->row_count = (
int *) MALLOC_DH (blocks * 
sizeof (
int));
 
  616   s->bdry_count = (
int *) MALLOC_DH (blocks * 
sizeof (
int));
 
  618   s->o2n_sub = (
int *) MALLOC_DH (blocks * 
sizeof (
int));
 
  620   s->n2o_sub = (
int *) MALLOC_DH (blocks * 
sizeof (
int));
 
  629 #define __FUNC__ "init_mpi_private" 
  633   START_FUNC_DH 
int m, n, beg_row;
 
  637   symmetric = Parser_dhHasSwitch (parser_dh, 
"-sym");
 
  639   if (Parser_dhHasSwitch (parser_dh, 
"-makeSymmetric"))
 
  648   EuclidGetDimensions (A, &beg_row, &m, &n);
 
  657   allocate_storage_private (s, blocks, m, bj);
 
  668       MPI_Allgather (&beg_row, 1, MPI_INT, s->beg_row, 1, MPI_INT, comm_dh);
 
  669       MPI_Allgather (&m, 1, MPI_INT, s->row_count, 1, MPI_INT, comm_dh);
 
  670       memcpy (s->beg_rowP, s->beg_row, np_dh * sizeof (
int));
 
  674       s->beg_row[myid_dh] = beg_row;
 
  675       s->beg_rowP[myid_dh] = beg_row;
 
  676       s->row_count[myid_dh] = m;
 
  688       find_all_neighbors_sym_private (s, m, A);
 
  693       find_all_neighbors_unsym_private (s, m, A);
 
  696       s->timing[FIND_NABORS_SGT] += (MPI_Wtime () - t1);
 
  711       int *interiorNodes, *bdryNodes;
 
  712       int interiorCount, bdryCount;
 
  713       int *o2n = s->o2n_col, idx;
 
  716       interiorNodes = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
  718       bdryNodes = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
  726       find_bdry_nodes_sym_private (s, m, A,
 
  727                        interiorNodes, bdryNodes,
 
  728                        &interiorCount, &bdryCount);
 
  733       find_bdry_nodes_unsym_private (s, m, A,
 
  734                      interiorNodes, bdryNodes,
 
  735                      &interiorCount, &bdryCount);
 
  740       MPI_Allgather (&bdryCount, 1, MPI_INT, s->bdry_count, 1, MPI_INT,
 
  745       for (i = 0; i < interiorCount; ++i)
 
  747       o2n[interiorNodes[i]] = idx++;
 
  749       for (i = 0; i < bdryCount; ++i)
 
  751       o2n[bdryNodes[i]] = idx++;
 
  755       invert_perm (m, o2n, s->n2o_row);
 
  758       FREE_DH (interiorNodes);
 
  767       int *o2n = s->o2n_col, *n2o = s->n2o_row;
 
  770       for (i = 0; i < m; ++i)
 
  776   s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
 
  790       form_subdomaingraph_mpi_private (s);
 
  795       printf_dh (
"subdomain coloring and reordering is OFF\n");
 
  796       for (i = 0; i < blocks; ++i)
 
  805       SET_INFO (
"subdomain coloring and reordering is ON");
 
  806       color_subdomain_graph_private (s);
 
  809       s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
 
  819       find_ordered_neighbors_private (s);
 
  832       SubdomainGraph_dhExchangePerms (s);
 
  834       s->timing[EXCHANGE_PERMS_SGT] += (MPI_Wtime () - t1);
 
  842 #define __FUNC__ "SubdomainGraph_dhExchangePerms" 
  846   START_FUNC_DH MPI_Request * recv_req = NULL, *send_req = NULL;
 
  847   MPI_Status *status = NULL;
 
  848   int *nabors = s->allNabors, naborCount = s->allCount;
 
  849   int i, j, *sendBuf = NULL, *recvBuf = NULL, *naborIdx = NULL, nz;
 
  850   int m = s->row_count[myid_dh];
 
  851   int beg_row = s->beg_row[myid_dh];
 
  852   int beg_rowP = s->beg_rowP[myid_dh];
 
  853   int *bdryNodeCounts = s->bdry_count;
 
  854   int myBdryCount = s->bdry_count[myid_dh];
 
  856   int myFirstBdry = m - myBdryCount;
 
  857   int *n2o_row = s->n2o_row;
 
  860   if (logFile != NULL && s->debug)
 
  866   sendBuf = (
int *) MALLOC_DH (2 * myBdryCount * 
sizeof (
int));
 
  873            "\nSUBG myFirstBdry= %i  myBdryCount= %i  m= %i  beg_rowP= %i\n",
 
  874            1 + myFirstBdry, myBdryCount, m, 1 + beg_rowP);
 
  878   for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
 
  880       sendBuf[2 * j] = n2o_row[i] + beg_row;
 
  881       sendBuf[2 * j + 1] = i + beg_rowP;
 
  886       fprintf (logFile, 
"\nSUBG SEND_BUF:\n");
 
  887       for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
 
  889       fprintf (logFile, 
"SUBG  %i, %i\n", 1 + sendBuf[2 * j],
 
  890            1 + sendBuf[2 * j + 1]);
 
  899   naborIdx = (
int *) MALLOC_DH ((1 + naborCount) * 
sizeof (int));
 
  903   for (i = 0; i < naborCount; ++i)
 
  905       nz += (2 * bdryNodeCounts[nabors[i]]);
 
  906       naborIdx[i + 1] = nz;
 
  910   recvBuf = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
  917   recv_req = (MPI_Request *) MALLOC_DH (naborCount * 
sizeof (MPI_Request));
 
  919   send_req = (MPI_Request *) MALLOC_DH (naborCount * 
sizeof (MPI_Request));
 
  921   status = (MPI_Status *) MALLOC_DH (naborCount * 
sizeof (MPI_Status));
 
  924   for (i = 0; i < naborCount; ++i)
 
  926       int nabr = nabors[i];
 
  927       int *buf = recvBuf + naborIdx[i];
 
  928       int ct = 2 * bdryNodeCounts[nabr];
 
  931       MPI_Isend (sendBuf, 2 * myBdryCount, MPI_INT, nabr, 444, comm_dh,
 
  936       fprintf (logFile, 
"SUBG   sending %i elts to %i\n", 2 * myBdryCount,
 
  941       MPI_Irecv (buf, ct, MPI_INT, nabr, 444, comm_dh, &(recv_req[i]));
 
  945       fprintf (logFile, 
"SUBG  receiving %i elts from %i\n", ct, nabr);
 
  950   MPI_Waitall (naborCount, send_req, status);
 
  951   MPI_Waitall (naborCount, recv_req, status);
 
  953   Hash_i_dhCreate (&n2o_table, nz / 2);
 
  955   Hash_i_dhCreate (&o2n_table, nz / 2);
 
  957   s->n2o_ext = n2o_table;
 
  958   s->o2n_ext = o2n_table;
 
  961   for (i = 0; i < nz; i += 2)
 
  963       int old = recvBuf[i];
 
  964       int new = recvBuf[i + 1];
 
  968       fprintf (logFile, 
"SUBG  i= %i  old= %i  new= %i\n", i, old + 1,
 
  973       Hash_i_dhInsert (o2n_table, old, 
new);
 
  975       Hash_i_dhInsert (n2o_table, 
new, old);
 
  985   if (naborIdx != NULL)
 
  995   if (recv_req != NULL)
 
 1000   if (send_req != NULL)
 
 1016 #define __FUNC__ "form_subdomaingraph_mpi_private" 
 1020   START_FUNC_DH 
int *nabors = s->allNabors, nct = s->allCount;
 
 1022   int i, j, nz, *adj, *ptrs = s->ptrs;
 
 1023   MPI_Request *recvReqs = NULL, sendReq;
 
 1024   MPI_Status *statuses = NULL, status;
 
 1029       idxAll = (
int *) MALLOC_DH (np_dh * 
sizeof (
int));
 
 1032   MPI_Gather (&nct, 1, MPI_INT, idxAll, 1, MPI_INT, 0, comm_dh);
 
 1038       for (i = 0; i < np_dh; ++i)
 
 1041   MPI_Bcast (&nz, 1, MPI_INT, 0, comm_dh);
 
 1046   adj = s->adj = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
 1052       recvReqs = (MPI_Request *) MALLOC_DH (np_dh * 
sizeof (MPI_Request));
 
 1054       statuses = (MPI_Status *) MALLOC_DH (np_dh * 
sizeof (MPI_Status));
 
 1059       for (j = 0; j < np_dh; ++j)
 
 1060     ptrs[j + 1] = ptrs[j] + idxAll[j];
 
 1063       for (j = 0; j < np_dh; ++j)
 
 1067       MPI_Irecv (adj + ptrs[j], ct, MPI_INT, j, 42, comm_dh,
 
 1073   MPI_Isend (nabors, nct, MPI_INT, 0, 42, comm_dh, &sendReq);
 
 1078       MPI_Waitall (np_dh, recvReqs, statuses);
 
 1080   MPI_Wait (&sendReq, &status);
 
 1083   MPI_Bcast (ptrs, 1 + np_dh, MPI_INT, 0, comm_dh);
 
 1084   MPI_Bcast (adj, nz, MPI_INT, 0, comm_dh);
 
 1091   if (recvReqs != NULL)
 
 1096   if (statuses != NULL)
 
 1108 #define __FUNC__ "form_subdomaingraph_seq_private" 
 1112   START_FUNC_DH 
int *dense, i, j, row, blocks = s->blocks;
 
 1113   int *cval, len, *adj;
 
 1114   int idx = 0, *ptrs = s->ptrs;
 
 1121   adj = s->adj = (
int *) MALLOC_DH (blocks * blocks * 
sizeof (
int));
 
 1124   dense = (
int *) MALLOC_DH (blocks * blocks * 
sizeof (
int));
 
 1126   for (i = 0; i < blocks * blocks; ++i)
 
 1130   for (i = 0; i < blocks; ++i)
 
 1132       int beg_row = s->beg_row[i];
 
 1133       int end_row = beg_row + s->row_count[i];
 
 1135       for (row = beg_row; row < end_row; ++row)
 
 1137       EuclidGetRow (A, row, &len, &cval, NULL);
 
 1139       for (j = 0; j < len; ++j)
 
 1142           if (col < beg_row || col >= end_row)
 
 1144           int owner = SubdomainGraph_dhFindOwner (s, col, 
false);
 
 1146           dense[i * blocks + owner] = 1;
 
 1147           dense[owner * blocks + i] = 1;
 
 1150       EuclidRestoreRow (A, row, &len, &cval, NULL);
 
 1159   for (i = 0; i < blocks; ++i)
 
 1161       for (j = 0; j < blocks; ++j)
 
 1163       if (dense[i * blocks + j])
 
 1177 #define __FUNC__ "find_all_neighbors_sym_private" 
 1181   START_FUNC_DH 
int *marker, i, j, beg_row, end_row;
 
 1182   int row, len, *cval, ct = 0;
 
 1183   int *nabors = s->allNabors;
 
 1185   marker = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
 1187   for (i = 0; i < m; ++i)
 
 1191     (
"finding nabors in subdomain graph for structurally symmetric matrix");
 
 1192   SET_INFO (
"(if this isn't what you want, use '-sym 0' switch)");
 
 1194   beg_row = s->beg_row[myid_dh];
 
 1195   end_row = beg_row + s->row_count[myid_dh];
 
 1197   for (row = beg_row; row < end_row; ++row)
 
 1199       EuclidGetRow (A, row, &len, &cval, NULL);
 
 1201       for (j = 0; j < len; ++j)
 
 1204       if (col < beg_row || col >= end_row)
 
 1206           int owner = SubdomainGraph_dhFindOwner (s, col, 
false);
 
 1211           nabors[ct++] = owner;
 
 1215       EuclidRestoreRow (A, row, &len, &cval, NULL);
 
 1230 #define __FUNC__ "find_all_neighbors_unsym_private" 
 1234   START_FUNC_DH 
int i, j, row, beg_row, end_row;
 
 1236   int *cval, len, idx = 0;
 
 1237   int nz, *nabors = s->allNabors, *myNabors;
 
 1239   myNabors = (
int *) MALLOC_DH (np_dh * 
sizeof (
int));
 
 1241   marker = (
int *) MALLOC_DH (np_dh * 
sizeof (
int));
 
 1243   for (i = 0; i < np_dh; ++i)
 
 1247     (
"finding nabors in subdomain graph for structurally unsymmetric matrix");
 
 1252   beg_row = s->beg_row[myid_dh];
 
 1253   end_row = beg_row + s->row_count[myid_dh];
 
 1258   for (row = beg_row; row < end_row; ++row)
 
 1260       EuclidGetRow (A, row, &len, &cval, NULL);
 
 1262       for (j = 0; j < len; ++j)
 
 1266       if (col < beg_row || col >= end_row)
 
 1268           int owner = SubdomainGraph_dhFindOwner (s, col, 
false);
 
 1276           myNabors[idx++] = owner;
 
 1280       EuclidRestoreRow (A, row, &len, &cval, NULL);
 
 1307   MPI_Alltoall (marker, 1, MPI_INT, nabors, 1, MPI_INT, comm_dh);
 
 1311   for (i = 0; i < idx; ++i)
 
 1312     nabors[myNabors[i]] = 1;
 
 1315   nabors[myid_dh] = 0;
 
 1325   for (i = 0; i < np_dh; ++i)
 
 1331   memcpy (nabors, myNabors, nz * 
sizeof (
int));
 
 1338   if (myNabors != NULL)
 
 1348 #define __FUNC__ "find_bdry_nodes_sym_private" 
 1351                  int *interiorNodes, 
int *bdryNodes,
 
 1352                  int *interiorCount, 
int *bdryCount)
 
 1354   START_FUNC_DH 
int beg_row = s->beg_row[myid_dh];
 
 1355   int end_row = beg_row + s->row_count[myid_dh];
 
 1356   int row, inCt = 0, bdCt = 0;
 
 1362   for (row = beg_row; row < end_row; ++row)
 
 1364       bool isBdry = 
false;
 
 1366       EuclidGetRow (A, row, &len, &cval, NULL);
 
 1369       for (j = 0; j < len; ++j)
 
 1372       if (col < beg_row || col >= end_row)
 
 1378       EuclidRestoreRow (A, row, &len, &cval, NULL);
 
 1383       bdryNodes[bdCt++] = row - beg_row;
 
 1387       interiorNodes[inCt++] = row - beg_row;
 
 1391   *interiorCount = inCt;
 
 1396 #define BDRY_NODE_TAG 42 
 1399 #define __FUNC__ "find_bdry_nodes_unsym_private" 
 1402                    int *interiorNodes, 
int *boundaryNodes,
 
 1403                    int *interiorCount, 
int *bdryCount)
 
 1405   START_FUNC_DH 
int beg_row = s->beg_row[myid_dh];
 
 1406   int end_row = beg_row + s->row_count[myid_dh];
 
 1410   int *rpIN = NULL, *rpOUT = NULL;
 
 1411   int *sendBuf, *recvBuf;
 
 1412   int *marker, inCt, bdCt;
 
 1415   MPI_Request *sendReq, *recvReq;
 
 1419   SortedSet_dhCreate (&ss, m);
 
 1426   for (row = beg_row; row < end_row; ++row)
 
 1428       bool isBdry = 
false;
 
 1430       EuclidGetRow (A, row, &len, &cval, NULL);
 
 1433       for (j = 0; j < len; ++j)
 
 1436       if (col < beg_row || col >= end_row)
 
 1439           SortedSet_dhInsert (ss, col);
 
 1444       EuclidRestoreRow (A, row, &len, &cval, NULL);
 
 1449       SortedSet_dhInsert (ss, row);
 
 1458   sendBuf = (
int *) MALLOC_DH (np_dh * 
sizeof (
int));
 
 1460   recvBuf = (
int *) MALLOC_DH (np_dh * 
sizeof (
int));
 
 1462   rpOUT = (
int *) MALLOC_DH ((np_dh + 1) * 
sizeof (int));
 
 1465   for (i = 0; i < np_dh; ++i)
 
 1469   SortedSet_dhGetList (ss, &list, &count);
 
 1472   for (i = 0; i < count;  )
 
 1478       owner = SubdomainGraph_dhFindOwner (s, node, 
false);
 
 1480       last = s->beg_row[owner] + s->row_count[owner];
 
 1483       while ((i < count) && (list[i] < last))
 
 1487       sendBuf[owner] = rpOUT[sendCt] - rpOUT[sendCt - 1];
 
 1495   MPI_Alltoall (sendBuf, 1, MPI_INT, recvBuf, 1, MPI_INT, comm_dh);
 
 1504   rpIN = (
int *) MALLOC_DH ((np_dh + 1) * 
sizeof (int));
 
 1509   for (i = 0; i < np_dh; ++i)
 
 1518   bdryNodes = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
 1520   sendReq = (MPI_Request *) MALLOC_DH (sendCt * 
sizeof (MPI_Request));
 
 1522   recvReq = (MPI_Request *) MALLOC_DH (recvCt * 
sizeof (MPI_Request));
 
 1524   max = MAX (sendCt, recvCt);
 
 1525   status = (MPI_Status *) MALLOC_DH (max * 
sizeof (MPI_Status));
 
 1530   for (i = 0; i < np_dh; ++i)
 
 1534       MPI_Irecv (bdryNodes + rpIN[j], recvBuf[i], MPI_INT,
 
 1535              i, BDRY_NODE_TAG, comm_dh, recvReq + j);
 
 1542   for (i = 0; i < np_dh; ++i)
 
 1546       MPI_Isend (list + rpOUT[j], sendBuf[i], MPI_INT,
 
 1547              i, BDRY_NODE_TAG, comm_dh, sendReq + j);
 
 1553   MPI_Waitall (sendCt, sendReq, status);
 
 1554   MPI_Waitall (recvCt, recvReq, status);
 
 1557   for (i = 0; i < nz; ++i)
 
 1558     bdryNodes[i] -= beg_row;
 
 1564   marker = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
 1566   for (i = 0; i < m; ++i)
 
 1568   for (i = 0; i < nz; ++i)
 
 1569     marker[bdryNodes[i]] = 1;
 
 1572   for (i = 0; i < m; ++i)
 
 1576       boundaryNodes[bdCt++] = i;
 
 1580       interiorNodes[inCt++] = i;
 
 1583   *interiorCount = inCt;
 
 1589   SortedSet_dhDestroy (ss);
 
 1601   if (sendBuf != NULL)
 
 1606   if (recvBuf != NULL)
 
 1611   if (bdryNodes != NULL)
 
 1613       FREE_DH (bdryNodes);
 
 1621   if (sendReq != NULL)
 
 1626   if (recvReq != NULL)
 
 1640 #define __FUNC__ "find_ordered_neighbors_private" 
 1644   START_FUNC_DH 
int *loNabors = s->loNabors;
 
 1645   int *hiNabors = s->hiNabors;
 
 1646   int *allNabors = s->allNabors, allCount = s->allCount;
 
 1647   int loCt = 0, hiCt = 0;
 
 1648   int *o2n = s->o2n_sub;
 
 1649   int i, myNewId = o2n[myid_dh];
 
 1651   for (i = 0; i < allCount; ++i)
 
 1653       int nabor = allNabors[i];
 
 1654       if (o2n[nabor] < myNewId)
 
 1656       loNabors[loCt++] = nabor;
 
 1660       hiNabors[hiCt++] = nabor;
 
 1670 #define __FUNC__ "color_subdomain_graph_private" 
 1674   START_FUNC_DH 
int i, n = np_dh;
 
 1675   int *rp = s->ptrs, *cval = s->adj;
 
 1676   int j, *marker, thisNodesColor, *colorCounter;
 
 1677   int *o2n = s->o2n_sub;
 
 1678   int *color = s->colorVec;
 
 1683   marker = (
int *) MALLOC_DH ((n + 1) * 
sizeof (int));
 
 1684   colorCounter = (
int *) MALLOC_DH ((n + 1) * 
sizeof (int));
 
 1685   for (i = 0; i <= n; ++i)
 
 1688       colorCounter[i] = 0;
 
 1694   for (i = 0; i < n; ++i)
 
 1700       for (j = rp[i]; j < rp[i + 1]; ++j)
 
 1702       int nabor = cval[j];
 
 1705           int naborsColor = color[nabor];
 
 1706           marker[naborsColor] = i;
 
 1711       thisNodesColor = -1;
 
 1712       for (j = 0; j < n; ++j)
 
 1720       color[i] = thisNodesColor;
 
 1721       colorCounter[1 + thisNodesColor] += 1;
 
 1729   for (i = 1; i < n; ++i)
 
 1731       if (colorCounter[i] == 0)
 
 1733       colorCounter[i] += colorCounter[i - 1];
 
 1736   for (i = 0; i < n; ++i)
 
 1738       o2n[i] = colorCounter[color[i]];
 
 1739       colorCounter[color[i]] += 1;
 
 1743   invert_perm (n, s->o2n_sub, s->n2o_sub);
 
 1752     for (j = 0; j < n; ++j)
 
 1754     if (marker[j] == -1)
 
 1767     for (i = 0; i < n; ++i)
 
 1769     int old = s->n2o_sub[i];
 
 1770     s->beg_rowP[old] = sum;
 
 1771     sum += s->row_count[old];
 
 1777   FREE_DH (colorCounter);
 
 1783 #define __FUNC__ "SubdomainGraph_dhDump" 
 1787   START_FUNC_DH 
int i;
 
 1799   fp = openFile_dh (filename, 
"w");
 
 1803   fprintf (fp, 
"----- colors used\n");
 
 1804   fprintf (fp, 
"%i\n", s->colors);
 
 1805   if (s->colorVec == NULL)
 
 1807       fprintf (fp, 
"s->colorVec == NULL\n");
 
 1811       fprintf (fp, 
"----- colorVec\n");
 
 1812       for (i = 0; i < sCt; ++i)
 
 1814       fprintf (fp, 
"%i ", s->colorVec[i]);
 
 1819   if (s->o2n_sub == NULL || s->o2n_sub == NULL)
 
 1821       fprintf (fp, 
"s->o2n_sub == NULL || s->o2n_sub == NULL\n");
 
 1825       fprintf (fp, 
"----- o2n_sub\n");
 
 1826       for (i = 0; i < sCt; ++i)
 
 1828       fprintf (fp, 
"%i ", s->o2n_sub[i]);
 
 1831       fprintf (fp, 
"----- n2o_sub\n");
 
 1832       for (i = 0; i < sCt; ++i)
 
 1834       fprintf (fp, 
"%i ", s->n2o_sub[i]);
 
 1840   if (s->beg_row == NULL || s->beg_rowP == NULL)
 
 1842       fprintf (fp, 
"s->beg_row == NULL || s->beg_rowP == NULL\n");
 
 1846       fprintf (fp, 
"----- beg_row\n");
 
 1847       for (i = 0; i < sCt; ++i)
 
 1849       fprintf (fp, 
"%i ", 1 + s->beg_row[i]);
 
 1852       fprintf (fp, 
"----- beg_rowP\n");
 
 1853       for (i = 0; i < sCt; ++i)
 
 1855       fprintf (fp, 
"%i ", 1 + s->beg_rowP[i]);
 
 1861   if (s->row_count == NULL || s->bdry_count == NULL)
 
 1863       fprintf (fp, 
"s->row_count == NULL || s->bdry_count == NULL\n");
 
 1867       fprintf (fp, 
"----- row_count\n");
 
 1868       for (i = 0; i < sCt; ++i)
 
 1870       fprintf (fp, 
"%i ", s->row_count[i]);
 
 1873       fprintf (fp, 
"----- bdry_count\n");
 
 1874       for (i = 0; i < sCt; ++i)
 
 1876       fprintf (fp, 
"%i ", s->bdry_count[i]);
 
 1883   if (s->ptrs == NULL || s->adj == NULL)
 
 1885       fprintf (fp, 
"s->ptrs == NULL || s->adj == NULL\n");
 
 1891       fprintf (fp, 
"----- subdomain graph\n");
 
 1892       for (i = 0; i < sCt; ++i)
 
 1894       fprintf (fp, 
"%i :: ", i);
 
 1895       ct = s->ptrs[i + 1] - s->ptrs[i];
 
 1898           shellSort_int (ct, s->adj + s->ptrs[i]);
 
 1901       for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
 
 1903           fprintf (fp, 
"%i ", s->adj[j]);
 
 1915   if (s->beg_rowP == NULL)
 
 1917       SET_V_ERROR (
"s->beg_rowP == NULL; can't continue");
 
 1919   if (s->row_count == NULL)
 
 1921       SET_V_ERROR (
"s->row_count == NULL; can't continue");
 
 1923   if (s->o2n_sub == NULL)
 
 1925       SET_V_ERROR (
"s->o2n_sub == NULL; can't continue");
 
 1931       fp = openFile_dh (filename, 
"a");
 
 1935       if (s->n2o_row == NULL || s->o2n_col == NULL)
 
 1937       fprintf (fp, 
"s->n2o_row == NULL|| s->o2n_col == NULL\n");
 
 1941       fprintf (fp, 
"----- n2o_row\n");
 
 1942       for (i = 0; i < s->m; ++i)
 
 1944           fprintf (fp, 
"%i ", 1 + s->n2o_row[i]);
 
 1955       fprintf (fp, 
"----- o2n_col\n");
 
 1956       for (i = 0; i < sCt; ++i)
 
 1958           int br = s->beg_row[i];
 
 1959           int er = br + s->row_count[i];
 
 1961           for (j = br; j < er; ++j)
 
 1963           fprintf (fp, 
"%i ", 1 + s->o2n_col[j]);
 
 1979       int id = s->n2o_sub[myid_dh];
 
 1983       if (s->beg_row != 0)
 
 1984     beg_row = s->beg_row[myid_dh];
 
 1987       for (pe = 0; pe < np_dh; ++pe)
 
 1989       MPI_Barrier (comm_dh);
 
 1992           fp = openFile_dh (filename, 
"a");
 
 1995         fprintf (fp, 
"----- n2o_row\n");
 
 1997           for (i = 0; i < m; ++i)
 
 1999           fprintf (fp, 
"%i ", 1 + s->n2o_row[i] + beg_row);
 
 2001           if (
id == np_dh - 1)
 
 2011       for (pe = 0; pe < np_dh; ++pe)
 
 2013       MPI_Barrier (comm_dh);
 
 2016           fp = openFile_dh (filename, 
"a");
 
 2019         fprintf (fp, 
"----- o2n_col\n");
 
 2021           for (i = 0; i < m; ++i)
 
 2023           fprintf (fp, 
"%i ", 1 + s->o2n_col[i] + beg_row);
 
 2027           if (myid_dh == np_dh - 1)
 
 2044 #define __FUNC__ "find_bdry_nodes_seq_private" 
 2048   START_FUNC_DH 
int i, j, row, blocks = s->blocks;
 
 2051   tmp = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
 2053   for (i = 0; i < m; ++i)
 
 2059   for (i = 0; i < blocks; ++i)
 
 2061       int beg_row = s->beg_row[i];
 
 2062       int end_row = beg_row + s->row_count[i];
 
 2064       for (row = beg_row; row < end_row; ++row)
 
 2066       bool isBdry = 
false;
 
 2068       EuclidGetRow (A, row, &len, &cval, NULL);
 
 2071       for (j = 0; j < len; ++j)
 
 2075           if (col < beg_row || col >= end_row)
 
 2083       EuclidRestoreRow (A, row, &len, &cval, NULL);
 
 2091   for (i = 0; i < blocks; ++i)
 
 2093       int beg_row = s->beg_row[i];
 
 2094       int end_row = beg_row + s->row_count[i];
 
 2096       for (row = beg_row; row < end_row; ++row)
 
 2101       s->bdry_count[i] = ct;
 
 2107   for (i = 0; i < blocks; ++i)
 
 2109       int beg_row = s->beg_row[i];
 
 2110       int end_row = beg_row + s->row_count[i];
 
 2111       int interiorIDX = beg_row;
 
 2112       int bdryIDX = end_row - s->bdry_count[i];
 
 2114       for (row = beg_row; row < end_row; ++row)
 
 2118           s->o2n_col[row] = bdryIDX++;
 
 2122           s->o2n_col[row] = interiorIDX++;
 
 2128   invert_perm (m, s->o2n_col, s->n2o_row);
 
 2136 #define __FUNC__ "SubdomainGraph_dhPrintSubdomainGraph" 
 2140   START_FUNC_DH 
if (myid_dh == 0)
 
 2145            "\n-----------------------------------------------------\n");
 
 2146       fprintf (fp, 
"SubdomainGraph, and coloring and ordering information\n");
 
 2147       fprintf (fp, 
"-----------------------------------------------------\n");
 
 2148       fprintf (fp, 
"colors used: %i\n", s->colors);
 
 2150       fprintf (fp, 
"o2n ordering vector: ");
 
 2151       for (i = 0; i < s->blocks; ++i)
 
 2152     fprintf (fp, 
"%i ", s->o2n_sub[i]);
 
 2154       fprintf (fp, 
"\ncoloring vector (node, color): \n");
 
 2155       for (i = 0; i < s->blocks; ++i)
 
 2156     fprintf (fp, 
"  %i, %i\n", i, s->colorVec[i]);
 
 2159       fprintf (fp, 
"Adjacency lists:\n");
 
 2161       for (i = 0; i < s->blocks; ++i)
 
 2163       fprintf (fp, 
"   P_%i :: ", i);
 
 2164       for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
 
 2166           fprintf (fp, 
"%i ", s->adj[j]);
 
 2170       fprintf (fp, 
"-----------------------------------------------------\n");
 
 2176 #define __FUNC__ "adjust_matrix_perms_private" 
 2180   START_FUNC_DH 
int i, j, blocks = s->blocks;
 
 2181   int *o2n = s->o2n_col;
 
 2183   for (i = 0; i < blocks; ++i)
 
 2185       int beg_row = s->beg_row[i];
 
 2186       int end_row = beg_row + s->row_count[i];
 
 2187       int adjust = s->beg_rowP[i] - s->beg_row[i];
 
 2188       for (j = beg_row; j < end_row; ++j)
 
 2192   invert_perm (m, s->o2n_col, s->n2o_row);
 
 2197 #define __FUNC__ "SubdomainGraph_dhPrintRatios" 
 2201   START_FUNC_DH 
int i;
 
 2213       fprintf (fp, 
"Subdomain interior/boundary node ratios\n");
 
 2214       fprintf (fp, 
"---------------------------------------\n");
 
 2217       for (i = 0; i < blocks; ++i)
 
 2219       if (s->bdry_count[i] == 0)
 
 2226         (double) (s->row_count[i] -
 
 2227               s->bdry_count[i]) / (double) s->bdry_count[i];
 
 2232       shellSort_float (blocks, ratio);
 
 2238       for (i = 0; i < blocks; ++i)
 
 2240           fprintf (fp, 
"%0.2g  ", ratio[i]);
 
 2251       fprintf (fp, 
"10 smallest ratios: ");
 
 2252       for (i = 0; i < 10; ++i)
 
 2254           fprintf (fp, 
"%0.2g  ", ratio[i]);
 
 2257       fprintf (fp, 
"10 largest ratios:  ");
 
 2259         int start = blocks - 6, stop = blocks - 1;
 
 2260         for (i = start; i < stop; ++i)
 
 2262         fprintf (fp, 
"%0.2g  ", ratio[i]);
 
 2273 #define __FUNC__ "SubdomainGraph_dhPrintStats" 
 2277   START_FUNC_DH 
double *timing = sg->timing;
 
 2279   fprintf_dh (fp, 
"\nSubdomainGraph timing report\n");
 
 2280   fprintf_dh (fp, 
"-----------------------------\n");
 
 2281   fprintf_dh (fp, 
"total setup time: %0.2f\n", timing[TOTAL_SGT]);
 
 2282   fprintf_dh (fp, 
"  find neighbors in subdomain graph: %0.2f\n",
 
 2283           timing[FIND_NABORS_SGT]);
 
 2284   fprintf_dh (fp, 
"  locally order interiors and bdry:  %0.2f\n",
 
 2285           timing[ORDER_BDRY_SGT]);
 
 2286   fprintf_dh (fp, 
"  form and color subdomain graph:    %0.2f\n",
 
 2287           timing[FORM_GRAPH_SGT]);
 
 2288   fprintf_dh (fp, 
"  exchange bdry permutations:        %0.2f\n",
 
 2289           timing[EXCHANGE_PERMS_SGT]);
 
 2290   fprintf_dh (fp, 
"  everything else (should be small): %0.2f\n",
 
 2291           timing[TOTAL_SGT] - (timing[FIND_NABORS_SGT] +
 
 2292                    timing[ORDER_BDRY_SGT] +
 
 2293                    timing[FORM_GRAPH_SGT] +
 
 2294                    timing[EXCHANGE_PERMS_SGT]));