43 #include "mat_dh_private.h" 
   44 #include "Parser_dh.h" 
   45 #include "Hash_i_dh.h" 
   50 #define IS_UPPER_TRI 97 
   51 #define IS_LOWER_TRI 98 
   53 static int isTriangular (
int m, 
int *rp, 
int *cval);
 
   58 static void mat_par_read_allocate_private (
Mat_dh * Aout, 
int n,
 
   59                        int *rowLengths, 
int *rowToBlock);
 
   64 void mat_partition_private (
Mat_dh A, 
int blocks, 
int *o2n_row,
 
   68 static void convert_triples_to_scr_private (
int m, 
int nz,
 
   69                         int *I, 
int *J, 
double *A,
 
   70                         int *rp, 
int *cval, 
double *aval);
 
   74 #define __FUNC__ "mat_dh_print_graph_private" 
   76 mat_dh_print_graph_private (
int m, 
int beg_row, 
int *rp, 
int *cval,
 
   77                 double *aval, 
int *n2o, 
int *o2n, 
Hash_i_dh hash,
 
   80   START_FUNC_DH 
int i, j, row, col;
 
   82   bool private_n2o = 
false;
 
   83   bool private_hash = 
false;
 
   88       create_nat_ordering_private (m, &n2o);
 
   90       create_nat_ordering_private (m, &o2n);
 
   97       Hash_i_dhCreate (&hash, -1);
 
  101   for (i = 0; i < m; ++i)
 
  104       for (j = rp[row]; j < rp[row + 1]; ++j)
 
  107       if (col < beg_row || col >= beg_row + m)
 
  112           tmp = Hash_i_dhLookup (hash, col);
 
  117                "beg_row= %i  m= %i; nonlocal column= %i not in hash table",
 
  119           SET_V_ERROR (msgBuf_dh);
 
  139       fprintf (fp, 
"%i %i %g\n", 1 + row + beg_row, 1 + col, val);
 
  145       destroy_nat_ordering_private (n2o);
 
  147       destroy_nat_ordering_private (o2n);
 
  153       Hash_i_dhDestroy (hash);
 
  163 #define __FUNC__ "mat_dh_print_graph_private" 
  165 mat_dh_print_graph_private (
int m, 
int beg_row, 
int *rp, 
int *cval,
 
  166                 double *aval, 
int *n2o, 
int *o2n, 
Hash_i_dh hash,
 
  169   START_FUNC_DH 
int i, j, row, col;
 
  170   bool private_n2o = 
false;
 
  171   bool private_hash = 
false;
 
  174   work = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
  180       create_nat_ordering_private (m, &n2o);
 
  182       create_nat_ordering_private (m, &o2n);
 
  189       Hash_i_dhCreate (&hash, -1);
 
  193   for (i = 0; i < m; ++i)
 
  195       for (j = 0; j < m; ++j)
 
  198       for (j = rp[row]; j < rp[row + 1]; ++j)
 
  203       if (col >= beg_row || col < beg_row + m)
 
  213           tmp = Hash_i_dhLookup (hash, col);
 
  218                "beg_row= %i  m= %i; nonlocal column= %i not in hash table",
 
  220           SET_V_ERROR (msgBuf_dh);
 
  231       for (j = 0; j < m; ++j)
 
  247       destroy_nat_ordering_private (n2o);
 
  249       destroy_nat_ordering_private (o2n);
 
  255       Hash_i_dhDestroy (hash);
 
  267 #define __FUNC__ "create_nat_ordering_private" 
  269 create_nat_ordering_private (
int m, 
int **p)
 
  271   START_FUNC_DH 
int *tmp, i;
 
  273   tmp = *p = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
  275   for (i = 0; i < m; ++i)
 
  282 #define __FUNC__ "destroy_nat_ordering_private" 
  284 destroy_nat_ordering_private (
int *p)
 
  286   START_FUNC_DH FREE_DH (p);
 
  292 #define __FUNC__ "invert_perm" 
  294 invert_perm (
int m, 
int *pIN, 
int *pOUT)
 
  298   for (i = 0; i < m; ++i)
 
  306 #define __FUNC__ "mat_dh_print_csr_private" 
  308 mat_dh_print_csr_private (
int m, 
int *rp, 
int *cval, 
double *aval, FILE * fp)
 
  310   START_FUNC_DH 
int i, nz = rp[m];
 
  313   fprintf (fp, 
"%i %i\n", m, rp[m]);
 
  316   for (i = 0; i <= m; ++i)
 
  317     fprintf (fp, 
"%i ", rp[i]);
 
  321   for (i = 0; i < nz; ++i)
 
  322     fprintf (fp, 
"%i ", cval[i]);
 
  326   for (i = 0; i < nz; ++i)
 
  327     fprintf (fp, 
"%1.19e ", aval[i]);
 
  335 #define __FUNC__ "mat_dh_read_csr_private" 
  337 mat_dh_read_csr_private (
int *mOUT, 
int **rpOUT, 
int **cvalOUT,
 
  338              double **avalOUT, FILE * fp)
 
  340   START_FUNC_DH 
int i, m, nz, items;
 
  345   items = fscanf (fp, 
"%d %d", &m, &nz);
 
  348       SET_V_ERROR (
"failed to read header");
 
  352       printf (
"mat_dh_read_csr_private:: m= %i  nz= %i\n", m, nz);
 
  356   rp = *rpOUT = (
int *) MALLOC_DH ((m + 1) * 
sizeof (int));
 
  358   cval = *cvalOUT = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
  360   aval = *avalOUT = (
double *) MALLOC_DH (nz * 
sizeof (
double));
 
  364   for (i = 0; i <= m; ++i)
 
  366       items = fscanf (fp, 
"%d", &(rp[i]));
 
  369       sprintf (msgBuf_dh, 
"failed item %i of %i in rp block", i, m + 1);
 
  370       SET_V_ERROR (msgBuf_dh);
 
  375   for (i = 0; i < nz; ++i)
 
  377       items = fscanf (fp, 
"%d", &(cval[i]));
 
  380       sprintf (msgBuf_dh, 
"failed item %i of %i in cval block", i, m + 1);
 
  381       SET_V_ERROR (msgBuf_dh);
 
  386   for (i = 0; i < nz; ++i)
 
  388       items = fscanf (fp, 
"%lg", &(aval[i]));
 
  391       sprintf (msgBuf_dh, 
"failed item %i of %i in aval block", i, m + 1);
 
  392       SET_V_ERROR (msgBuf_dh);
 
  401 #define __FUNC__ "mat_dh_read_triples_private" 
  403 mat_dh_read_triples_private (
int ignore, 
int *mOUT, 
int **rpOUT,
 
  404                  int **cvalOUT, 
double **avalOUT, FILE * fp)
 
  406   START_FUNC_DH 
int m, n, nz, items, i, j;
 
  408   int *cval, *rp, *I, *J;
 
  414   if (ignore && myid_dh == 0)
 
  417     (
"mat_dh_read_triples_private:: ignoring following header lines:\n");
 
  419     (
"--------------------------------------------------------------\n");
 
  420       for (i = 0; i < ignore; ++i)
 
  422       fgets (junk, MAX_JUNK, fp);
 
  426     (
"--------------------------------------------------------------\n");
 
  427       if (fgetpos (fp, &fpos))
 
  428     SET_V_ERROR (
"fgetpos failed!");
 
  429       printf (
"\nmat_dh_read_triples_private::1st two non-ignored lines:\n");
 
  431     (
"--------------------------------------------------------------\n");
 
  432       for (i = 0; i < 2; ++i)
 
  434       fgets (junk, MAX_JUNK, fp);
 
  438     (
"--------------------------------------------------------------\n");
 
  439       if (fsetpos (fp, &fpos))
 
  440     SET_V_ERROR (
"fsetpos failed!");
 
  451       items = fscanf (fp, 
"%d %d %lg", &i, &j, &v);
 
  465       printf (
"mat_dh_read_triples_private: m= %i  nz= %i\n", m, nz);
 
  471   for (i = 0; i < ignore; ++i)
 
  473       fgets (junk, MAX_JUNK, fp);
 
  479       sprintf (msgBuf_dh, 
"matrix is not square; row= %i, cols= %i", m, n);
 
  480       SET_V_ERROR (msgBuf_dh);
 
  486   rp = *rpOUT = (
int *) MALLOC_DH ((m + 1) * 
sizeof (int));
 
  488   cval = *cvalOUT = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
  490   aval = *avalOUT = (
double *) MALLOC_DH (nz * 
sizeof (
double));
 
  493   I = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
  495   J = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
  497   A = (
double *) MALLOC_DH (nz * 
sizeof (
double));
 
  503       items = fscanf (fp, 
"%d %d %lg", &i, &j, &v);
 
  515   convert_triples_to_scr_private (m, nz, I, J, A, rp, cval, aval);
 
  521     type = isTriangular (m, rp, cval);
 
  523     if (type == IS_UPPER_TRI)
 
  525     printf (
"CAUTION: matrix is upper triangular; converting to full\n");
 
  527     else if (type == IS_LOWER_TRI)
 
  529     printf (
"CAUTION: matrix is lower triangular; converting to full\n");
 
  532     if (type == IS_UPPER_TRI || type == IS_LOWER_TRI)
 
  534     make_full_private (m, &rp, &cval, &aval);
 
  553 #define __FUNC__ "convert_triples_to_scr_private" 
  555 convert_triples_to_scr_private (
int m, 
int nz, 
int *I, 
int *J, 
double *A,
 
  556                 int *rp, 
int *cval, 
double *aval)
 
  561   rowCounts = (
int *) MALLOC_DH ((m + 1) * 
sizeof (int));
 
  563   for (i = 0; i < m; ++i)
 
  567   for (i = 0; i < nz; ++i)
 
  575   for (i = 1; i <= m; ++i)
 
  577       rp[i] = rp[i - 1] + rowCounts[i - 1];
 
  579   memcpy (rowCounts, rp, (m + 1) * 
sizeof (
int));
 
  582   for (i = 0; i < nz; ++i)
 
  587       int idx = rowCounts[row];
 
  605 void fix_diags_private (
Mat_dh A);
 
  606 void insert_missing_diags_private (
Mat_dh A);
 
  609 #define __FUNC__ "readMat" 
  611 readMat (
Mat_dh * Aout, 
char *ft, 
char *fn, 
int ignore)
 
  613   START_FUNC_DH 
bool makeStructurallySymmetric;
 
  617   makeStructurallySymmetric =
 
  618     Parser_dhHasSwitch (parser_dh, 
"-makeSymmetric");
 
  619   fixDiags = Parser_dhHasSwitch (parser_dh, 
"-fixDiags");
 
  623       SET_V_ERROR (
"passed NULL filename; can't open for reading!");
 
  626   if (!strcmp (ft, 
"csr"))
 
  628       Mat_dhReadCSR (Aout, fn);
 
  632   else if (!strcmp (ft, 
"trip"))
 
  634       Mat_dhReadTriples (Aout, ignore, fn);
 
  638   else if (!strcmp (ft, 
"ebin"))
 
  640       Mat_dhReadBIN (Aout, fn);
 
  644   else if (!strcmp (ft, 
"petsc"))
 
  646       sprintf (msgBuf_dh, 
"must recompile Euclid using petsc mode!");
 
  647       SET_V_ERROR (msgBuf_dh);
 
  652       sprintf (msgBuf_dh, 
"unknown filetype: -ftin %s", ft);
 
  653       SET_V_ERROR (msgBuf_dh);
 
  656   if (makeStructurallySymmetric)
 
  658       printf (
"\npadding with zeros to make structurally symmetric\n");
 
  659       Mat_dhMakeStructurallySymmetric (*Aout);
 
  665       SET_V_ERROR (
"row count = 0; something's wrong!");
 
  670       fix_diags_private (*Aout);
 
  678 #define __FUNC__ "fix_diags_private" 
  680 fix_diags_private (
Mat_dh A)
 
  682   START_FUNC_DH 
int i, j, m = A->m, *rp = A->rp, *cval = A->cval;
 
  683   double *aval = A->aval;
 
  684   bool insertDiags = 
false;
 
  687   for (i = 0; i < m; ++i)
 
  689       bool isMissing = 
true;
 
  690       for (j = rp[i]; j < rp[i + 1]; ++j)
 
  707       insert_missing_diags_private (A);
 
  715   for (i = 0; i < m; ++i)
 
  718       for (j = rp[i]; j < rp[i + 1]; ++j)
 
  720       sum = MAX (sum, fabs (aval[j]));
 
  722       for (j = rp[i]; j < rp[i + 1]; ++j)
 
  735 #define __FUNC__ "insert_missing_diags_private" 
  737 insert_missing_diags_private (
Mat_dh A)
 
  739   START_FUNC_DH 
int *RP = A->rp, *CVAL = A->cval, m = A->m;
 
  741   double *AVAL = A->aval, *aval;
 
  742   int i, j, nz = RP[m] + m;
 
  745   rp = A->rp = (
int *) MALLOC_DH ((1 + m) * 
sizeof (int));
 
  747   cval = A->cval = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
  749   aval = A->aval = (
double *) MALLOC_DH (nz * 
sizeof (
double));
 
  753   for (i = 0; i < m; ++i)
 
  755       bool isMissing = 
true;
 
  756       for (j = RP[i]; j < RP[i + 1]; ++j)
 
  782 #define __FUNC__ "readVec" 
  784 readVec (
Vec_dh * bout, 
char *ft, 
char *fn, 
int ignore)
 
  786   START_FUNC_DH * bout = NULL;
 
  790       SET_V_ERROR (
"passed NULL filename; can't open for reading!");
 
  793   if (!strcmp (ft, 
"csr") || !strcmp (ft, 
"trip"))
 
  795       Vec_dhRead (bout, ignore, fn);
 
  799   else if (!strcmp (ft, 
"ebin"))
 
  801       Vec_dhReadBIN (bout, fn);
 
  805   else if (!strcmp (ft, 
"petsc"))
 
  807       sprintf (msgBuf_dh, 
"must recompile Euclid using petsc mode!");
 
  808       SET_V_ERROR (msgBuf_dh);
 
  813       sprintf (msgBuf_dh, 
"unknown filetype: -ftin %s", ft);
 
  814       SET_V_ERROR (msgBuf_dh);
 
  821 #define __FUNC__ "writeMat" 
  823 writeMat (
Mat_dh Ain, 
char *ft, 
char *fn)
 
  825   START_FUNC_DH 
if (fn == NULL)
 
  827       SET_V_ERROR (
"passed NULL filename; can't open for writing!");
 
  830   if (!strcmp (ft, 
"csr"))
 
  832       Mat_dhPrintCSR (Ain, NULL, fn);
 
  836   else if (!strcmp (ft, 
"trip"))
 
  838       Mat_dhPrintTriples (Ain, NULL, fn);
 
  842   else if (!strcmp (ft, 
"ebin"))
 
  844       Mat_dhPrintBIN (Ain, NULL, fn);
 
  849   else if (!strcmp (ft, 
"petsc"))
 
  851       sprintf (msgBuf_dh, 
"must recompile Euclid using petsc mode!");
 
  852       SET_V_ERROR (msgBuf_dh);
 
  857       sprintf (msgBuf_dh, 
"unknown filetype: -ftout %s", ft);
 
  858       SET_V_ERROR (msgBuf_dh);
 
  864 #define __FUNC__ "writeVec" 
  866 writeVec (
Vec_dh bin, 
char *ft, 
char *fn)
 
  868   START_FUNC_DH 
if (fn == NULL)
 
  870       SET_V_ERROR (
"passed NULL filename; can't open for writing!");
 
  873   if (!strcmp (ft, 
"csr") || !strcmp (ft, 
"trip"))
 
  875       Vec_dhPrint (bin, NULL, fn);
 
  879   else if (!strcmp (ft, 
"ebin"))
 
  881       Vec_dhPrintBIN (bin, NULL, fn);
 
  885   else if (!strcmp (ft, 
"petsc"))
 
  887       sprintf (msgBuf_dh, 
"must recompile Euclid using petsc mode!");
 
  888       SET_V_ERROR (msgBuf_dh);
 
  893       sprintf (msgBuf_dh, 
"unknown filetype: -ftout %s", ft);
 
  894       SET_V_ERROR (msgBuf_dh);
 
  900 #define __FUNC__ "isTriangular" 
  902 isTriangular (
int m, 
int *rp, 
int *cval)
 
  904   START_FUNC_DH 
int row, j;
 
  906   bool type_lower = 
false, type_upper = 
false;
 
  910       SET_ERROR (-1, 
"only implemented for a single cpu");
 
  913   for (row = 0; row < m; ++row)
 
  915       for (j = rp[row]; j < rp[row + 1]; ++j)
 
  923       if (type_lower && type_upper)
 
  927   if (type_lower && type_upper)
 
  943 static void mat_dh_transpose_reuse_private_private (
bool allocateMem, 
int m,
 
  944                             int *rpIN, 
int *cvalIN,
 
  952 #define __FUNC__ "mat_dh_transpose_reuse_private" 
  954 mat_dh_transpose_reuse_private (
int m,
 
  955                 int *rpIN, 
int *cvalIN, 
double *avalIN,
 
  956                 int *rpOUT, 
int *cvalOUT, 
double *avalOUT)
 
  959     mat_dh_transpose_reuse_private_private (
false, m, rpIN, cvalIN, avalIN,
 
  960                         &rpOUT, &cvalOUT, &avalOUT);
 
  966 #define __FUNC__ "mat_dh_transpose_private" 
  968 mat_dh_transpose_private (
int m, 
int *RP, 
int **rpOUT,
 
  969               int *CVAL, 
int **cvalOUT,
 
  970               double *AVAL, 
double **avalOUT)
 
  973     mat_dh_transpose_reuse_private_private (
true, m, RP, CVAL, AVAL,
 
  974                         rpOUT, cvalOUT, avalOUT);
 
  979 #define __FUNC__ "mat_dh_transpose_private_private" 
  981 mat_dh_transpose_reuse_private_private (
bool allocateMem, 
int m,
 
  982                     int *RP, 
int *CVAL, 
double *AVAL,
 
  983                     int **rpOUT, 
int **cvalOUT,
 
  986   START_FUNC_DH 
int *rp, *cval, *tmp;
 
  987   int i, j, nz = RP[m];
 
  992       rp = *rpOUT = (
int *) MALLOC_DH ((1 + m) * 
sizeof (int));
 
  994       cval = *cvalOUT = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
  998       aval = *avalOUT = (
double *) MALLOC_DH (nz * 
sizeof (
double));
 
 1006       if (avalOUT != NULL)
 
 1011   tmp = (
int *) MALLOC_DH ((1 + m) * 
sizeof (int));
 
 1013   for (i = 0; i <= m; ++i)
 
 1016   for (i = 0; i < m; ++i)
 
 1018       for (j = RP[i]; j < RP[i + 1]; ++j)
 
 1024   for (i = 1; i <= m; ++i)
 
 1025     tmp[i] += tmp[i - 1];
 
 1026   memcpy (rp, tmp, (m + 1) * 
sizeof (
int));
 
 1028   if (avalOUT != NULL)
 
 1030       for (i = 0; i < m; ++i)
 
 1032       for (j = RP[i]; j < RP[i + 1]; ++j)
 
 1037           aval[idx] = AVAL[j];
 
 1045       for (i = 0; i < m; ++i)
 
 1047       for (j = RP[i]; j < RP[i + 1]; ++j)
 
 1064 #define __FUNC__ "mat_find_owner" 
 1066 mat_find_owner (
int *beg_rows, 
int *end_rows, 
int index)
 
 1068   START_FUNC_DH 
int pe, owner = -1;
 
 1070   for (pe = 0; pe < np_dh; ++pe)
 
 1072       if (index >= beg_rows[pe] && index < end_rows[pe])
 
 1081       sprintf (msgBuf_dh, 
"failed to find owner for index= %i", index);
 
 1082       SET_ERROR (-1, msgBuf_dh);
 
 1085 END_FUNC_VAL (owner)}
 
 1090 void partition_and_distribute_private (
Mat_dh A, 
Mat_dh * Bout);
 
 1091 void partition_and_distribute_metis_private (
Mat_dh A, 
Mat_dh * Bout);
 
 1094 #define __FUNC__ "readMat_par" 
 1096 readMat_par (
Mat_dh * Aout, 
char *fileType, 
char *fileName, 
int ignore)
 
 1098   START_FUNC_DH 
Mat_dh A = NULL;
 
 1104       readMat (&A, fileType, fileName, ignore);
 
 1115       if (Parser_dhHasSwitch (parser_dh, 
"-metis"))
 
 1117       partition_and_distribute_metis_private (A, Aout);
 
 1122       partition_and_distribute_private (A, Aout);
 
 1127   if (np_dh > 1 && A != NULL)
 
 1134   if (Parser_dhHasSwitch (parser_dh, 
"-printMAT"))
 
 1136       char xname[] = 
"A", *name = xname;
 
 1137       Parser_dhReadString (parser_dh, 
"-printMat", &name);
 
 1138       Mat_dhPrintTriples (*Aout, NULL, name);
 
 1140       printf_dh (
"\n@@@ readMat_par: printed mat to %s\n\n", xname);
 
 1148 #define __FUNC__ "partition_and_distribute_metis_private" 
 1150 partition_and_distribute_metis_private (
Mat_dh A, 
Mat_dh * Bout)
 
 1152   START_FUNC_DH 
Mat_dh B = NULL;
 
 1155   int *rowLengths = NULL;
 
 1156   int *o2n_row = NULL, *n2o_col = NULL, *rowToBlock = NULL;
 
 1157   int *beg_row = NULL, *row_count = NULL;
 
 1158   MPI_Request *send_req = NULL;
 
 1159   MPI_Request *rcv_req = NULL;
 
 1160   MPI_Status *send_status = NULL;
 
 1161   MPI_Status *rcv_status = NULL;
 
 1163   MPI_Barrier (comm_dh);
 
 1164   printf_dh (
"@@@ partitioning with metis\n");
 
 1169   MPI_Bcast (&m, 1, MPI_INT, 0, MPI_COMM_WORLD); 
 
 1172   rowLengths = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
 1174   rowToBlock = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
 1180       for (i = 0; i < m; ++i)
 
 1182       rowLengths[i] = tmp[i + 1] - tmp[i];
 
 1185   MPI_Bcast (rowLengths, m, MPI_INT, 0, comm_dh);
 
 1194       Mat_dhPartition (A, np_dh, &beg_row, &row_count, &n2o_col, &o2n_row);
 
 1196       Mat_dhPermute (A, n2o_col, &C);
 
 1200       for (i = 0; i < np_dh; ++i)
 
 1202       for (j = beg_row[i]; j < beg_row[i] + row_count[i]; ++j)
 
 1204           rowToBlock[idx++] = i;
 
 1210   MPI_Bcast (rowToBlock, m, MPI_INT, 0, comm_dh);
 
 1213   mat_par_read_allocate_private (&B, m, rowLengths, rowToBlock);
 
 1219       int *cval = C->cval, *rp = C->rp;
 
 1220       double *aval = C->aval;
 
 1221       send_req = (MPI_Request *) MALLOC_DH (2 * m * 
sizeof (MPI_Request));
 
 1223       send_status = (MPI_Status *) MALLOC_DH (2 * m * 
sizeof (MPI_Status));
 
 1225       for (i = 0; i < m; ++i)
 
 1227       int owner = rowToBlock[i];
 
 1228       int count = rp[i + 1] - rp[i];
 
 1233           sprintf (msgBuf_dh, 
"row %i of %i is empty!", i + 1, m);
 
 1234           SET_V_ERROR (msgBuf_dh);
 
 1237       MPI_Isend (cval + rp[i], count, MPI_INT, owner, CVAL_TAG, comm_dh,
 
 1239       MPI_Isend (aval + rp[i], count, MPI_DOUBLE, owner, AVAL_TAG,
 
 1240              comm_dh, send_req + 2 * i + 1);
 
 1246     int *cval = B->cval;
 
 1248     double *aval = B->aval;
 
 1251     rcv_req = (MPI_Request *) MALLOC_DH (2 * m * 
sizeof (MPI_Request));
 
 1253     rcv_status = (MPI_Status *) MALLOC_DH (2 * m * 
sizeof (MPI_Status));
 
 1256     for (i = 0; i < m; ++i)
 
 1260     int count = rp[i + 1] - rp[i];
 
 1263         sprintf (msgBuf_dh, 
"local row %i of %i is empty!", i + 1, m);
 
 1264         SET_V_ERROR (msgBuf_dh);
 
 1267     MPI_Irecv (cval + rp[i], count, MPI_INT, 0, CVAL_TAG, comm_dh,
 
 1269     MPI_Irecv (aval + rp[i], count, MPI_DOUBLE, 0, AVAL_TAG, comm_dh,
 
 1270            rcv_req + 2 * i + 1);
 
 1277       MPI_Waitall (m * 2, send_req, send_status);
 
 1279   MPI_Waitall (2 * B->m, rcv_req, rcv_status);
 
 1282   if (rowLengths != NULL)
 
 1284       FREE_DH (rowLengths);
 
 1287   if (o2n_row != NULL)
 
 1292   if (n2o_col != NULL)
 
 1297   if (rowToBlock != NULL)
 
 1299       FREE_DH (rowToBlock);
 
 1302   if (send_req != NULL)
 
 1307   if (rcv_req != NULL)
 
 1312   if (send_status != NULL)
 
 1314       FREE_DH (send_status);
 
 1317   if (rcv_status != NULL)
 
 1319       FREE_DH (rcv_status);
 
 1322   if (beg_row != NULL)
 
 1327   if (row_count != NULL)
 
 1329       FREE_DH (row_count);
 
 1344 #define __FUNC__ "partition_and_distribute_private" 
 1346 partition_and_distribute_private (
Mat_dh A, 
Mat_dh * Bout)
 
 1348   START_FUNC_DH 
Mat_dh B = NULL;
 
 1350   int *rowLengths = NULL;
 
 1351   int *o2n_row = NULL, *n2o_col = NULL, *rowToBlock = NULL;
 
 1352   MPI_Request *send_req = NULL;
 
 1353   MPI_Request *rcv_req = NULL;
 
 1354   MPI_Status *send_status = NULL;
 
 1355   MPI_Status *rcv_status = NULL;
 
 1357   MPI_Barrier (comm_dh);
 
 1362   MPI_Bcast (&m, 1, MPI_INT, 0, MPI_COMM_WORLD); 
 
 1365   rowLengths = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
 1370       for (i = 0; i < m; ++i)
 
 1372       rowLengths[i] = tmp[i + 1] - tmp[i];
 
 1375   MPI_Bcast (rowLengths, m, MPI_INT, 0, comm_dh);
 
 1378   rowToBlock = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
 1383       o2n_row = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
 1385       mat_partition_private (A, np_dh, o2n_row, rowToBlock);
 
 1390   MPI_Bcast (rowToBlock, m, MPI_INT, 0, comm_dh);
 
 1393   mat_par_read_allocate_private (&B, m, rowLengths, rowToBlock);
 
 1399       int *cval = A->cval, *rp = A->rp;
 
 1400       double *aval = A->aval;
 
 1401       send_req = (MPI_Request *) MALLOC_DH (2 * m * 
sizeof (MPI_Request));
 
 1403       send_status = (MPI_Status *) MALLOC_DH (2 * m * 
sizeof (MPI_Status));
 
 1405       for (i = 0; i < m; ++i)
 
 1407       int owner = rowToBlock[i];
 
 1408       int count = rp[i + 1] - rp[i];
 
 1413           sprintf (msgBuf_dh, 
"row %i of %i is empty!", i + 1, m);
 
 1414           SET_V_ERROR (msgBuf_dh);
 
 1417       MPI_Isend (cval + rp[i], count, MPI_INT, owner, CVAL_TAG, comm_dh,
 
 1419       MPI_Isend (aval + rp[i], count, MPI_DOUBLE, owner, AVAL_TAG,
 
 1420              comm_dh, send_req + 2 * i + 1);
 
 1426     int *cval = B->cval;
 
 1428     double *aval = B->aval;
 
 1431     rcv_req = (MPI_Request *) MALLOC_DH (2 * m * 
sizeof (MPI_Request));
 
 1433     rcv_status = (MPI_Status *) MALLOC_DH (2 * m * 
sizeof (MPI_Status));
 
 1436     for (i = 0; i < m; ++i)
 
 1440     int count = rp[i + 1] - rp[i];
 
 1443         sprintf (msgBuf_dh, 
"local row %i of %i is empty!", i + 1, m);
 
 1444         SET_V_ERROR (msgBuf_dh);
 
 1447     MPI_Irecv (cval + rp[i], count, MPI_INT, 0, CVAL_TAG, comm_dh,
 
 1449     MPI_Irecv (aval + rp[i], count, MPI_DOUBLE, 0, AVAL_TAG, comm_dh,
 
 1450            rcv_req + 2 * i + 1);
 
 1457       MPI_Waitall (m * 2, send_req, send_status);
 
 1459   MPI_Waitall (2 * B->m, rcv_req, rcv_status);
 
 1462   if (rowLengths != NULL)
 
 1464       FREE_DH (rowLengths);
 
 1467   if (o2n_row != NULL)
 
 1472   if (n2o_col != NULL)
 
 1477   if (rowToBlock != NULL)
 
 1479       FREE_DH (rowToBlock);
 
 1482   if (send_req != NULL)
 
 1487   if (rcv_req != NULL)
 
 1492   if (send_status != NULL)
 
 1494       FREE_DH (send_status);
 
 1497   if (rcv_status != NULL)
 
 1499       FREE_DH (rcv_status);
 
 1508 #define __FUNC__ "mat_par_read_allocate_private" 
 1510 mat_par_read_allocate_private (
Mat_dh * Aout, 
int n, 
int *rowLengths,
 
 1514   int i, m, nz, beg_row, *rp, idx;
 
 1523   for (i = 0; i < n; ++i)
 
 1525       if (rowToBlock[i] == myid_dh)
 
 1532   for (i = 0; i < n; ++i)
 
 1534       if (rowToBlock[i] < myid_dh)
 
 1537   A->beg_row = beg_row;
 
 1540   A->rp = rp = (
int *) MALLOC_DH ((m + 1) * 
sizeof (int));
 
 1547   for (i = 0; i < n; ++i)
 
 1549       if (rowToBlock[i] == myid_dh)
 
 1551       nz += rowLengths[i];
 
 1557   A->cval = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
 1559   A->aval = (
double *) MALLOC_DH (nz * 
sizeof (
double));
 
 1565 #define __FUNC__ "mat_partition_private" 
 1567 mat_partition_private (
Mat_dh A, 
int blocks, 
int *o2n_row, 
int *rowToBlock)
 
 1569   START_FUNC_DH 
int i, j, n = A->n;
 
 1570   int rpb = n / blocks;     
 
 1573   while (rpb * blocks < n)
 
 1576   if (rpb * (blocks - 1) == n)
 
 1579       printf_dh (
"adjusted rpb to: %i\n", rpb);
 
 1582   for (i = 0; i < n; ++i)
 
 1588   for (i = 0; i < blocks - 1; ++i)
 
 1590       for (j = 0; j < rpb; ++j)
 
 1592       rowToBlock[idx++] = i;
 
 1599     rowToBlock[idx++] = i;
 
 1606 #define __FUNC__ "make_full_private" 
 1608 make_full_private (
int m, 
int **rpIN, 
int **cvalIN, 
double **avalIN)
 
 1610   START_FUNC_DH 
int i, j, *rpNew, *cvalNew, *rp = *rpIN, *cval = *cvalIN;
 
 1611   double *avalNew, *aval = *avalIN;
 
 1612   int nz, *rowCounts = NULL;
 
 1615   rowCounts = (
int *) MALLOC_DH ((m + 1) * 
sizeof (int));
 
 1617   for (i = 0; i <= m; ++i)
 
 1620   for (i = 0; i < m; ++i)
 
 1622       for (j = rp[i]; j < rp[i + 1]; ++j)
 
 1625       rowCounts[i + 1] += 1;
 
 1627         rowCounts[col + 1] += 1;
 
 1632   rpNew = (
int *) MALLOC_DH ((m + 1) * 
sizeof (int));
 
 1634   for (i = 1; i <= m; ++i)
 
 1635     rowCounts[i] += rowCounts[i - 1];
 
 1636   memcpy (rpNew, rowCounts, (m + 1) * 
sizeof (
int));
 
 1641   cvalNew = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
 1643   avalNew = (
double *) MALLOC_DH (nz * 
sizeof (
double));
 
 1645   for (i = 0; i < m; ++i)
 
 1647       for (j = rp[i]; j < rp[i + 1]; ++j)
 
 1650       double val = aval[j];
 
 1652       cvalNew[rowCounts[i]] = col;
 
 1653       avalNew[rowCounts[i]] = val;
 
 1657           cvalNew[rowCounts[col]] = i;
 
 1658           avalNew[rowCounts[col]] = val;
 
 1659           rowCounts[col] += 1;
 
 1664   if (rowCounts != NULL)
 
 1666       FREE_DH (rowCounts);
 
 1681 #define __FUNC__ "make_symmetric_private" 
 1683 make_symmetric_private (
int m, 
int **rpIN, 
int **cvalIN, 
double **avalIN)
 
 1685   START_FUNC_DH 
int i, j, *rpNew, *cvalNew, *rp = *rpIN, *cval = *cvalIN;
 
 1686   double *avalNew, *aval = *avalIN;
 
 1687   int nz, *rowCounts = NULL;
 
 1688   int *rpTrans, *cvalTrans;
 
 1691   int nzCount = 0, transCount = 0;
 
 1693   mat_dh_transpose_private (m, rp, &rpTrans,
 
 1694                 cval, &cvalTrans, aval, &avalTrans);
 
 1698   work = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
 1700   for (i = 0; i < m; ++i)
 
 1702   rowCounts = (
int *) MALLOC_DH ((m + 1) * 
sizeof (int));
 
 1704   for (i = 0; i <= m; ++i)
 
 1707   for (i = 0; i < m; ++i)
 
 1710       for (j = rp[i]; j < rp[i + 1]; ++j)
 
 1717       for (j = rpTrans[i]; j < rpTrans[i + 1]; ++j)
 
 1719       int col = cvalTrans[j];
 
 1726       rowCounts[i + 1] = ct;
 
 1732   if (transCount == 0)
 
 1735     (
"make_symmetric_private: matrix is already structurally symmetric!\n");
 
 1738       FREE_DH (cvalTrans);
 
 1740       FREE_DH (avalTrans);
 
 1744       FREE_DH (rowCounts);
 
 1746       goto END_OF_FUNCTION;
 
 1754       printf (
"original nz= %i\n", rp[m]);
 
 1755       printf (
"zeros added= %i\n", transCount);
 
 1757     (
"ratio of added zeros to nonzeros = %0.2f (assumes all original entries were nonzero!)\n",
 
 1758      (
double) transCount / (
double) (nzCount));
 
 1762   rpNew = (
int *) MALLOC_DH ((m + 1) * 
sizeof (int));
 
 1764   for (i = 1; i <= m; ++i)
 
 1765     rowCounts[i] += rowCounts[i - 1];
 
 1766   memcpy (rpNew, rowCounts, (m + 1) * 
sizeof (
int));
 
 1767   for (i = 0; i < m; ++i)
 
 1772   cvalNew = (
int *) MALLOC_DH (nz * 
sizeof (
int));
 
 1774   avalNew = (
double *) MALLOC_DH (nz * 
sizeof (
double));
 
 1776   for (i = 0; i < m; ++i)
 
 1779   for (i = 0; i < m; ++i)
 
 1781       for (j = rp[i]; j < rp[i + 1]; ++j)
 
 1784       double val = aval[j];
 
 1786       cvalNew[rowCounts[i]] = col;
 
 1787       avalNew[rowCounts[i]] = val;
 
 1790       for (j = rpTrans[i]; j < rpTrans[i + 1]; ++j)
 
 1792       int col = cvalTrans[j];
 
 1795           cvalNew[rowCounts[i]] = col;
 
 1796           avalNew[rowCounts[i]] = 0.0;
 
 1802   if (rowCounts != NULL)
 
 1804       FREE_DH (rowCounts);
 
 1815   FREE_DH (cvalTrans);
 
 1819   FREE_DH (avalTrans);
 
 1830 #define __FUNC__ "profileMat" 
 1834   START_FUNC_DH 
Mat_dh B = NULL;
 
 1840   bool isStructurallySymmetric = 
true;
 
 1841   bool isNumericallySymmetric = 
true;
 
 1842   bool is_Triangular = 
false;
 
 1843   int zeroCount = 0, nz;
 
 1847       SET_V_ERROR (
"only for a single MPI task!");
 
 1852   printf (
"\nYY----------------------------------------------------\n");
 
 1856   for (i = 0; i < nz; ++i)
 
 1858       if (A->aval[i] == 0)
 
 1861   printf (
"YY  row count:      %i\n", m);
 
 1862   printf (
"YY  nz count:       %i\n", nz);
 
 1863   printf (
"YY  explicit zeros: %i (entire matrix)\n", zeroCount);
 
 1867     int m_diag = 0, z_diag = 0;
 
 1868     for (i = 0; i < m; ++i)
 
 1871     for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
 
 1873         int col = A->cval[j];
 
 1878         double val = A->aval[j];
 
 1890     printf (
"YY  missing diagonals:   %i\n", m_diag);
 
 1891     printf (
"YY  explicit zero diags: %i\n", z_diag);
 
 1895   type = isTriangular (m, A->rp, A->cval);
 
 1897   if (type == IS_UPPER_TRI)
 
 1899       printf (
"YY  matrix is upper triangular\n");
 
 1900       is_Triangular = 
true;
 
 1901       goto END_OF_FUNCTION;
 
 1903   else if (type == IS_LOWER_TRI)
 
 1905       printf (
"YY  matrix is lower triangular\n");
 
 1906       is_Triangular = 
true;
 
 1907       goto END_OF_FUNCTION;
 
 1912     int unz = 0, lnz = 0;
 
 1913     for (i = 0; i < m; ++i)
 
 1915     for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
 
 1917         int col = A->cval[j];
 
 1924     printf (
"YY  strict upper triangular nonzeros: %i\n", unz);
 
 1925     printf (
"YY  strict lower triangular nonzeros: %i\n", lnz);
 
 1931   Mat_dhTranspose (A, &B);
 
 1936   work1 = (
int *) MALLOC_DH (m * 
sizeof (
int));
 
 1938   work2 = (
double *) MALLOC_DH (m * 
sizeof (
double));
 
 1940   for (i = 0; i < m; ++i)
 
 1942   for (i = 0; i < m; ++i)
 
 1945   for (i = 0; i < m; ++i)
 
 1947       for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
 
 1949       int col = A->cval[j];
 
 1950       double val = A->aval[j];
 
 1954       for (j = B->rp[i]; j < B->rp[i + 1]; ++j)
 
 1956       int col = B->cval[j];
 
 1957       double val = B->aval[j];
 
 1959       if (work1[col] != i)
 
 1961           isStructurallySymmetric = 
false;
 
 1962           isNumericallySymmetric = 
false;
 
 1963           goto END_OF_FUNCTION;
 
 1965       if (work2[col] != val)
 
 1967           isNumericallySymmetric = 
false;
 
 1978       printf (
"YY  matrix is NOT triangular\n");
 
 1979       if (isStructurallySymmetric)
 
 1981       printf (
"YY  matrix IS structurally symmetric\n");
 
 1985       printf (
"YY  matrix is NOT structurally symmetric\n");
 
 1987       if (isNumericallySymmetric)
 
 1989       printf (
"YY  matrix IS numerically symmetric\n");
 
 1993       printf (
"YY  matrix is NOT numerically symmetric\n");
 
 2013   printf (
"YY----------------------------------------------------\n");