56 int *end_rows,
int reqlen,
57 int *reqind,
int *outlist);
63 not used anyplace, I think;
65 expansion ?[mar 21, 2 K + 1]
66 static
void Mat_dhAllocate_getRow_private (
Mat_dh A);
72 #define __FUNC__ "Mat_dhCreate"
82 if (
myid_dh == 0 && commsOnly ==
true)
129 #define __FUNC__ "Mat_dhDestroy"
142 if (mat->
len != NULL)
147 if (mat->
cval != NULL)
152 if (mat->
aval != NULL)
157 if (mat->
diag != NULL)
162 if (mat->
fill != NULL)
185 MPI_Request_free (&mat->
recv_req[i]);
187 MPI_Request_free (&mat->
send_req[i]);
224 if (mat->
numb != NULL)
236 #define __FUNC__ "Mat_dhMatVecSetDown"
247 #define __FUNC__ "Mat_dhMatVecSetup"
258 int *outlist, *inlist;
263 int lastLocal = firstLocal +
m;
264 int *beg_rows, *end_rows;
287 MPI_Allgather (&firstLocal, 1, MPI_INT, beg_rows, 1, MPI_INT,
293 MPI_Allgather (&lastLocal, 1, MPI_INT, end_rows, 1, MPI_INT,
302 for (i = 0; i <
np_dh; ++i)
321 inlist[0] = outlist[0];
326 MPI_Alltoall (outlist, 1, MPI_INT, inlist, 1, MPI_INT,
comm_dh);
334 for (row = 0; row <
m; row++)
336 int len = rp[row + 1] - rp[row];
337 int *ind =
cval + rp[row];
358 #define __FUNC__ "setup_matvec_receives_private"
361 int reqlen,
int *reqind,
int *outlist)
373 for (i = 0; i < reqlen; i = j)
380 for (j = i + 1; j < reqlen; j++)
383 if (reqind[j] < beg_rows[this_pe] || reqind[j] > end_rows[this_pe])
389 MPI_Isend (&reqind[i], j - i, MPI_INT, this_pe, 444,
comm_dh,
392 ierr = MPI_Request_free (&request);
396 outlist[this_pe] = j - i;
399 MPI_Recv_init (&mat->
recvbuf[i + m], j - i, MPI_DOUBLE, this_pe, 555,
411 #define __FUNC__ "setup_matvec_sends_private"
416 MPI_Request *requests;
417 MPI_Status *statuses;
419 requests = (MPI_Request *)
MALLOC_DH (
np_dh *
sizeof (MPI_Request));
421 statuses = (MPI_Status *)
MALLOC_DH (
np_dh *
sizeof (MPI_Status));
426 for (i = 0; i <
np_dh; i++)
427 sendlen += inlist[i];
436 for (i = 0; i <
np_dh; i++)
447 MPI_Send_init (&mat->
sendbuf[j], inlist[i], MPI_DOUBLE, i, 555,
460 ierr = MPI_Waitall (mat->
num_send, requests, statuses);
464 for (i = 0; i < mat->
sendlen; i++)
474 #define __FUNC__ "Mat_dhMatVec"
486 int ierr, i, row,
m = mat->
m;
493 double t1 = 0, t2 = 0, t3 = 0, t4 = 0;
504 sendbuf[i] = x[sendind[i]];
533 for (i = 0; i <
m; i++)
537 for (row = 0; row <
m; row++)
539 int len = rp[row + 1] - rp[row];
540 int *ind =
cval + rp[row];
541 double *val = aval + rp[row];
543 for (i = 0; i <
len; i++)
545 temp += (val[i] * recvbuf[ind[i]]);
562 #define __FUNC__ "Mat_dhMatVec_omp"
573 double t1 = 0, t2 = 0, t3 = 0, t4 = 0, tx = 0;
583 sendbuf[i] = x[sendind[i]];
607 for (i = 0; i <
m; i++)
618 for (row = 0; row <
m; row++)
620 len = rp[row + 1] - rp[row];
621 ind =
cval + rp[row];
622 val = aval + rp[row];
624 for (i = 0; i <
len; i++)
626 temp += (val[i] * recvbuf[ind[i]]);
643 #define __FUNC__ "Mat_dhMatVec_uni_omp"
650 double t1 = 0, t2 = 0;
659 for (row = 0; row <
m; row++)
661 int len = rp[row + 1] - rp[row];
662 int *ind =
cval + rp[row];
663 double *val = aval + rp[row];
665 for (i = 0; i <
len; i++)
667 temp += (val[i] * x[ind[i]]);
684 #define __FUNC__ "Mat_dhMatVec_uni"
691 double t1 = 0, t2 = 0;
697 for (row = 0; row <
m; row++)
699 int len = rp[row + 1] - rp[row];
700 int *ind =
cval + rp[row];
701 double *val = aval + rp[row];
703 for (i = 0; i <
len; i++)
705 temp += (val[i] * x[ind[i]]);
721 #define __FUNC__ "Mat_dhReadNz"
727 ierr = MPI_Allreduce (&nz, &retval, 1, MPI_INT, MPI_SUM,
comm_dh);
736 #define __FUNC__ "Mat_dhAllocate_getRow_private"
738 Mat_dhAllocate_getRow_private (
Mat_dh A)
744 for (i = 0; i <
m; ++i)
774 #define __FUNC__ "Mat_dhZeroTiming"
789 #define __FUNC__ "Mat_dhReduceTiming"
805 #define __FUNC__ "Mat_dhPermute"
810 int i, j, *RP = A->
rp, *CVAL = A->
cval;
811 int *o2n, *
rp, *
cval, m = A->
m, nz = RP[
m];
820 o2n = (
int *)
MALLOC_DH (m *
sizeof (
int));
822 for (i = 0; i <
m; ++i)
826 rp = B->rp = (
int *)
MALLOC_DH ((m + 1) *
sizeof (int));
828 cval = B->cval = (
int *)
MALLOC_DH (nz *
sizeof (
int));
830 aval = B->aval = (
double *)
MALLOC_DH (nz *
sizeof (
double));
835 for (i = 0; i <
m; ++i)
838 rp[i + 1] = RP[oldRow + 1] - RP[oldRow];
840 for (i = 1; i <=
m; ++i)
841 rp[i] = rp[i] + rp[i - 1];
843 for (i = 0; i <
m; ++i)
847 for (j = RP[oldRow]; j < RP[oldRow + 1]; ++j)
849 cval[idx] = o2n[CVAL[j]];
866 #define __FUNC__ "Mat_dhPrintGraph"
878 for (pe = 0; pe <
np_dh; ++pe)
887 A->
aval, NULL, NULL, NULL, fp);
904 #define __FUNC__ "Mat_dhPrintRows"
925 "\n----- A, unpermuted ------------------------------------\n");
926 for (i = 0; i <
m; ++i)
928 fprintf (fp,
"%i :: ", 1 + i + beg_row);
929 for (j = rp[i]; j < rp[i + 1]; ++j)
933 fprintf (fp,
"%i ", 1 +
cval[j]);
937 fprintf (fp,
"%i,%g ; ", 1 +
cval[j], aval[j]);
952 for (i = 0; i < sg->
blocks; ++i)
960 int end_row = beg_row + sg->
row_count[oldBlock];
964 "\n----- A, permuted, single mpi task ------------------\n");
965 fprintf (fp,
"---- new subdomain: %i; old subdomain: %i\n", i,
967 fprintf (fp,
" old beg_row: %i; new beg_row: %i\n",
969 fprintf (fp,
" local rows in this block: %i\n",
971 fprintf (fp,
" bdry rows in this block: %i\n",
973 fprintf (fp,
" 1st bdry row= %i \n",
976 for (oldRow = beg_row; oldRow < end_row; ++oldRow)
981 fprintf (fp,
"%3i (old= %3i) :: ", idx, 1 + oldRow);
986 for (k = 0; k <
len; ++k)
1017 for (i = 0; i <
m; ++i)
1019 int row = n2o_row[i];
1020 fprintf (fp,
"%3i (old= %3i) :: ", 1 + i + beg_rowP,
1022 for (j = rp[row]; j < rp[row + 1]; ++j)
1028 if (col >= beg_row && col < beg_row + m)
1030 col = o2n_col[col -
beg_row] + beg_rowP;
1042 "nonlocal column= %i not in hash table",
1054 fprintf (fp,
"%i ", 1 + col);
1058 fprintf (fp,
"%i,%g ; ", 1 + col, aval[j]);
1069 #define __FUNC__ "Mat_dhPrintTriples"
1093 for (pe = 0; pe <
np_dh; ++pe)
1109 for (i = 0; i <
m; ++i)
1111 for (j = rp[i]; j < rp[i + 1]; ++j)
1115 fprintf (fp,
"%i %i\n", 1 + i + beg_row,
1121 if (val == 0.0 && matlab)
1137 else if (
np_dh == 1)
1139 int i, j, k, idx = 1;
1144 for (i = 0; i < sg->
blocks; ++i)
1146 int oldBlock = sg->
n2o_sub[i];
1148 int end_row = beg_row + sg->
row_count[oldBlock];
1150 for (j = beg_row; j < end_row; ++j)
1161 for (k = 0; k <
len; ++k)
1163 fprintf (fp,
"%i %i\n", idx, 1 + sg->
o2n_col[
cval[k]]);
1170 for (k = 0; k <
len; ++k)
1172 double val = aval[k];
1173 if (val == 0.0 && matlab)
1198 for (pe = 0; pe <
np_dh; ++pe)
1214 for (i = 0; i <
m; ++i)
1216 int row = n2o_row[i];
1217 for (j = rp[row]; j < rp[row + 1]; ++j)
1224 if (val == 0.0 && matlab)
1229 if (col >= beg_row && col < beg_row + m)
1231 col = o2n_col[col -
beg_row] + beg_rowP;
1243 "nonlocal column= %i not in hash table",
1255 fprintf (fp,
"%i %i\n", 1 + i + beg_rowP, 1 + col);
1274 #define __FUNC__ "Mat_dhPrintCSR"
1282 SET_V_ERROR (
"only implemented for a single mpi task");
1287 (
"not implemented for reordered matrix (SubdomainGraph_dh should be NULL)");
1310 #define __FUNC__ "Mat_dhPrintBIN"
1316 SET_V_ERROR (
"only implemented for a single MPI task");
1322 SET_V_ERROR (
"not implemented for reordering; ensure sg=NULL");
1326 NULL, NULL, NULL, filename);
1336 #define __FUNC__ "Mat_dhReadCSR"
1345 SET_V_ERROR (
"only implemented for a single MPI task");
1364 #define __FUNC__ "Mat_dhReadTriples"
1373 SET_V_ERROR (
"only implemented for a single MPI task");
1395 #define __FUNC__ "Mat_dhReadBIN"
1403 SET_V_ERROR (
"only implemented for a single MPI task");
1415 #define __FUNC__ "Mat_dhTranspose"
1436 #define __FUNC__ "Mat_dhMakeStructurallySymmetric"
1455 #define __FUNC__ "Mat_dhFixDiags"
1465 for (i = 0; i <
m; ++i)
1468 for (j = rp[i]; j < rp[i + 1]; ++j)
1485 (
"\nMat_dhFixDiags:: %i diags not explicitly present; inserting!\n",
1495 for (i = 0; i <
m; ++i)
1498 for (j = rp[i]; j < rp[i + 1]; ++j)
1500 sum += fabs (aval[j]);
1502 for (j = rp[i]; j < rp[i + 1]; ++j)
1514 #define __FUNC__ "insert_diags_private"
1521 int nz = RP[
m] + ct;
1524 rp = A->
rp = (
int *)
MALLOC_DH ((m + 1) *
sizeof (int));
1528 aval = A->
aval = (
double *)
MALLOC_DH (nz *
sizeof (
double));
1532 for (i = 0; i <
m; ++i)
1535 for (j = RP[i]; j < RP[i + 1]; ++j)
1537 cval[idx] = CVAL[j];
1538 aval[idx] = AVAL[j];
1563 #define __FUNC__ "Mat_dhPrintDiags"
1572 "=================== diagonal elements ====================\n");
1573 for (i = 0; i <
m; ++i)
1576 for (j = rp[i]; j < rp[i + 1]; ++j)
1580 fprintf (fp,
"%i %g\n", i + 1, aval[j]);
1587 fprintf (fp,
"%i ---------- missing\n", i + 1);
1594 #define __FUNC__ "Mat_dhGetRow"
1602 "requested globalRow= %i, which is local row= %i, but only have %i rows!",
1603 globalRow, row, B->
m);
1606 *len = B->
rp[row + 1] - B->
rp[row];
1608 *ind = B->
cval + B->
rp[row];
1610 *val = B->
aval + B->
rp[row];
1614 #define __FUNC__ "Mat_dhRestoreRow"
1621 #define __FUNC__ "Mat_dhRowPermute"
1626 SET_V_ERROR (
"turned off; compilation problem on blue");
1629 int i, j, m = mat->
m, nz = mat->
rp[
m];
1638 * = 1:Compute a row permutation of the matrix so that the
1639 * permuted matrix has
as many entries on its diagonal
as
1640 * possible.The values on the diagonal are of arbitrary size.
1641 * HSL subroutine MC21A / AD is used
for this
1642 . * = 2: Compute a row permutation of the matrix so that the smallest * value on the diagonal of the permuted matrix is maximized. * = 3:Compute a row permutation of the matrix so that the smallest
1643 * value on the diagonal of the permuted matrix is maximized.
1644 * The algorithm differs from the one used
for JOB
1645 = 2 and may * have quite a different performance. * = 4: Compute a row permutation of the matrix so that the sum * of the diagonal entries of the permuted matrix is maximized. * = 5:Compute a row permutation of the matrix so that the product
1646 * of the diagonal entries of the permuted matrix is maximized
1647 * and vectors to scale the matrix so that the nonzero diagonal
1648 * entries of the permuted matrix are one in absolute value and
1649 * all the off - diagonal entries are less than or equal to one in
1658 sprintf (
msgBuf_dh,
"calling row permutation with algo= %i", algo);
1661 r1 = (
double *)
MALLOC_DH (m *
sizeof (
double));
1663 c1 = (
double *)
MALLOC_DH (m *
sizeof (
double));
1683 for (i = 0; i < nz; ++i)
1684 cval[i] = o2n[cval[i]];
1689 fprintf (
logFile,
"\n-------- row permutation vector --------\n");
1690 for (i = 0; i <
m; ++i)
1691 fprintf (
logFile,
"%i ", 1 + o2n[i]);
1696 printf (
"\n-------- row permutation vector --------\n");
1697 for (i = 0; i <
m; ++i)
1698 printf (
"%i ", 1 + o2n[i]);
1705 for (i = 0; i <
m; ++i)
1716 printf (
"@@@ [%i] Mat_dhRowPermute :: got natural ordering!\n",
1721 int *rp = B->
rp, *cval = B->
cval;
1727 (
"@@@ [%i] Mat_dhRowPermute :: scaling matrix rows and columns!\n",
1731 for (i = 0; i <
m; i++)
1733 r1[i] = exp (r1[i]);
1734 c1[i] = exp (c1[i]);
1736 for (i = 0; i <
m; i++)
1737 for (j = rp[i]; j < rp[i + 1]; j++)
1738 aval[j] *= r1[cval[j]] * c1[i];
1760 #define __FUNC__ "Mat_dhPartition"
1765 int *RP = mat->
rp, *CVAL = mat->
cval;
1767 int i, j, *
rp, *
cval, idx = 0;
1769 rp = *rpOUT = (
int *)
MALLOC_DH ((m + 1) *
sizeof (int));
1771 cval = *cvalOUT = (
int *)
MALLOC_DH (nz *
sizeof (
int));
1776 for (i = 0; i <
m; ++i)
1778 for (j = RP[i]; j < RP[i + 1]; ++j)
1792 #define __FUNC__ "Mat_dhPartition"
1795 int **beg_rowOUT,
int **row_countOUT,
int **n2oOUT,
1799 #ifndef HAVE_METIS_DH
1804 int *
beg_row, *row_count, *n2o, *o2n, bk,
new, *part;
1806 int i, cutEdgeCount;
1808 int metisOpts[5] = { 0, 0, 0, 0, 0 };
1812 beg_row = *beg_rowOUT = (
int *)
MALLOC_DH (blocks *
sizeof (
int));
1814 row_count = *row_countOUT = (
int *)
MALLOC_DH (blocks *
sizeof (
int));
1816 *n2oOUT = n2o = (
int *)
MALLOC_DH (m *
sizeof (
int));
1818 *o2nOUT = o2n = (
int *)
MALLOC_DH (m *
sizeof (
int));
1822 == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == = Metis arguments:
1824 n - number of nodes rp[], cval[]NULL, NULL, 0
1826 blocksIN, options[5] = 0::0 / 1 use defauls;
1831 == == == == == == == == == == == == == == == == == == == == == == == == ==
1837 part = (
int *)
MALLOC_DH (m *
sizeof (
int));
1841 METIS_PartGraphKway (&m, rp, cval, NULL, NULL,
1842 &zero, &zero, &blocks, metisOpts, &cutEdgeCount, part);
1851 printf_dh (
"\nmetis partitioning vector; blocks= %i\n", blocks);
1852 for (i = 0; i <
m; ++i)
1857 for (i = 0; i < blocks; ++i)
1859 for (i = 0; i <
m; ++i)
1865 for (i = 1; i < blocks; ++i)
1866 beg_row[i] = beg_row[i - 1] + row_count[i - 1];
1871 for (i = 0; i < blocks; ++i)
1874 for (i = 0; i < blocks; ++i)
1881 int *tmp = (
int *)
MALLOC_DH (blocks *
sizeof (
int));
1883 memcpy (tmp, beg_row, blocks *
sizeof (
int));
1884 for (i = 0; i <
m; ++i)
void io_dh_print_ebin_mat_private(int m, int beg_row, int *rp, int *cval, double *aval, int *n2o, int *o2n, Hash_i_dh hash, char *filename)
void Mat_dhReadBIN(Mat_dh *mat, char *filename)
void insert_diags_private(Mat_dh A, int ct)
void Mat_dhMatVecSetup(Mat_dh mat)
void mat_dh_read_csr_private(int *mOUT, int **rpOUT, int **cvalOUT, double **avalOUT, FILE *fp)
void Mat_dhGetRow(Mat_dh B, int globalRow, int *len, int **ind, double **val)
void Mat_dhPermute(Mat_dh A, int *n2o, Mat_dh *Bout)
void mat_dh_print_csr_private(int m, int *rp, int *cval, double *aval, FILE *fp)
void Mat_dhMatVec_uni(Mat_dh mat, double *x, double *b)
void Mat_dhMatVec_uni_omp(Mat_dh mat, double *x, double *b)
void make_symmetric_private(int m, int **rpIN, int **cvalIN, double **avalIN)
void Mat_dhMatVec_omp(Mat_dh mat, double *x, double *b)
void Mat_dhReadCSR(Mat_dh *mat, char *filename)
void Mat_dhPrintDiags(Mat_dh A, FILE *fp)
#define END_FUNC_VAL(retval)
void Mat_dhReadTriples(Mat_dh *mat, int ignore, char *filename)
static void setup_matvec_sends_private(Mat_dh mat, int *inlist)
void Mat_dhFixDiags(Mat_dh A)
static void setup_matvec_receives_private(Mat_dh mat, int *beg_rows, int *end_rows, int reqlen, int *reqind, int *outlist)
bool Parser_dhHasSwitch(Parser_dh p, char *s)
#define MATVEC_TOTAL_TIME
void Mat_dhPrintGraph(Mat_dh A, SubdomainGraph_dh sg, FILE *fp)
void Numbering_dhSetup(Numbering_dh numb, Mat_dh mat)
int Mat_dhReadNz(Mat_dh mat)
void printf_dh(char *fmt,...)
void Mat_dhPrintBIN(Mat_dh A, SubdomainGraph_dh sg, char *filename)
int mat_find_owner(int *beg_rows, int *end_rows, int index)
void Numbering_dhGlobalToLocal(Numbering_dh numb, int len, int *global, int *local)
void Mat_dhRestoreRow(Mat_dh B, int row, int *len, int **ind, double **val)
bool Parser_dhReadInt(Parser_dh p, char *in, int *out)
void mat_dh_read_triples_private(int ignore, int *mOUT, int **rpOUT, int **cvalOUT, double **avalOUT, FILE *fp)
void Mat_dhPrintCSR(Mat_dh A, SubdomainGraph_dh sg, char *filename)
void Mat_dhMatVec(Mat_dh mat, double *x, double *b)
#define CHECK_MPI_ERROR(errCode)
void Mat_dhRowPermute(Mat_dh mat)
double time_min[MAT_DH_BINS]
void Mat_dhMatVecSetdown(Mat_dh mat)
void Mat_dhCreate(Mat_dh *mat)
void io_dh_read_ebin_mat_private(int *m, int **rp, int **cval, double **aval, char *filename)
TypeTo as(const TypeFrom &t)
void Mat_dhPrintTriples(Mat_dh A, SubdomainGraph_dh sg, char *filename)
void mat_dh_transpose_private(int m, int *RP, int **rpOUT, int *CVAL, int **cvalOUT, double *AVAL, double **avalOUT)
void mat_dh_transpose_reuse_private(int m, int *rpIN, int *cvalIN, double *avalIN, int *rpOUT, int *cvalOUT, double *avalOUT)
void dldperm(int job, int n, int nnz, int colptr[], int adjncy[], double nzval[], int *perm, double u[], double v[])
void Mat_dhReduceTiming(Mat_dh mat)
void Mat_dhPartition(Mat_dh mat, int blocks, int **beg_rowOUT, int **row_countOUT, int **n2oOUT, int **o2nOUT)
void Mat_dhTranspose(Mat_dh A, Mat_dh *Bout)
double time_max[MAT_DH_BINS]
void Numbering_dhCreate(Numbering_dh *numb)
void Mat_dhZeroTiming(Mat_dh mat)
void mat_dh_print_graph_private(int m, int beg_row, int *rp, int *cval, double *aval, int *n2o, int *o2n, Hash_i_dh hash, FILE *fp)
void closeFile_dh(FILE *fpIN)
void Numbering_dhDestroy(Numbering_dh numb)
void Mat_dhPrintRows(Mat_dh A, SubdomainGraph_dh sg, FILE *fp)
void build_adj_lists_private(Mat_dh mat, int **rpOUT, int **cvalOUT)
int Hash_i_dhLookup(Hash_i_dh h, int key)
void Mat_dhMakeStructurallySymmetric(Mat_dh A)
FILE * openFile_dh(const char *filenameIN, const char *modeIN)
char msgBuf_dh[MSG_BUF_SIZE_DH]
void Mat_dhDestroy(Mat_dh mat)
#define CHECK_MPI_V_ERROR(errCode)