IFPACK  Development
 All Classes Namespaces Files Functions Variables Enumerations Friends Pages
mat_dh_private.c
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #include "mat_dh_private.h"
44 #include "Parser_dh.h"
45 #include "Hash_i_dh.h"
46 #include "Mat_dh.h"
47 #include "Mem_dh.h"
48 #include "Vec_dh.h"
49 
50 #define IS_UPPER_TRI 97
51 #define IS_LOWER_TRI 98
52 #define IS_FULL 99
53 static int isTriangular (int m, int *rp, int *cval);
54 
55 /* Instantiates Aout; allocates storage for rp, cval, and aval arrays;
56  uses rowLengths[] and rowToBlock[] data to fill in rp[].
57 */
58 static void mat_par_read_allocate_private (Mat_dh * Aout, int n,
59  int *rowLengths, int *rowToBlock);
60 
61 /* Currently, divides (partitions)matrix by contiguous sections of rows.
62  For future expansion: use metis.
63 */
64 void mat_partition_private (Mat_dh A, int blocks, int *o2n_row,
65  int *rowToBlock);
66 
67 
68 static void convert_triples_to_scr_private (int m, int nz,
69  int *I, int *J, double *A,
70  int *rp, int *cval, double *aval);
71 
72 #if 0
73 #undef __FUNC__
74 #define __FUNC__ "mat_dh_print_graph_private"
75 void
76 mat_dh_print_graph_private (int m, int beg_row, int *rp, int *cval,
77  double *aval, int *n2o, int *o2n, Hash_i_dh hash,
78  FILE * fp)
79 {
80  START_FUNC_DH int i, j, row, col;
81  double val;
82  bool private_n2o = false;
83  bool private_hash = false;
84 
85  if (n2o == NULL)
86  {
87  private_n2o = true;
88  create_nat_ordering_private (m, &n2o);
89  CHECK_V_ERROR;
90  create_nat_ordering_private (m, &o2n);
91  CHECK_V_ERROR;
92  }
93 
94  if (hash == NULL)
95  {
96  private_hash = true;
97  Hash_i_dhCreate (&hash, -1);
98  CHECK_V_ERROR;
99  }
100 
101  for (i = 0; i < m; ++i)
102  {
103  row = n2o[i];
104  for (j = rp[row]; j < rp[row + 1]; ++j)
105  {
106  col = cval[j];
107  if (col < beg_row || col >= beg_row + m)
108  {
109  int tmp = col;
110 
111  /* nonlocal column: get permutation from hash table */
112  tmp = Hash_i_dhLookup (hash, col);
113  CHECK_V_ERROR;
114  if (tmp == -1)
115  {
116  sprintf (msgBuf_dh,
117  "beg_row= %i m= %i; nonlocal column= %i not in hash table",
118  beg_row, m, col);
119  SET_V_ERROR (msgBuf_dh);
120  }
121  else
122  {
123  col = tmp;
124  }
125  }
126  else
127  {
128  col = o2n[col];
129  }
130 
131  if (aval == NULL)
132  {
133  val = _MATLAB_ZERO_;
134  }
135  else
136  {
137  val = aval[j];
138  }
139  fprintf (fp, "%i %i %g\n", 1 + row + beg_row, 1 + col, val);
140  }
141  }
142 
143  if (private_n2o)
144  {
145  destroy_nat_ordering_private (n2o);
146  CHECK_V_ERROR;
147  destroy_nat_ordering_private (o2n);
148  CHECK_V_ERROR;
149  }
150 
151  if (private_hash)
152  {
153  Hash_i_dhDestroy (hash);
154  CHECK_V_ERROR;
155  }
156 END_FUNC_DH}
157 
158 #endif
159 
160 
161 /* currently only for unpermuted */
162 #undef __FUNC__
163 #define __FUNC__ "mat_dh_print_graph_private"
164 void
165 mat_dh_print_graph_private (int m, int beg_row, int *rp, int *cval,
166  double *aval, int *n2o, int *o2n, Hash_i_dh hash,
167  FILE * fp)
168 {
169  START_FUNC_DH int i, j, row, col;
170  bool private_n2o = false;
171  bool private_hash = false;
172  int *work = NULL;
173 
174  work = (int *) MALLOC_DH (m * sizeof (int));
175  CHECK_V_ERROR;
176 
177  if (n2o == NULL)
178  {
179  private_n2o = true;
180  create_nat_ordering_private (m, &n2o);
181  CHECK_V_ERROR;
182  create_nat_ordering_private (m, &o2n);
183  CHECK_V_ERROR;
184  }
185 
186  if (hash == NULL)
187  {
188  private_hash = true;
189  Hash_i_dhCreate (&hash, -1);
190  CHECK_V_ERROR;
191  }
192 
193  for (i = 0; i < m; ++i)
194  {
195  for (j = 0; j < m; ++j)
196  work[j] = 0;
197  row = n2o[i];
198  for (j = rp[row]; j < rp[row + 1]; ++j)
199  {
200  col = cval[j];
201 
202  /* local column */
203  if (col >= beg_row || col < beg_row + m)
204  {
205  col = o2n[col];
206  }
207 
208  /* nonlocal column: get permutation from hash table */
209  else
210  {
211  int tmp = col;
212 
213  tmp = Hash_i_dhLookup (hash, col);
214  CHECK_V_ERROR;
215  if (tmp == -1)
216  {
217  sprintf (msgBuf_dh,
218  "beg_row= %i m= %i; nonlocal column= %i not in hash table",
219  beg_row, m, col);
220  SET_V_ERROR (msgBuf_dh);
221  }
222  else
223  {
224  col = tmp;
225  }
226  }
227 
228  work[col] = 1;
229  }
230 
231  for (j = 0; j < m; ++j)
232  {
233  if (work[j])
234  {
235  fprintf (fp, " x ");
236  }
237  else
238  {
239  fprintf (fp, " ");
240  }
241  }
242  fprintf (fp, "\n");
243  }
244 
245  if (private_n2o)
246  {
247  destroy_nat_ordering_private (n2o);
248  CHECK_V_ERROR;
249  destroy_nat_ordering_private (o2n);
250  CHECK_V_ERROR;
251  }
252 
253  if (private_hash)
254  {
255  Hash_i_dhDestroy (hash);
256  CHECK_V_ERROR;
257  }
258 
259  if (work != NULL)
260  {
261  FREE_DH (work);
262  CHECK_V_ERROR;
263  }
264 END_FUNC_DH}
265 
266 #undef __FUNC__
267 #define __FUNC__ "create_nat_ordering_private"
268 void
269 create_nat_ordering_private (int m, int **p)
270 {
271  START_FUNC_DH int *tmp, i;
272 
273  tmp = *p = (int *) MALLOC_DH (m * sizeof (int));
274  CHECK_V_ERROR;
275  for (i = 0; i < m; ++i)
276  {
277  tmp[i] = i;
278  }
279 END_FUNC_DH}
280 
281 #undef __FUNC__
282 #define __FUNC__ "destroy_nat_ordering_private"
283 void
284 destroy_nat_ordering_private (int *p)
285 {
286  START_FUNC_DH FREE_DH (p);
287  CHECK_V_ERROR;
288 END_FUNC_DH}
289 
290 
291 #undef __FUNC__
292 #define __FUNC__ "invert_perm"
293 void
294 invert_perm (int m, int *pIN, int *pOUT)
295 {
296  START_FUNC_DH int i;
297 
298  for (i = 0; i < m; ++i)
299  pOUT[pIN[i]] = i;
300 END_FUNC_DH}
301 
302 
303 
304 /* only implemented for a single cpu! */
305 #undef __FUNC__
306 #define __FUNC__ "mat_dh_print_csr_private"
307 void
308 mat_dh_print_csr_private (int m, int *rp, int *cval, double *aval, FILE * fp)
309 {
310  START_FUNC_DH int i, nz = rp[m];
311 
312  /* print header line */
313  fprintf (fp, "%i %i\n", m, rp[m]);
314 
315  /* print rp[] */
316  for (i = 0; i <= m; ++i)
317  fprintf (fp, "%i ", rp[i]);
318  fprintf (fp, "\n");
319 
320  /* print cval[] */
321  for (i = 0; i < nz; ++i)
322  fprintf (fp, "%i ", cval[i]);
323  fprintf (fp, "\n");
324 
325  /* print aval[] */
326  for (i = 0; i < nz; ++i)
327  fprintf (fp, "%1.19e ", aval[i]);
328  fprintf (fp, "\n");
329 
330 END_FUNC_DH}
331 
332 
333 /* only implemented for a single cpu! */
334 #undef __FUNC__
335 #define __FUNC__ "mat_dh_read_csr_private"
336 void
337 mat_dh_read_csr_private (int *mOUT, int **rpOUT, int **cvalOUT,
338  double **avalOUT, FILE * fp)
339 {
340  START_FUNC_DH int i, m, nz, items;
341  int *rp, *cval;
342  double *aval;
343 
344  /* read header line */
345  items = fscanf (fp, "%d %d", &m, &nz);
346  if (items != 2)
347  {
348  SET_V_ERROR ("failed to read header");
349  }
350  else
351  {
352  printf ("mat_dh_read_csr_private:: m= %i nz= %i\n", m, nz);
353  }
354 
355  *mOUT = m;
356  rp = *rpOUT = (int *) MALLOC_DH ((m + 1) * sizeof (int));
357  CHECK_V_ERROR;
358  cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int));
359  CHECK_V_ERROR;
360  aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double));
361  CHECK_V_ERROR;
362 
363  /* read rp[] block */
364  for (i = 0; i <= m; ++i)
365  {
366  items = fscanf (fp, "%d", &(rp[i]));
367  if (items != 1)
368  {
369  sprintf (msgBuf_dh, "failed item %i of %i in rp block", i, m + 1);
370  SET_V_ERROR (msgBuf_dh);
371  }
372  }
373 
374  /* read cval[] block */
375  for (i = 0; i < nz; ++i)
376  {
377  items = fscanf (fp, "%d", &(cval[i]));
378  if (items != 1)
379  {
380  sprintf (msgBuf_dh, "failed item %i of %i in cval block", i, m + 1);
381  SET_V_ERROR (msgBuf_dh);
382  }
383  }
384 
385  /* read aval[] block */
386  for (i = 0; i < nz; ++i)
387  {
388  items = fscanf (fp, "%lg", &(aval[i]));
389  if (items != 1)
390  {
391  sprintf (msgBuf_dh, "failed item %i of %i in aval block", i, m + 1);
392  SET_V_ERROR (msgBuf_dh);
393  }
394  }
395 END_FUNC_DH}
396 
397 /*============================================*/
398 #define MAX_JUNK 200
399 
400 #undef __FUNC__
401 #define __FUNC__ "mat_dh_read_triples_private"
402 void
403 mat_dh_read_triples_private (int ignore, int *mOUT, int **rpOUT,
404  int **cvalOUT, double **avalOUT, FILE * fp)
405 {
406  START_FUNC_DH int m, n, nz, items, i, j;
407  int idx = 0;
408  int *cval, *rp, *I, *J;
409  double *aval, *A, v;
410  char junk[MAX_JUNK];
411  fpos_t fpos;
412 
413  /* skip over header */
414  if (ignore && myid_dh == 0)
415  {
416  printf
417  ("mat_dh_read_triples_private:: ignoring following header lines:\n");
418  printf
419  ("--------------------------------------------------------------\n");
420  for (i = 0; i < ignore; ++i)
421  {
422  fgets (junk, MAX_JUNK, fp);
423  printf ("%s", junk);
424  }
425  printf
426  ("--------------------------------------------------------------\n");
427  if (fgetpos (fp, &fpos))
428  SET_V_ERROR ("fgetpos failed!");
429  printf ("\nmat_dh_read_triples_private::1st two non-ignored lines:\n");
430  printf
431  ("--------------------------------------------------------------\n");
432  for (i = 0; i < 2; ++i)
433  {
434  fgets (junk, MAX_JUNK, fp);
435  printf ("%s", junk);
436  }
437  printf
438  ("--------------------------------------------------------------\n");
439  if (fsetpos (fp, &fpos))
440  SET_V_ERROR ("fsetpos failed!");
441  }
442 
443 
444  if (feof (fp))
445  printf ("trouble!");
446 
447  /* determine matrix dimensions */
448  m = n = nz = 0;
449  while (!feof (fp))
450  {
451  items = fscanf (fp, "%d %d %lg", &i, &j, &v);
452  if (items != 3)
453  {
454  break;
455  }
456  ++nz;
457  if (i > m)
458  m = i;
459  if (j > n)
460  n = j;
461  }
462 
463  if (myid_dh == 0)
464  {
465  printf ("mat_dh_read_triples_private: m= %i nz= %i\n", m, nz);
466  }
467 
468 
469  /* reset file, and skip over header again */
470  rewind (fp);
471  for (i = 0; i < ignore; ++i)
472  {
473  fgets (junk, MAX_JUNK, fp);
474  }
475 
476  /* error check for squareness */
477  if (m != n)
478  {
479  sprintf (msgBuf_dh, "matrix is not square; row= %i, cols= %i", m, n);
480  SET_V_ERROR (msgBuf_dh);
481  }
482 
483  *mOUT = m;
484 
485  /* allocate storage */
486  rp = *rpOUT = (int *) MALLOC_DH ((m + 1) * sizeof (int));
487  CHECK_V_ERROR;
488  cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int));
489  CHECK_V_ERROR;
490  aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double));
491  CHECK_V_ERROR;
492 
493  I = (int *) MALLOC_DH (nz * sizeof (int));
494  CHECK_V_ERROR;
495  J = (int *) MALLOC_DH (nz * sizeof (int));
496  CHECK_V_ERROR;
497  A = (double *) MALLOC_DH (nz * sizeof (double));
498  CHECK_V_ERROR;
499 
500  /* read <row, col, value> triples into arrays */
501  while (!feof (fp))
502  {
503  items = fscanf (fp, "%d %d %lg", &i, &j, &v);
504  if (items < 3)
505  break;
506  j--;
507  i--;
508  I[idx] = i;
509  J[idx] = j;
510  A[idx] = v;
511  ++idx;
512  }
513 
514  /* convert from triples to sparse-compressed-row storage */
515  convert_triples_to_scr_private (m, nz, I, J, A, rp, cval, aval);
516  CHECK_V_ERROR;
517 
518  /* if matrix is triangular */
519  {
520  int type;
521  type = isTriangular (m, rp, cval);
522  CHECK_V_ERROR;
523  if (type == IS_UPPER_TRI)
524  {
525  printf ("CAUTION: matrix is upper triangular; converting to full\n");
526  }
527  else if (type == IS_LOWER_TRI)
528  {
529  printf ("CAUTION: matrix is lower triangular; converting to full\n");
530  }
531 
532  if (type == IS_UPPER_TRI || type == IS_LOWER_TRI)
533  {
534  make_full_private (m, &rp, &cval, &aval);
535  CHECK_V_ERROR;
536  }
537  }
538 
539  *rpOUT = rp;
540  *cvalOUT = cval;
541  *avalOUT = aval;
542 
543  FREE_DH (I);
544  CHECK_V_ERROR;
545  FREE_DH (J);
546  CHECK_V_ERROR;
547  FREE_DH (A);
548  CHECK_V_ERROR;
549 
550 END_FUNC_DH}
551 
552 #undef __FUNC__
553 #define __FUNC__ "convert_triples_to_scr_private"
554 void
555 convert_triples_to_scr_private (int m, int nz, int *I, int *J, double *A,
556  int *rp, int *cval, double *aval)
557 {
558  START_FUNC_DH int i;
559  int *rowCounts;
560 
561  rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int));
562  CHECK_V_ERROR;
563  for (i = 0; i < m; ++i)
564  rowCounts[i] = 0;
565 
566  /* count number of entries in each row */
567  for (i = 0; i < nz; ++i)
568  {
569  int row = I[i];
570  rowCounts[row] += 1;
571  }
572 
573  /* prefix-sum to form rp[] */
574  rp[0] = 0;
575  for (i = 1; i <= m; ++i)
576  {
577  rp[i] = rp[i - 1] + rowCounts[i - 1];
578  }
579  memcpy (rowCounts, rp, (m + 1) * sizeof (int));
580 
581  /* write SCR arrays */
582  for (i = 0; i < nz; ++i)
583  {
584  int row = I[i];
585  int col = J[i];
586  double val = A[i];
587  int idx = rowCounts[row];
588  rowCounts[row] += 1;
589 
590  cval[idx] = col;
591  aval[idx] = val;
592  }
593 
594 
595  FREE_DH (rowCounts);
596  CHECK_V_ERROR;
597 END_FUNC_DH}
598 
599 
600 /*======================================================================
601  * utilities for use in drivers that read, write, convert, and/or
602  * compare different file types
603  *======================================================================*/
604 
605 void fix_diags_private (Mat_dh A);
606 void insert_missing_diags_private (Mat_dh A);
607 
608 #undef __FUNC__
609 #define __FUNC__ "readMat"
610 void
611 readMat (Mat_dh * Aout, char *ft, char *fn, int ignore)
612 {
613  START_FUNC_DH bool makeStructurallySymmetric;
614  bool fixDiags;
615  *Aout = NULL;
616 
617  makeStructurallySymmetric =
618  Parser_dhHasSwitch (parser_dh, "-makeSymmetric");
619  fixDiags = Parser_dhHasSwitch (parser_dh, "-fixDiags");
620 
621  if (fn == NULL)
622  {
623  SET_V_ERROR ("passed NULL filename; can't open for reading!");
624  }
625 
626  if (!strcmp (ft, "csr"))
627  {
628  Mat_dhReadCSR (Aout, fn);
629  CHECK_V_ERROR;
630  }
631 
632  else if (!strcmp (ft, "trip"))
633  {
634  Mat_dhReadTriples (Aout, ignore, fn);
635  CHECK_V_ERROR;
636  }
637 
638  else if (!strcmp (ft, "ebin"))
639  {
640  Mat_dhReadBIN (Aout, fn);
641  CHECK_V_ERROR;
642  }
643 
644  else if (!strcmp (ft, "petsc"))
645  {
646  sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
647  SET_V_ERROR (msgBuf_dh);
648  }
649 
650  else
651  {
652  sprintf (msgBuf_dh, "unknown filetype: -ftin %s", ft);
653  SET_V_ERROR (msgBuf_dh);
654  }
655 
656  if (makeStructurallySymmetric)
657  {
658  printf ("\npadding with zeros to make structurally symmetric\n");
659  Mat_dhMakeStructurallySymmetric (*Aout);
660  CHECK_V_ERROR;
661  }
662 
663  if ((*Aout)->m == 0)
664  {
665  SET_V_ERROR ("row count = 0; something's wrong!");
666  }
667 
668  if (fixDiags)
669  {
670  fix_diags_private (*Aout);
671  CHECK_V_ERROR;
672  }
673 
674 END_FUNC_DH}
675 
676 
677 #undef __FUNC__
678 #define __FUNC__ "fix_diags_private"
679 void
680 fix_diags_private (Mat_dh A)
681 {
682  START_FUNC_DH int i, j, m = A->m, *rp = A->rp, *cval = A->cval;
683  double *aval = A->aval;
684  bool insertDiags = false;
685 
686  /* verify that all diagonals are present */
687  for (i = 0; i < m; ++i)
688  {
689  bool isMissing = true;
690  for (j = rp[i]; j < rp[i + 1]; ++j)
691  {
692  if (cval[j] == i)
693  {
694  isMissing = false;
695  break;
696  }
697  }
698  if (isMissing)
699  {
700  insertDiags = true;
701  break;
702  }
703  }
704 
705  if (insertDiags)
706  {
707  insert_missing_diags_private (A);
708  CHECK_V_ERROR;
709  rp = A->rp;
710  cval = A->cval;
711  aval = A->aval;
712  }
713 
714  /* set value of all diags to largest absolute value in each row */
715  for (i = 0; i < m; ++i)
716  {
717  double sum = 0;
718  for (j = rp[i]; j < rp[i + 1]; ++j)
719  {
720  sum = MAX (sum, fabs (aval[j]));
721  }
722  for (j = rp[i]; j < rp[i + 1]; ++j)
723  {
724  if (cval[j] == i)
725  {
726  aval[j] = sum;
727  break;
728  }
729  }
730  }
731 
732 END_FUNC_DH}
733 
734 #undef __FUNC__
735 #define __FUNC__ "insert_missing_diags_private"
736 void
737 insert_missing_diags_private (Mat_dh A)
738 {
739  START_FUNC_DH int *RP = A->rp, *CVAL = A->cval, m = A->m;
740  int *rp, *cval;
741  double *AVAL = A->aval, *aval;
742  int i, j, nz = RP[m] + m;
743  int idx = 0;
744 
745  rp = A->rp = (int *) MALLOC_DH ((1 + m) * sizeof (int));
746  CHECK_V_ERROR;
747  cval = A->cval = (int *) MALLOC_DH (nz * sizeof (int));
748  CHECK_V_ERROR;
749  aval = A->aval = (double *) MALLOC_DH (nz * sizeof (double));
750  CHECK_V_ERROR;
751  rp[0] = 0;
752 
753  for (i = 0; i < m; ++i)
754  {
755  bool isMissing = true;
756  for (j = RP[i]; j < RP[i + 1]; ++j)
757  {
758  cval[idx] = CVAL[j];
759  aval[idx] = AVAL[j];
760  ++idx;
761  if (CVAL[j] == i)
762  isMissing = false;
763  }
764  if (isMissing)
765  {
766  cval[idx] = i;
767  aval[idx] = 0.0;
768  ++idx;
769  }
770  rp[i + 1] = idx;
771  }
772 
773  FREE_DH (RP);
774  CHECK_V_ERROR;
775  FREE_DH (CVAL);
776  CHECK_V_ERROR;
777  FREE_DH (AVAL);
778  CHECK_V_ERROR;
779 END_FUNC_DH}
780 
781 #undef __FUNC__
782 #define __FUNC__ "readVec"
783 void
784 readVec (Vec_dh * bout, char *ft, char *fn, int ignore)
785 {
786  START_FUNC_DH * bout = NULL;
787 
788  if (fn == NULL)
789  {
790  SET_V_ERROR ("passed NULL filename; can't open for reading!");
791  }
792 
793  if (!strcmp (ft, "csr") || !strcmp (ft, "trip"))
794  {
795  Vec_dhRead (bout, ignore, fn);
796  CHECK_V_ERROR;
797  }
798 
799  else if (!strcmp (ft, "ebin"))
800  {
801  Vec_dhReadBIN (bout, fn);
802  CHECK_V_ERROR;
803  }
804 
805  else if (!strcmp (ft, "petsc"))
806  {
807  sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
808  SET_V_ERROR (msgBuf_dh);
809  }
810 
811  else
812  {
813  sprintf (msgBuf_dh, "unknown filetype: -ftin %s", ft);
814  SET_V_ERROR (msgBuf_dh);
815  }
816 
817 END_FUNC_DH}
818 
819 
820 #undef __FUNC__
821 #define __FUNC__ "writeMat"
822 void
823 writeMat (Mat_dh Ain, char *ft, char *fn)
824 {
825  START_FUNC_DH if (fn == NULL)
826  {
827  SET_V_ERROR ("passed NULL filename; can't open for writing!");
828  }
829 
830  if (!strcmp (ft, "csr"))
831  {
832  Mat_dhPrintCSR (Ain, NULL, fn);
833  CHECK_V_ERROR;
834  }
835 
836  else if (!strcmp (ft, "trip"))
837  {
838  Mat_dhPrintTriples (Ain, NULL, fn);
839  CHECK_V_ERROR;
840  }
841 
842  else if (!strcmp (ft, "ebin"))
843  {
844  Mat_dhPrintBIN (Ain, NULL, fn);
845  CHECK_V_ERROR;
846  }
847 
848 
849  else if (!strcmp (ft, "petsc"))
850  {
851  sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
852  SET_V_ERROR (msgBuf_dh);
853  }
854 
855  else
856  {
857  sprintf (msgBuf_dh, "unknown filetype: -ftout %s", ft);
858  SET_V_ERROR (msgBuf_dh);
859  }
860 
861 END_FUNC_DH}
862 
863 #undef __FUNC__
864 #define __FUNC__ "writeVec"
865 void
866 writeVec (Vec_dh bin, char *ft, char *fn)
867 {
868  START_FUNC_DH if (fn == NULL)
869  {
870  SET_V_ERROR ("passed NULL filename; can't open for writing!");
871  }
872 
873  if (!strcmp (ft, "csr") || !strcmp (ft, "trip"))
874  {
875  Vec_dhPrint (bin, NULL, fn);
876  CHECK_V_ERROR;
877  }
878 
879  else if (!strcmp (ft, "ebin"))
880  {
881  Vec_dhPrintBIN (bin, NULL, fn);
882  CHECK_V_ERROR;
883  }
884 
885  else if (!strcmp (ft, "petsc"))
886  {
887  sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!");
888  SET_V_ERROR (msgBuf_dh);
889  }
890 
891  else
892  {
893  sprintf (msgBuf_dh, "unknown filetype: -ftout %s", ft);
894  SET_V_ERROR (msgBuf_dh);
895  }
896 
897 END_FUNC_DH}
898 
899 #undef __FUNC__
900 #define __FUNC__ "isTriangular"
901 int
902 isTriangular (int m, int *rp, int *cval)
903 {
904  START_FUNC_DH int row, j;
905  int type;
906  bool type_lower = false, type_upper = false;
907 
908  if (np_dh > 1)
909  {
910  SET_ERROR (-1, "only implemented for a single cpu");
911  }
912 
913  for (row = 0; row < m; ++row)
914  {
915  for (j = rp[row]; j < rp[row + 1]; ++j)
916  {
917  int col = cval[j];
918  if (col < row)
919  type_lower = true;
920  if (col > row)
921  type_upper = true;
922  }
923  if (type_lower && type_upper)
924  break;
925  }
926 
927  if (type_lower && type_upper)
928  {
929  type = IS_FULL;
930  }
931  else if (type_lower)
932  {
933  type = IS_LOWER_TRI;
934  }
935  else
936  {
937  type = IS_UPPER_TRI;
938  }
939 END_FUNC_VAL (type)}
940 
941 /*-----------------------------------------------------------------------------------*/
942 
943 static void mat_dh_transpose_reuse_private_private (bool allocateMem, int m,
944  int *rpIN, int *cvalIN,
945  double *avalIN,
946  int **rpOUT,
947  int **cvalOUT,
948  double **avalOUT);
949 
950 
951 #undef __FUNC__
952 #define __FUNC__ "mat_dh_transpose_reuse_private"
953 void
954 mat_dh_transpose_reuse_private (int m,
955  int *rpIN, int *cvalIN, double *avalIN,
956  int *rpOUT, int *cvalOUT, double *avalOUT)
957 {
958  START_FUNC_DH
959  mat_dh_transpose_reuse_private_private (false, m, rpIN, cvalIN, avalIN,
960  &rpOUT, &cvalOUT, &avalOUT);
961  CHECK_V_ERROR;
962 END_FUNC_DH}
963 
964 
965 #undef __FUNC__
966 #define __FUNC__ "mat_dh_transpose_private"
967 void
968 mat_dh_transpose_private (int m, int *RP, int **rpOUT,
969  int *CVAL, int **cvalOUT,
970  double *AVAL, double **avalOUT)
971 {
972  START_FUNC_DH
973  mat_dh_transpose_reuse_private_private (true, m, RP, CVAL, AVAL,
974  rpOUT, cvalOUT, avalOUT);
975  CHECK_V_ERROR;
976 END_FUNC_DH}
977 
978 #undef __FUNC__
979 #define __FUNC__ "mat_dh_transpose_private_private"
980 void
981 mat_dh_transpose_reuse_private_private (bool allocateMem, int m,
982  int *RP, int *CVAL, double *AVAL,
983  int **rpOUT, int **cvalOUT,
984  double **avalOUT)
985 {
986  START_FUNC_DH int *rp, *cval, *tmp;
987  int i, j, nz = RP[m];
988  double *aval;
989 
990  if (allocateMem)
991  {
992  rp = *rpOUT = (int *) MALLOC_DH ((1 + m) * sizeof (int));
993  CHECK_V_ERROR;
994  cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int));
995  CHECK_V_ERROR;
996  if (avalOUT != NULL)
997  {
998  aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double));
999  CHECK_V_ERROR;
1000  }
1001  }
1002  else
1003  {
1004  rp = *rpOUT;
1005  cval = *cvalOUT;
1006  if (avalOUT != NULL)
1007  aval = *avalOUT;
1008  }
1009 
1010 
1011  tmp = (int *) MALLOC_DH ((1 + m) * sizeof (int));
1012  CHECK_V_ERROR;
1013  for (i = 0; i <= m; ++i)
1014  tmp[i] = 0;
1015 
1016  for (i = 0; i < m; ++i)
1017  {
1018  for (j = RP[i]; j < RP[i + 1]; ++j)
1019  {
1020  int col = CVAL[j];
1021  tmp[col + 1] += 1;
1022  }
1023  }
1024  for (i = 1; i <= m; ++i)
1025  tmp[i] += tmp[i - 1];
1026  memcpy (rp, tmp, (m + 1) * sizeof (int));
1027 
1028  if (avalOUT != NULL)
1029  {
1030  for (i = 0; i < m; ++i)
1031  {
1032  for (j = RP[i]; j < RP[i + 1]; ++j)
1033  {
1034  int col = CVAL[j];
1035  int idx = tmp[col];
1036  cval[idx] = i;
1037  aval[idx] = AVAL[j];
1038  tmp[col] += 1;
1039  }
1040  }
1041  }
1042 
1043  else
1044  {
1045  for (i = 0; i < m; ++i)
1046  {
1047  for (j = RP[i]; j < RP[i + 1]; ++j)
1048  {
1049  int col = CVAL[j];
1050  int idx = tmp[col];
1051  cval[idx] = i;
1052  tmp[col] += 1;
1053  }
1054  }
1055  }
1056 
1057  FREE_DH (tmp);
1058  CHECK_V_ERROR;
1059 END_FUNC_DH}
1060 
1061 /*-----------------------------------------------------------------------------------*/
1062 
1063 #undef __FUNC__
1064 #define __FUNC__ "mat_find_owner"
1065 int
1066 mat_find_owner (int *beg_rows, int *end_rows, int index)
1067 {
1068  START_FUNC_DH int pe, owner = -1;
1069 
1070  for (pe = 0; pe < np_dh; ++pe)
1071  {
1072  if (index >= beg_rows[pe] && index < end_rows[pe])
1073  {
1074  owner = pe;
1075  break;
1076  }
1077  }
1078 
1079  if (owner == -1)
1080  {
1081  sprintf (msgBuf_dh, "failed to find owner for index= %i", index);
1082  SET_ERROR (-1, msgBuf_dh);
1083  }
1084 
1085 END_FUNC_VAL (owner)}
1086 
1087 
1088 #define AVAL_TAG 2
1089 #define CVAL_TAG 3
1090 void partition_and_distribute_private (Mat_dh A, Mat_dh * Bout);
1091 void partition_and_distribute_metis_private (Mat_dh A, Mat_dh * Bout);
1092 
1093 #undef __FUNC__
1094 #define __FUNC__ "readMat_par"
1095 void
1096 readMat_par (Mat_dh * Aout, char *fileType, char *fileName, int ignore)
1097 {
1098  START_FUNC_DH Mat_dh A = NULL;
1099 
1100  if (myid_dh == 0)
1101  {
1102  int tmp = np_dh;
1103  np_dh = 1;
1104  readMat (&A, fileType, fileName, ignore);
1105  CHECK_V_ERROR;
1106  np_dh = tmp;
1107  }
1108 
1109  if (np_dh == 1)
1110  {
1111  *Aout = A;
1112  }
1113  else
1114  {
1115  if (Parser_dhHasSwitch (parser_dh, "-metis"))
1116  {
1117  partition_and_distribute_metis_private (A, Aout);
1118  CHECK_V_ERROR;
1119  }
1120  else
1121  {
1122  partition_and_distribute_private (A, Aout);
1123  CHECK_V_ERROR;
1124  }
1125  }
1126 
1127  if (np_dh > 1 && A != NULL)
1128  {
1129  Mat_dhDestroy (A);
1130  CHECK_V_ERROR;
1131  }
1132 
1133 
1134  if (Parser_dhHasSwitch (parser_dh, "-printMAT"))
1135  {
1136  char xname[] = "A", *name = xname;
1137  Parser_dhReadString (parser_dh, "-printMat", &name);
1138  Mat_dhPrintTriples (*Aout, NULL, name);
1139  CHECK_V_ERROR;
1140  printf_dh ("\n@@@ readMat_par: printed mat to %s\n\n", xname);
1141  }
1142 
1143 
1144 END_FUNC_DH}
1145 
1146 /* this is bad code! */
1147 #undef __FUNC__
1148 #define __FUNC__ "partition_and_distribute_metis_private"
1149 void
1150 partition_and_distribute_metis_private (Mat_dh A, Mat_dh * Bout)
1151 {
1152  START_FUNC_DH Mat_dh B = NULL;
1153  Mat_dh C = NULL;
1154  int i, m;
1155  int *rowLengths = NULL;
1156  int *o2n_row = NULL, *n2o_col = NULL, *rowToBlock = NULL;
1157  int *beg_row = NULL, *row_count = NULL;
1158  MPI_Request *send_req = NULL;
1159  MPI_Request *rcv_req = NULL;
1160  MPI_Status *send_status = NULL;
1161  MPI_Status *rcv_status = NULL;
1162 
1163  MPI_Barrier (comm_dh);
1164  printf_dh ("@@@ partitioning with metis\n");
1165 
1166  /* broadcast number of rows to all processors */
1167  if (myid_dh == 0)
1168  m = A->m;
1169  MPI_Bcast (&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
1170 
1171  /* broadcast number of nonzeros in each row to all processors */
1172  rowLengths = (int *) MALLOC_DH (m * sizeof (int));
1173  CHECK_V_ERROR;
1174  rowToBlock = (int *) MALLOC_DH (m * sizeof (int));
1175  CHECK_V_ERROR;
1176 
1177  if (myid_dh == 0)
1178  {
1179  int *tmp = A->rp;
1180  for (i = 0; i < m; ++i)
1181  {
1182  rowLengths[i] = tmp[i + 1] - tmp[i];
1183  }
1184  }
1185  MPI_Bcast (rowLengths, m, MPI_INT, 0, comm_dh);
1186 
1187  /* partition matrix */
1188  if (myid_dh == 0)
1189  {
1190  int idx = 0;
1191  int j;
1192 
1193  /* partition and permute matrix */
1194  Mat_dhPartition (A, np_dh, &beg_row, &row_count, &n2o_col, &o2n_row);
1195  ERRCHKA;
1196  Mat_dhPermute (A, n2o_col, &C);
1197  ERRCHKA;
1198 
1199  /* form rowToBlock array */
1200  for (i = 0; i < np_dh; ++i)
1201  {
1202  for (j = beg_row[i]; j < beg_row[i] + row_count[i]; ++j)
1203  {
1204  rowToBlock[idx++] = i;
1205  }
1206  }
1207  }
1208 
1209  /* broadcast partitiioning information to all processors */
1210  MPI_Bcast (rowToBlock, m, MPI_INT, 0, comm_dh);
1211 
1212  /* allocate storage for local portion of matrix */
1213  mat_par_read_allocate_private (&B, m, rowLengths, rowToBlock);
1214  CHECK_V_ERROR;
1215 
1216  /* root sends each processor its portion of the matrix */
1217  if (myid_dh == 0)
1218  {
1219  int *cval = C->cval, *rp = C->rp;
1220  double *aval = C->aval;
1221  send_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
1222  CHECK_V_ERROR;
1223  send_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
1224  CHECK_V_ERROR;
1225  for (i = 0; i < m; ++i)
1226  {
1227  int owner = rowToBlock[i];
1228  int count = rp[i + 1] - rp[i];
1229 
1230  /* error check for empty row */
1231  if (!count)
1232  {
1233  sprintf (msgBuf_dh, "row %i of %i is empty!", i + 1, m);
1234  SET_V_ERROR (msgBuf_dh);
1235  }
1236 
1237  MPI_Isend (cval + rp[i], count, MPI_INT, owner, CVAL_TAG, comm_dh,
1238  send_req + 2 * i);
1239  MPI_Isend (aval + rp[i], count, MPI_DOUBLE, owner, AVAL_TAG,
1240  comm_dh, send_req + 2 * i + 1);
1241  }
1242  }
1243 
1244  /* all processors receive their local rows */
1245  {
1246  int *cval = B->cval;
1247  int *rp = B->rp;
1248  double *aval = B->aval;
1249  m = B->m;
1250 
1251  rcv_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
1252  CHECK_V_ERROR;
1253  rcv_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
1254  CHECK_V_ERROR;
1255 
1256  for (i = 0; i < m; ++i)
1257  {
1258 
1259  /* error check for empty row */
1260  int count = rp[i + 1] - rp[i];
1261  if (!count)
1262  {
1263  sprintf (msgBuf_dh, "local row %i of %i is empty!", i + 1, m);
1264  SET_V_ERROR (msgBuf_dh);
1265  }
1266 
1267  MPI_Irecv (cval + rp[i], count, MPI_INT, 0, CVAL_TAG, comm_dh,
1268  rcv_req + 2 * i);
1269  MPI_Irecv (aval + rp[i], count, MPI_DOUBLE, 0, AVAL_TAG, comm_dh,
1270  rcv_req + 2 * i + 1);
1271  }
1272  }
1273 
1274  /* wait for all sends/receives to finish */
1275  if (myid_dh == 0)
1276  {
1277  MPI_Waitall (m * 2, send_req, send_status);
1278  }
1279  MPI_Waitall (2 * B->m, rcv_req, rcv_status);
1280 
1281  /* clean up */
1282  if (rowLengths != NULL)
1283  {
1284  FREE_DH (rowLengths);
1285  CHECK_V_ERROR;
1286  }
1287  if (o2n_row != NULL)
1288  {
1289  FREE_DH (o2n_row);
1290  CHECK_V_ERROR;
1291  }
1292  if (n2o_col != NULL)
1293  {
1294  FREE_DH (n2o_col);
1295  CHECK_V_ERROR;
1296  }
1297  if (rowToBlock != NULL)
1298  {
1299  FREE_DH (rowToBlock);
1300  CHECK_V_ERROR;
1301  }
1302  if (send_req != NULL)
1303  {
1304  FREE_DH (send_req);
1305  CHECK_V_ERROR;
1306  }
1307  if (rcv_req != NULL)
1308  {
1309  FREE_DH (rcv_req);
1310  CHECK_V_ERROR;
1311  }
1312  if (send_status != NULL)
1313  {
1314  FREE_DH (send_status);
1315  CHECK_V_ERROR;
1316  }
1317  if (rcv_status != NULL)
1318  {
1319  FREE_DH (rcv_status);
1320  CHECK_V_ERROR;
1321  }
1322  if (beg_row != NULL)
1323  {
1324  FREE_DH (beg_row);
1325  CHECK_V_ERROR;
1326  }
1327  if (row_count != NULL)
1328  {
1329  FREE_DH (row_count);
1330  CHECK_V_ERROR;
1331  }
1332  if (C != NULL)
1333  {
1334  Mat_dhDestroy (C);
1335  ERRCHKA;
1336  }
1337 
1338  *Bout = B;
1339 
1340 END_FUNC_DH}
1341 
1342 
1343 #undef __FUNC__
1344 #define __FUNC__ "partition_and_distribute_private"
1345 void
1346 partition_and_distribute_private (Mat_dh A, Mat_dh * Bout)
1347 {
1348  START_FUNC_DH Mat_dh B = NULL;
1349  int i, m;
1350  int *rowLengths = NULL;
1351  int *o2n_row = NULL, *n2o_col = NULL, *rowToBlock = NULL;
1352  MPI_Request *send_req = NULL;
1353  MPI_Request *rcv_req = NULL;
1354  MPI_Status *send_status = NULL;
1355  MPI_Status *rcv_status = NULL;
1356 
1357  MPI_Barrier (comm_dh);
1358 
1359  /* broadcast number of rows to all processors */
1360  if (myid_dh == 0)
1361  m = A->m;
1362  MPI_Bcast (&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
1363 
1364  /* broadcast number of nonzeros in each row to all processors */
1365  rowLengths = (int *) MALLOC_DH (m * sizeof (int));
1366  CHECK_V_ERROR;
1367  if (myid_dh == 0)
1368  {
1369  int *tmp = A->rp;
1370  for (i = 0; i < m; ++i)
1371  {
1372  rowLengths[i] = tmp[i + 1] - tmp[i];
1373  }
1374  }
1375  MPI_Bcast (rowLengths, m, MPI_INT, 0, comm_dh);
1376 
1377  /* partition matrix */
1378  rowToBlock = (int *) MALLOC_DH (m * sizeof (int));
1379  CHECK_V_ERROR;
1380 
1381  if (myid_dh == 0)
1382  {
1383  o2n_row = (int *) MALLOC_DH (m * sizeof (int));
1384  CHECK_V_ERROR;
1385  mat_partition_private (A, np_dh, o2n_row, rowToBlock);
1386  CHECK_V_ERROR;
1387  }
1388 
1389  /* broadcast partitiioning information to all processors */
1390  MPI_Bcast (rowToBlock, m, MPI_INT, 0, comm_dh);
1391 
1392  /* allocate storage for local portion of matrix */
1393  mat_par_read_allocate_private (&B, m, rowLengths, rowToBlock);
1394  CHECK_V_ERROR;
1395 
1396  /* root sends each processor its portion of the matrix */
1397  if (myid_dh == 0)
1398  {
1399  int *cval = A->cval, *rp = A->rp;
1400  double *aval = A->aval;
1401  send_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
1402  CHECK_V_ERROR;
1403  send_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
1404  CHECK_V_ERROR;
1405  for (i = 0; i < m; ++i)
1406  {
1407  int owner = rowToBlock[i];
1408  int count = rp[i + 1] - rp[i];
1409 
1410  /* error check for empty row */
1411  if (!count)
1412  {
1413  sprintf (msgBuf_dh, "row %i of %i is empty!", i + 1, m);
1414  SET_V_ERROR (msgBuf_dh);
1415  }
1416 
1417  MPI_Isend (cval + rp[i], count, MPI_INT, owner, CVAL_TAG, comm_dh,
1418  send_req + 2 * i);
1419  MPI_Isend (aval + rp[i], count, MPI_DOUBLE, owner, AVAL_TAG,
1420  comm_dh, send_req + 2 * i + 1);
1421  }
1422  }
1423 
1424  /* all processors receive their local rows */
1425  {
1426  int *cval = B->cval;
1427  int *rp = B->rp;
1428  double *aval = B->aval;
1429  m = B->m;
1430 
1431  rcv_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request));
1432  CHECK_V_ERROR;
1433  rcv_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status));
1434  CHECK_V_ERROR;
1435 
1436  for (i = 0; i < m; ++i)
1437  {
1438 
1439  /* error check for empty row */
1440  int count = rp[i + 1] - rp[i];
1441  if (!count)
1442  {
1443  sprintf (msgBuf_dh, "local row %i of %i is empty!", i + 1, m);
1444  SET_V_ERROR (msgBuf_dh);
1445  }
1446 
1447  MPI_Irecv (cval + rp[i], count, MPI_INT, 0, CVAL_TAG, comm_dh,
1448  rcv_req + 2 * i);
1449  MPI_Irecv (aval + rp[i], count, MPI_DOUBLE, 0, AVAL_TAG, comm_dh,
1450  rcv_req + 2 * i + 1);
1451  }
1452  }
1453 
1454  /* wait for all sends/receives to finish */
1455  if (myid_dh == 0)
1456  {
1457  MPI_Waitall (m * 2, send_req, send_status);
1458  }
1459  MPI_Waitall (2 * B->m, rcv_req, rcv_status);
1460 
1461  /* clean up */
1462  if (rowLengths != NULL)
1463  {
1464  FREE_DH (rowLengths);
1465  CHECK_V_ERROR;
1466  }
1467  if (o2n_row != NULL)
1468  {
1469  FREE_DH (o2n_row);
1470  CHECK_V_ERROR;
1471  }
1472  if (n2o_col != NULL)
1473  {
1474  FREE_DH (n2o_col);
1475  CHECK_V_ERROR;
1476  }
1477  if (rowToBlock != NULL)
1478  {
1479  FREE_DH (rowToBlock);
1480  CHECK_V_ERROR;
1481  }
1482  if (send_req != NULL)
1483  {
1484  FREE_DH (send_req);
1485  CHECK_V_ERROR;
1486  }
1487  if (rcv_req != NULL)
1488  {
1489  FREE_DH (rcv_req);
1490  CHECK_V_ERROR;
1491  }
1492  if (send_status != NULL)
1493  {
1494  FREE_DH (send_status);
1495  CHECK_V_ERROR;
1496  }
1497  if (rcv_status != NULL)
1498  {
1499  FREE_DH (rcv_status);
1500  CHECK_V_ERROR;
1501  }
1502 
1503  *Bout = B;
1504 
1505 END_FUNC_DH}
1506 
1507 #undef __FUNC__
1508 #define __FUNC__ "mat_par_read_allocate_private"
1509 void
1510 mat_par_read_allocate_private (Mat_dh * Aout, int n, int *rowLengths,
1511  int *rowToBlock)
1512 {
1513  START_FUNC_DH Mat_dh A;
1514  int i, m, nz, beg_row, *rp, idx;
1515 
1516  Mat_dhCreate (&A);
1517  CHECK_V_ERROR;
1518  *Aout = A;
1519  A->n = n;
1520 
1521  /* count number of rows owned by this processor */
1522  m = 0;
1523  for (i = 0; i < n; ++i)
1524  {
1525  if (rowToBlock[i] == myid_dh)
1526  ++m;
1527  }
1528  A->m = m;
1529 
1530  /* compute global numbering of first locally owned row */
1531  beg_row = 0;
1532  for (i = 0; i < n; ++i)
1533  {
1534  if (rowToBlock[i] < myid_dh)
1535  ++beg_row;
1536  }
1537  A->beg_row = beg_row;
1538 
1539  /* allocate storage for row-pointer array */
1540  A->rp = rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
1541  CHECK_V_ERROR;
1542  rp[0] = 0;
1543 
1544  /* count number of nonzeros owned by this processor, and form rp array */
1545  nz = 0;
1546  idx = 1;
1547  for (i = 0; i < n; ++i)
1548  {
1549  if (rowToBlock[i] == myid_dh)
1550  {
1551  nz += rowLengths[i];
1552  rp[idx++] = nz;
1553  }
1554  }
1555 
1556  /* allocate storage for column indices and values arrays */
1557  A->cval = (int *) MALLOC_DH (nz * sizeof (int));
1558  CHECK_V_ERROR;
1559  A->aval = (double *) MALLOC_DH (nz * sizeof (double));
1560  CHECK_V_ERROR;
1561 END_FUNC_DH}
1562 
1563 
1564 #undef __FUNC__
1565 #define __FUNC__ "mat_partition_private"
1566 void
1567 mat_partition_private (Mat_dh A, int blocks, int *o2n_row, int *rowToBlock)
1568 {
1569  START_FUNC_DH int i, j, n = A->n;
1570  int rpb = n / blocks; /* rows per block (except possibly last block) */
1571  int idx = 0;
1572 
1573  while (rpb * blocks < n)
1574  ++rpb;
1575 
1576  if (rpb * (blocks - 1) == n)
1577  {
1578  --rpb;
1579  printf_dh ("adjusted rpb to: %i\n", rpb);
1580  }
1581 
1582  for (i = 0; i < n; ++i)
1583  o2n_row[i] = i;
1584 
1585  /* assign all rows to blocks, except for last block, which may
1586  contain less than "rpb" rows
1587  */
1588  for (i = 0; i < blocks - 1; ++i)
1589  {
1590  for (j = 0; j < rpb; ++j)
1591  {
1592  rowToBlock[idx++] = i;
1593  }
1594  }
1595 
1596  /* now deal with the last block in the partition */
1597  i = blocks - 1;
1598  while (idx < n)
1599  rowToBlock[idx++] = i;
1600 
1601 END_FUNC_DH}
1602 
1603 
1604 /* may produce incorrect result if input is not triangular! */
1605 #undef __FUNC__
1606 #define __FUNC__ "make_full_private"
1607 void
1608 make_full_private (int m, int **rpIN, int **cvalIN, double **avalIN)
1609 {
1610  START_FUNC_DH int i, j, *rpNew, *cvalNew, *rp = *rpIN, *cval = *cvalIN;
1611  double *avalNew, *aval = *avalIN;
1612  int nz, *rowCounts = NULL;
1613 
1614  /* count the number of nonzeros in each row */
1615  rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int));
1616  CHECK_V_ERROR;
1617  for (i = 0; i <= m; ++i)
1618  rowCounts[i] = 0;
1619 
1620  for (i = 0; i < m; ++i)
1621  {
1622  for (j = rp[i]; j < rp[i + 1]; ++j)
1623  {
1624  int col = cval[j];
1625  rowCounts[i + 1] += 1;
1626  if (col != i)
1627  rowCounts[col + 1] += 1;
1628  }
1629  }
1630 
1631  /* prefix sum to form row pointers for full representation */
1632  rpNew = (int *) MALLOC_DH ((m + 1) * sizeof (int));
1633  CHECK_V_ERROR;
1634  for (i = 1; i <= m; ++i)
1635  rowCounts[i] += rowCounts[i - 1];
1636  memcpy (rpNew, rowCounts, (m + 1) * sizeof (int));
1637 
1638  /* form full representation */
1639  nz = rpNew[m];
1640 
1641  cvalNew = (int *) MALLOC_DH (nz * sizeof (int));
1642  CHECK_V_ERROR;
1643  avalNew = (double *) MALLOC_DH (nz * sizeof (double));
1644  CHECK_V_ERROR;
1645  for (i = 0; i < m; ++i)
1646  {
1647  for (j = rp[i]; j < rp[i + 1]; ++j)
1648  {
1649  int col = cval[j];
1650  double val = aval[j];
1651 
1652  cvalNew[rowCounts[i]] = col;
1653  avalNew[rowCounts[i]] = val;
1654  rowCounts[i] += 1;
1655  if (col != i)
1656  {
1657  cvalNew[rowCounts[col]] = i;
1658  avalNew[rowCounts[col]] = val;
1659  rowCounts[col] += 1;
1660  }
1661  }
1662  }
1663 
1664  if (rowCounts != NULL)
1665  {
1666  FREE_DH (rowCounts);
1667  CHECK_V_ERROR;
1668  }
1669  FREE_DH (cval);
1670  CHECK_V_ERROR;
1671  FREE_DH (rp);
1672  CHECK_V_ERROR;
1673  FREE_DH (aval);
1674  CHECK_V_ERROR;
1675  *rpIN = rpNew;
1676  *cvalIN = cvalNew;
1677  *avalIN = avalNew;
1678 END_FUNC_DH}
1679 
1680 #undef __FUNC__
1681 #define __FUNC__ "make_symmetric_private"
1682 void
1683 make_symmetric_private (int m, int **rpIN, int **cvalIN, double **avalIN)
1684 {
1685  START_FUNC_DH int i, j, *rpNew, *cvalNew, *rp = *rpIN, *cval = *cvalIN;
1686  double *avalNew, *aval = *avalIN;
1687  int nz, *rowCounts = NULL;
1688  int *rpTrans, *cvalTrans;
1689  int *work;
1690  double *avalTrans;
1691  int nzCount = 0, transCount = 0;
1692 
1693  mat_dh_transpose_private (m, rp, &rpTrans,
1694  cval, &cvalTrans, aval, &avalTrans);
1695  CHECK_V_ERROR;
1696 
1697  /* count the number of nonzeros in each row */
1698  work = (int *) MALLOC_DH (m * sizeof (int));
1699  CHECK_V_ERROR;
1700  for (i = 0; i < m; ++i)
1701  work[i] = -1;
1702  rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int));
1703  CHECK_V_ERROR;
1704  for (i = 0; i <= m; ++i)
1705  rowCounts[i] = 0;
1706 
1707  for (i = 0; i < m; ++i)
1708  {
1709  int ct = 0;
1710  for (j = rp[i]; j < rp[i + 1]; ++j)
1711  {
1712  int col = cval[j];
1713  work[col] = i;
1714  ++ct;
1715  ++nzCount;
1716  }
1717  for (j = rpTrans[i]; j < rpTrans[i + 1]; ++j)
1718  {
1719  int col = cvalTrans[j];
1720  if (work[col] != i)
1721  {
1722  ++ct;
1723  ++transCount;
1724  }
1725  }
1726  rowCounts[i + 1] = ct;
1727  }
1728 
1729  /*---------------------------------------------------------
1730  * if matrix is already symmetric, do nothing
1731  *---------------------------------------------------------*/
1732  if (transCount == 0)
1733  {
1734  printf
1735  ("make_symmetric_private: matrix is already structurally symmetric!\n");
1736  FREE_DH (rpTrans);
1737  CHECK_V_ERROR;
1738  FREE_DH (cvalTrans);
1739  CHECK_V_ERROR;
1740  FREE_DH (avalTrans);
1741  CHECK_V_ERROR;
1742  FREE_DH (work);
1743  CHECK_V_ERROR;
1744  FREE_DH (rowCounts);
1745  CHECK_V_ERROR;
1746  goto END_OF_FUNCTION;
1747  }
1748 
1749  /*---------------------------------------------------------
1750  * otherwise, finish symmetrizing
1751  *---------------------------------------------------------*/
1752  else
1753  {
1754  printf ("original nz= %i\n", rp[m]);
1755  printf ("zeros added= %i\n", transCount);
1756  printf
1757  ("ratio of added zeros to nonzeros = %0.2f (assumes all original entries were nonzero!)\n",
1758  (double) transCount / (double) (nzCount));
1759  }
1760 
1761  /* prefix sum to form row pointers for full representation */
1762  rpNew = (int *) MALLOC_DH ((m + 1) * sizeof (int));
1763  CHECK_V_ERROR;
1764  for (i = 1; i <= m; ++i)
1765  rowCounts[i] += rowCounts[i - 1];
1766  memcpy (rpNew, rowCounts, (m + 1) * sizeof (int));
1767  for (i = 0; i < m; ++i)
1768  work[i] = -1;
1769 
1770  /* form full representation */
1771  nz = rpNew[m];
1772  cvalNew = (int *) MALLOC_DH (nz * sizeof (int));
1773  CHECK_V_ERROR;
1774  avalNew = (double *) MALLOC_DH (nz * sizeof (double));
1775  CHECK_V_ERROR;
1776  for (i = 0; i < m; ++i)
1777  work[i] = -1;
1778 
1779  for (i = 0; i < m; ++i)
1780  {
1781  for (j = rp[i]; j < rp[i + 1]; ++j)
1782  {
1783  int col = cval[j];
1784  double val = aval[j];
1785  work[col] = i;
1786  cvalNew[rowCounts[i]] = col;
1787  avalNew[rowCounts[i]] = val;
1788  rowCounts[i] += 1;
1789  }
1790  for (j = rpTrans[i]; j < rpTrans[i + 1]; ++j)
1791  {
1792  int col = cvalTrans[j];
1793  if (work[col] != i)
1794  {
1795  cvalNew[rowCounts[i]] = col;
1796  avalNew[rowCounts[i]] = 0.0;
1797  rowCounts[i] += 1;
1798  }
1799  }
1800  }
1801 
1802  if (rowCounts != NULL)
1803  {
1804  FREE_DH (rowCounts);
1805  CHECK_V_ERROR;
1806  }
1807  FREE_DH (work);
1808  CHECK_V_ERROR;
1809  FREE_DH (cval);
1810  CHECK_V_ERROR;
1811  FREE_DH (rp);
1812  CHECK_V_ERROR;
1813  FREE_DH (aval);
1814  CHECK_V_ERROR;
1815  FREE_DH (cvalTrans);
1816  CHECK_V_ERROR;
1817  FREE_DH (rpTrans);
1818  CHECK_V_ERROR;
1819  FREE_DH (avalTrans);
1820  CHECK_V_ERROR;
1821  *rpIN = rpNew;
1822  *cvalIN = cvalNew;
1823  *avalIN = avalNew;
1824 
1825 END_OF_FUNCTION:;
1826 
1827 END_FUNC_DH}
1828 
1829 #undef __FUNC__
1830 #define __FUNC__ "profileMat"
1831 void
1832 profileMat (Mat_dh A)
1833 {
1834  START_FUNC_DH Mat_dh B = NULL;
1835  int type;
1836  int m;
1837  int i, j;
1838  int *work1;
1839  double *work2;
1840  bool isStructurallySymmetric = true;
1841  bool isNumericallySymmetric = true;
1842  bool is_Triangular = false;
1843  int zeroCount = 0, nz;
1844 
1845  if (myid_dh > 0)
1846  {
1847  SET_V_ERROR ("only for a single MPI task!");
1848  }
1849 
1850  m = A->m;
1851 
1852  printf ("\nYY----------------------------------------------------\n");
1853 
1854  /* count number of explicit zeros */
1855  nz = A->rp[m];
1856  for (i = 0; i < nz; ++i)
1857  {
1858  if (A->aval[i] == 0)
1859  ++zeroCount;
1860  }
1861  printf ("YY row count: %i\n", m);
1862  printf ("YY nz count: %i\n", nz);
1863  printf ("YY explicit zeros: %i (entire matrix)\n", zeroCount);
1864 
1865  /* count number of missing or zero diagonals */
1866  {
1867  int m_diag = 0, z_diag = 0;
1868  for (i = 0; i < m; ++i)
1869  {
1870  bool flag = true;
1871  for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
1872  {
1873  int col = A->cval[j];
1874 
1875  /* row has an explicit diagonal element */
1876  if (col == i)
1877  {
1878  double val = A->aval[j];
1879  flag = false;
1880  if (val == 0.0)
1881  ++z_diag;
1882  break;
1883  }
1884  }
1885 
1886  /* row has an implicit zero diagonal element */
1887  if (flag)
1888  ++m_diag;
1889  }
1890  printf ("YY missing diagonals: %i\n", m_diag);
1891  printf ("YY explicit zero diags: %i\n", z_diag);
1892  }
1893 
1894  /* check to see if matrix is triangular */
1895  type = isTriangular (m, A->rp, A->cval);
1896  CHECK_V_ERROR;
1897  if (type == IS_UPPER_TRI)
1898  {
1899  printf ("YY matrix is upper triangular\n");
1900  is_Triangular = true;
1901  goto END_OF_FUNCTION;
1902  }
1903  else if (type == IS_LOWER_TRI)
1904  {
1905  printf ("YY matrix is lower triangular\n");
1906  is_Triangular = true;
1907  goto END_OF_FUNCTION;
1908  }
1909 
1910  /* if not triangular, count nz in each triangle */
1911  {
1912  int unz = 0, lnz = 0;
1913  for (i = 0; i < m; ++i)
1914  {
1915  for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
1916  {
1917  int col = A->cval[j];
1918  if (col < i)
1919  ++lnz;
1920  if (col > i)
1921  ++unz;
1922  }
1923  }
1924  printf ("YY strict upper triangular nonzeros: %i\n", unz);
1925  printf ("YY strict lower triangular nonzeros: %i\n", lnz);
1926  }
1927 
1928 
1929 
1930 
1931  Mat_dhTranspose (A, &B);
1932  CHECK_V_ERROR;
1933 
1934  /* check for structural and numerical symmetry */
1935 
1936  work1 = (int *) MALLOC_DH (m * sizeof (int));
1937  CHECK_V_ERROR;
1938  work2 = (double *) MALLOC_DH (m * sizeof (double));
1939  CHECK_V_ERROR;
1940  for (i = 0; i < m; ++i)
1941  work1[i] = -1;
1942  for (i = 0; i < m; ++i)
1943  work2[i] = 0.0;
1944 
1945  for (i = 0; i < m; ++i)
1946  {
1947  for (j = A->rp[i]; j < A->rp[i + 1]; ++j)
1948  {
1949  int col = A->cval[j];
1950  double val = A->aval[j];
1951  work1[col] = i;
1952  work2[col] = val;
1953  }
1954  for (j = B->rp[i]; j < B->rp[i + 1]; ++j)
1955  {
1956  int col = B->cval[j];
1957  double val = B->aval[j];
1958 
1959  if (work1[col] != i)
1960  {
1961  isStructurallySymmetric = false;
1962  isNumericallySymmetric = false;
1963  goto END_OF_FUNCTION;
1964  }
1965  if (work2[col] != val)
1966  {
1967  isNumericallySymmetric = false;
1968  work2[col] = 0.0;
1969  }
1970  }
1971  }
1972 
1973 
1974 END_OF_FUNCTION:;
1975 
1976  if (!is_Triangular)
1977  {
1978  printf ("YY matrix is NOT triangular\n");
1979  if (isStructurallySymmetric)
1980  {
1981  printf ("YY matrix IS structurally symmetric\n");
1982  }
1983  else
1984  {
1985  printf ("YY matrix is NOT structurally symmetric\n");
1986  }
1987  if (isNumericallySymmetric)
1988  {
1989  printf ("YY matrix IS numerically symmetric\n");
1990  }
1991  else
1992  {
1993  printf ("YY matrix is NOT numerically symmetric\n");
1994  }
1995  }
1996 
1997  if (work1 != NULL)
1998  {
1999  FREE_DH (work1);
2000  CHECK_V_ERROR;
2001  }
2002  if (work2 != NULL)
2003  {
2004  FREE_DH (work2);
2005  CHECK_V_ERROR;
2006  }
2007  if (B != NULL)
2008  {
2009  Mat_dhDestroy (B);
2010  CHECK_V_ERROR;
2011  }
2012 
2013  printf ("YY----------------------------------------------------\n");
2014 
2015 END_FUNC_DH}
Definition: Mat_dh.h:62
Definition: Vec_dh.h:52