Mat_dh.c
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #include "Mat_dh.h"
44 #include "getRow_dh.h"
45 #include "SubdomainGraph_dh.h"
46 #include "TimeLog_dh.h"
47 #include "Mem_dh.h"
48 #include "Numbering_dh.h"
49 #include "Parser_dh.h"
50 #include "mat_dh_private.h"
51 #include "io_dh.h"
52 #include "Hash_i_dh.h"
53 
54 static void setup_matvec_sends_private (Mat_dh mat, int *inlist);
55 static void setup_matvec_receives_private (Mat_dh mat, int *beg_rows,
56  int *end_rows, int reqlen,
57  int *reqind, int *outlist);
58 
59 #if 0
60 
61 /* partial (?) implementation below;
62    not used anyplace, I think;
63    for future expansion? [Mar 21, 2K+1]
64 */
65 
66  static void Mat_dhAllocate_getRow_private (Mat_dh A);
67 #endif
68 
69  static bool commsOnly = false; /* experimental, for matvec functions */
70 
71 #undef __FUNC__
72 #define __FUNC__ "Mat_dhCreate"
73  void Mat_dhCreate (Mat_dh * mat)
74 {
75  START_FUNC_DH
76  struct _mat_dh *tmp =
77  (struct _mat_dh *) MALLOC_DH (sizeof (struct _mat_dh));
78  CHECK_V_ERROR;
79  *mat = tmp;
80 
81  commsOnly = Parser_dhHasSwitch (parser_dh, "-commsOnly");
82  if (myid_dh == 0 && commsOnly == true)
83  {
84 /* printf("\n@@@ commsOnly == true for matvecs! @@@\n"); */
85  fflush (stdout);
86  }
87 
88  tmp->m = 0;
89  tmp->n = 0;
90  tmp->beg_row = 0;
91  tmp->bs = 1;
92 
93  tmp->rp = NULL;
94  tmp->len = NULL;
95  tmp->cval = NULL;
96  tmp->aval = NULL;
97  tmp->diag = NULL;
98  tmp->fill = NULL;
99  tmp->owner = true;
100 
101  tmp->len_private = 0;
102  tmp->rowCheckedOut = -1;
103  tmp->cval_private = NULL;
104  tmp->aval_private = NULL;
105 
106  tmp->row_perm = NULL;
107 
108  tmp->num_recv = 0;
109  tmp->num_send = 0;
110  tmp->recv_req = NULL;
111  tmp->send_req = NULL;
112  tmp->status = NULL;
113  tmp->recvbuf = NULL;
114  tmp->sendbuf = NULL;
115  tmp->sendind = NULL;
116  tmp->sendlen = 0;
117  tmp->recvlen = 0;
118  tmp->numb = NULL;
119  tmp->matvecIsSetup = false;
120 
121  Mat_dhZeroTiming (tmp);
122  CHECK_V_ERROR;
123  tmp->matvec_timing = true;
124 
125  tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_Mat");
126 END_FUNC_DH}
127 
128 #undef __FUNC__
129 #define __FUNC__ "Mat_dhDestroy"
130 void
131 Mat_dhDestroy (Mat_dh mat)
132 {
133  START_FUNC_DH int i;
134 
135  if (mat->owner)
136  {
137  if (mat->rp != NULL)
138  {
139  FREE_DH (mat->rp);
140  CHECK_V_ERROR;
141  }
142  if (mat->len != NULL)
143  {
144  FREE_DH (mat->len);
145  CHECK_V_ERROR;
146  }
147  if (mat->cval != NULL)
148  {
149  FREE_DH (mat->cval);
150  CHECK_V_ERROR;
151  }
152  if (mat->aval != NULL)
153  {
154  FREE_DH (mat->aval);
155  CHECK_V_ERROR;
156  }
157  if (mat->diag != NULL)
158  {
159  FREE_DH (mat->diag);
160  CHECK_V_ERROR;
161  }
162  if (mat->fill != NULL)
163  {
164  FREE_DH (mat->fill);
165  CHECK_V_ERROR;
166  }
167  if (mat->cval_private != NULL)
168  {
169  FREE_DH (mat->cval_private);
170  CHECK_V_ERROR;
171  }
172  if (mat->aval_private != NULL)
173  {
174  FREE_DH (mat->aval_private);
175  CHECK_V_ERROR;
176  }
177  if (mat->row_perm != NULL)
178  {
179  FREE_DH (mat->row_perm);
180  CHECK_V_ERROR;
181  }
182  }
183 
184  for (i = 0; i < mat->num_recv; i++)
185  MPI_Request_free (&mat->recv_req[i]);
186  for (i = 0; i < mat->num_send; i++)
187  MPI_Request_free (&mat->send_req[i]);
188  if (mat->recv_req != NULL)
189  {
190  FREE_DH (mat->recv_req);
191  CHECK_V_ERROR;
192  }
193  if (mat->send_req != NULL)
194  {
195  FREE_DH (mat->send_req);
196  CHECK_V_ERROR;
197  }
198  if (mat->status != NULL)
199  {
200  FREE_DH (mat->status);
201  CHECK_V_ERROR;
202  }
203  if (mat->recvbuf != NULL)
204  {
205  FREE_DH (mat->recvbuf);
206  CHECK_V_ERROR;
207  }
208  if (mat->sendbuf != NULL)
209  {
210  FREE_DH (mat->sendbuf);
211  CHECK_V_ERROR;
212  }
213  if (mat->sendind != NULL)
214  {
215  FREE_DH (mat->sendind);
216  CHECK_V_ERROR;
217  }
218 
219  if (mat->matvecIsSetup)
220  {
221  Mat_dhMatVecSetdown (mat);
222  CHECK_V_ERROR;
223  }
224  if (mat->numb != NULL)
225  {
226  Numbering_dhDestroy (mat->numb);
227  CHECK_V_ERROR;
228  }
229  FREE_DH (mat);
230  CHECK_V_ERROR;
231 END_FUNC_DH}
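
/* Example: a minimal sketch (not part of the library) of building a Mat_dh
   by hand.  Mat_dhCreate only initializes the struct; the caller fills the
   CSR fields m, n, beg_row, rp, cval, and aval.  Because Mat_dhDestroy
   releases them with FREE_DH, the arrays must come from MALLOC_DH. */
#if 0
static void
example_build_identity (int m, Mat_dh * Aout)
{
  int i;
  Mat_dh A;

  Mat_dhCreate (&A);
  CHECK_V_ERROR;
  A->m = A->n = m;
  A->beg_row = 0;		/* first locally owned row */
  A->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
  A->cval = (int *) MALLOC_DH (m * sizeof (int));
  A->aval = (double *) MALLOC_DH (m * sizeof (double));
  CHECK_V_ERROR;

  A->rp[0] = 0;
  for (i = 0; i < m; ++i)
    {				/* one entry per row: A(i,i) = 1 */
      A->rp[i + 1] = i + 1;
      A->cval[i] = i;
      A->aval[i] = 1.0;
    }
  *Aout = A;			/* caller eventually calls Mat_dhDestroy(A) */
}
#endif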
232 
233 
234 /* this should put the cval array back the way it was! */
235 #undef __FUNC__
236 #define __FUNC__ "Mat_dhMatVecSetdown"
237 void
238 Mat_dhMatVecSetdown (Mat_dh mat)
239 {
240  START_FUNC_DH if (ignoreMe)
241  SET_V_ERROR ("not implemented");
242 END_FUNC_DH}
243 
244 
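/* Overview of the matvec setup implemented below: (1) tasks exchange their
   [beg_row, beg_row+m) ownership ranges via MPI_Allgather; (2) a
   Numbering_dh collects the external (off-task) column indices this task
   needs; (3) setup_matvec_receives_private sends each owner the list of
   rows needed from it (counts are exchanged via MPI_Alltoall) and posts
   persistent receives (MPI_Recv_init) for the incoming x-values;
   (4) setup_matvec_sends_private posts the matching persistent sends
   (MPI_Send_init); (5) column indices are converted to local numbering so
   the multiply can index recvbuf directly. */
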
245 /* adapted from Edmond Chow's ParaSails */
246 #undef __FUNC__
247 #define __FUNC__ "Mat_dhMatVecSetup"
248 void
249 Mat_dhMatVecSetup (Mat_dh mat)
250 {
251  START_FUNC_DH if (np_dh == 1)
252  {
253  goto DO_NOTHING;
254  }
255 
256  else
257  {
258  int *outlist, *inlist;
259  int ierr, i, row, *rp = mat->rp, *cval = mat->cval;
260  Numbering_dh numb;
261  int m = mat->m;
262  int firstLocal = mat->beg_row;
263  int lastLocal = firstLocal + m;
264  int *beg_rows, *end_rows;
265 
266  mat->recv_req =
267  (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
268  CHECK_V_ERROR;
269  mat->send_req =
270  (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
271  CHECK_V_ERROR;
272  mat->status = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status));
273  CHECK_V_ERROR;
274  beg_rows = (int *) MALLOC_DH (np_dh * sizeof (int));
275  CHECK_V_ERROR;
276  end_rows = (int *) MALLOC_DH (np_dh * sizeof (int));
277  CHECK_V_ERROR;
278 
279  if (np_dh == 1)
280  { /* this is for debugging purposes in some of the drivers */
281  beg_rows[0] = 0;
282  end_rows[0] = m;
283  }
284  else
285  {
286  ierr =
287  MPI_Allgather (&firstLocal, 1, MPI_INT, beg_rows, 1, MPI_INT,
288  comm_dh);
289 
290  CHECK_MPI_V_ERROR (ierr);
291 
292  ierr =
293  MPI_Allgather (&lastLocal, 1, MPI_INT, end_rows, 1, MPI_INT,
294  comm_dh);
295  CHECK_MPI_V_ERROR (ierr);
296  }
297 
298  outlist = (int *) MALLOC_DH (np_dh * sizeof (int));
299  CHECK_V_ERROR;
300  inlist = (int *) MALLOC_DH (np_dh * sizeof (int));
301  CHECK_V_ERROR;
302  for (i = 0; i < np_dh; ++i)
303  {
304  outlist[i] = 0;
305  inlist[i] = 0;
306  }
307 
308  /* Create Numbering object */
309  Numbering_dhCreate (&(mat->numb));
310  CHECK_V_ERROR;
311  numb = mat->numb;
312  Numbering_dhSetup (numb, mat);
313  CHECK_V_ERROR;
314 
315  setup_matvec_receives_private (mat, beg_rows, end_rows, numb->num_ext,
316  numb->idx_ext, outlist);
317  CHECK_V_ERROR;
318 
319  if (np_dh == 1)
320  { /* this is for debugging purposes in some of the drivers */
321  inlist[0] = outlist[0];
322  }
323  else
324  {
325  ierr =
326  MPI_Alltoall (outlist, 1, MPI_INT, inlist, 1, MPI_INT, comm_dh);
327  CHECK_MPI_V_ERROR (ierr);
328  }
329 
330  setup_matvec_sends_private (mat, inlist);
331  CHECK_V_ERROR;
332 
333  /* Convert to local indices */
334  for (row = 0; row < m; row++)
335  {
336  int len = rp[row + 1] - rp[row];
337  int *ind = cval + rp[row];
338  Numbering_dhGlobalToLocal (numb, len, ind, ind);
339  CHECK_V_ERROR;
340  }
341 
342  FREE_DH (outlist);
343  CHECK_V_ERROR;
344  FREE_DH (inlist);
345  CHECK_V_ERROR;
346  FREE_DH (beg_rows);
347  CHECK_V_ERROR;
348  FREE_DH (end_rows);
349  CHECK_V_ERROR;
350  }
351 
352 DO_NOTHING:;
353 
354 END_FUNC_DH}
355 
356 /* adapted from Edmond Chow's ParaSails */
357 #undef __FUNC__
358 #define __FUNC__ "setup_matvec_receives_private"
359 void
360 setup_matvec_receives_private (Mat_dh mat, int *beg_rows, int *end_rows,
361  int reqlen, int *reqind, int *outlist)
362 {
363  START_FUNC_DH int ierr, i, j, this_pe;
364  MPI_Request request;
365  int m = mat->m;
366 
367  mat->num_recv = 0;
368 
369  /* Allocate recvbuf */
370  /* recvbuf has numlocal entries saved for local part of x, used in matvec */
371  mat->recvbuf = (double *) MALLOC_DH ((reqlen + m) * sizeof (double));
372 
373  for (i = 0; i < reqlen; i = j)
374  { /* j is set below */
375  /* The processor that owns the row with index reqind[i] */
376  this_pe = mat_find_owner (beg_rows, end_rows, reqind[i]);
377  CHECK_V_ERROR;
378 
379  /* Figure out other rows we need from this_pe */
380  for (j = i + 1; j < reqlen; j++)
381  {
382  /* if row is on a different pe (end_rows[] is exclusive; see lastLocal) */
383  if (reqind[j] < beg_rows[this_pe] || reqind[j] >= end_rows[this_pe])
384  break;
385  }
386 
387  /* Request rows in reqind[i..j-1] */
388  ierr =
389  MPI_Isend (&reqind[i], j - i, MPI_INT, this_pe, 444, comm_dh,
390  &request);
391  CHECK_MPI_V_ERROR (ierr);
392  ierr = MPI_Request_free (&request);
393  CHECK_MPI_V_ERROR (ierr);
394 
395  /* count of indices needed from this_pe */
396  outlist[this_pe] = j - i;
397 
398  ierr =
399  MPI_Recv_init (&mat->recvbuf[i + m], j - i, MPI_DOUBLE, this_pe, 555,
400  comm_dh, &mat->recv_req[mat->num_recv]);
401  CHECK_MPI_V_ERROR (ierr);
402 
403  mat->num_recv++;
404  mat->recvlen += j - i; /* only used for statistical reporting */
405  }
406 END_FUNC_DH}
407 
408 
409 /* adapted from Edmond Chow's ParaSails */
410 #undef __FUNC__
411 #define __FUNC__ "setup_matvec_sends_private"
412 void
413 setup_matvec_sends_private (Mat_dh mat, int *inlist)
414 {
415  START_FUNC_DH int ierr, i, j, sendlen, first = mat->beg_row;
416  MPI_Request *requests;
417  MPI_Status *statuses;
418 
419  requests = (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
420  CHECK_V_ERROR;
421  statuses = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status));
422  CHECK_V_ERROR;
423 
424  /* Determine size of and allocate sendbuf and sendind */
425  sendlen = 0;
426  for (i = 0; i < np_dh; i++)
427  sendlen += inlist[i];
428  mat->sendlen = sendlen;
429  mat->sendbuf = (double *) MALLOC_DH (sendlen * sizeof (double));
430  CHECK_V_ERROR;
431  mat->sendind = (int *) MALLOC_DH (sendlen * sizeof (int));
432  CHECK_V_ERROR;
433 
434  j = 0;
435  mat->num_send = 0;
436  for (i = 0; i < np_dh; i++)
437  {
438  if (inlist[i] != 0)
439  {
440  /* Post receive for the actual indices */
441  ierr =
442  MPI_Irecv (&mat->sendind[j], inlist[i], MPI_INT, i, 444, comm_dh,
443  &requests[mat->num_send]);
444  CHECK_MPI_V_ERROR (ierr);
445  /* Set up the send */
446  ierr =
447  MPI_Send_init (&mat->sendbuf[j], inlist[i], MPI_DOUBLE, i, 555,
448  comm_dh, &mat->send_req[mat->num_send]);
449  CHECK_MPI_V_ERROR (ierr);
450 
451  mat->num_send++;
452  j += inlist[i];
453  }
454  }
455 
456  /* total words (doubles) to be sent during matvec */
457  mat->time[MATVEC_WORDS] = j;
458 
459 
460  ierr = MPI_Waitall (mat->num_send, requests, statuses);
461  CHECK_MPI_V_ERROR (ierr);
462  /* convert global indices to local indices */
463  /* these are all indices on this processor */
464  for (i = 0; i < mat->sendlen; i++)
465  mat->sendind[i] -= first;
466 
467  FREE_DH (requests);
468  FREE_DH (statuses);
469 END_FUNC_DH}
470 
471 
472 /* unthreaded MPI version */
473 #undef __FUNC__
474 #define __FUNC__ "Mat_dhMatVec"
475 void
476 Mat_dhMatVec (Mat_dh mat, double *x, double *b)
477 {
478  START_FUNC_DH if (np_dh == 1)
479  {
480  Mat_dhMatVec_uni (mat, x, b);
481  CHECK_V_ERROR;
482  }
483 
484  else
485  {
486  int ierr, i, row, m = mat->m;
487  int *rp = mat->rp, *cval = mat->cval;
488  double *aval = mat->aval;
489  int *sendind = mat->sendind;
490  int sendlen = mat->sendlen;
491  double *sendbuf = mat->sendbuf;
492  double *recvbuf = mat->recvbuf;
493  double t1 = 0, t2 = 0, t3 = 0, t4 = 0;
494  bool timeFlag = mat->matvec_timing;
495 
496 
497  if (timeFlag)
498  t1 = MPI_Wtime ();
499 
500  /* Put components of x into the right outgoing buffers */
501  if (!commsOnly)
502  {
503  for (i = 0; i < sendlen; i++)
504  sendbuf[i] = x[sendind[i]];
505  }
506 
507  if (timeFlag)
508  {
509  t2 = MPI_Wtime ();
510  mat->time[MATVEC_TIME] += (t2 - t1);
511 
512  }
513 
514  ierr = MPI_Startall (mat->num_recv, mat->recv_req);
515  CHECK_MPI_V_ERROR (ierr);
516  ierr = MPI_Startall (mat->num_send, mat->send_req);
517  CHECK_MPI_V_ERROR (ierr);
518  ierr = MPI_Waitall (mat->num_recv, mat->recv_req, mat->status);
519  CHECK_MPI_V_ERROR (ierr);
520  ierr = MPI_Waitall (mat->num_send, mat->send_req, mat->status);
521  CHECK_MPI_V_ERROR (ierr);
522 
523 
524  if (timeFlag)
525  {
526  t3 = MPI_Wtime ();
527  mat->time[MATVEC_MPI_TIME] += (t3 - t2);
528  }
529 
530  /* Copy local part of x into top part of recvbuf */
531  if (!commsOnly)
532  {
533  for (i = 0; i < m; i++)
534  recvbuf[i] = x[i];
535 
536  /* do the multiply */
537  for (row = 0; row < m; row++)
538  {
539  int len = rp[row + 1] - rp[row];
540  int *ind = cval + rp[row];
541  double *val = aval + rp[row];
542  double temp = 0.0;
543  for (i = 0; i < len; i++)
544  {
545  temp += (val[i] * recvbuf[ind[i]]);
546  }
547  b[row] = temp;
548  }
549  } /* if (! commsOnly) */
550 
551  if (timeFlag)
552  {
553  t4 = MPI_Wtime ();
554  mat->time[MATVEC_TOTAL_TIME] += (t4 - t1);
555  mat->time[MATVEC_TIME] += (t4 - t3);
556  }
557  }
558 END_FUNC_DH}
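
/* Example: a minimal sketch of the matvec lifecycle.  Setup is done once
   (it posts the persistent MPI requests); Mat_dhMatVec may then be called
   repeatedly.  The requests are freed in Mat_dhDestroy (note that
   Mat_dhMatVecSetdown is not implemented). */
#if 0
static void
example_matvec (Mat_dh A)
{
  int i, m = A->m;
  double *x = (double *) MALLOC_DH (m * sizeof (double));
  double *y = (double *) MALLOC_DH (m * sizeof (double));
  CHECK_V_ERROR;

  for (i = 0; i < m; ++i)
    x[i] = 1.0;

  Mat_dhMatVecSetup (A);	/* once per matrix */
  CHECK_V_ERROR;
  Mat_dhMatVec (A, x, y);	/* y = A*x */
  CHECK_V_ERROR;
  Mat_dhMatVec (A, y, x);	/* x = A*y, reusing the same requests */
  CHECK_V_ERROR;

  FREE_DH (x);
  FREE_DH (y);
  CHECK_V_ERROR;
}
#endif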
559 
560 /* OpenMP/MPI version */
561 #undef __FUNC__
562 #define __FUNC__ "Mat_dhMatVec_omp"
563 void
564 Mat_dhMatVec_omp (Mat_dh mat, double *x, double *b)
565 {
566  START_FUNC_DH int ierr, i, row, m = mat->m;
567  int *rp = mat->rp, *cval = mat->cval;
568  double *aval = mat->aval;
569  int *sendind = mat->sendind;
570  int sendlen = mat->sendlen;
571  double *sendbuf = mat->sendbuf;
572  double *recvbuf = mat->recvbuf;
573  double t1 = 0, t2 = 0, t3 = 0, t4 = 0, tx = 0;
574  double *val, temp;
575  int len, *ind;
576  bool timeFlag = mat->matvec_timing;
577 
578  if (timeFlag)
579  t1 = MPI_Wtime ();
580 
581  /* Put components of x into the right outgoing buffers */
582  for (i = 0; i < sendlen; i++)
583  sendbuf[i] = x[sendind[i]];
584 
585  if (timeFlag)
586  {
587  t2 = MPI_Wtime ();
588  mat->time[MATVEC_TIME] += (t2 - t1);
589  }
590 
591  ierr = MPI_Startall (mat->num_recv, mat->recv_req);
592  CHECK_MPI_V_ERROR (ierr);
593  ierr = MPI_Startall (mat->num_send, mat->send_req);
594  CHECK_MPI_V_ERROR (ierr);
595  ierr = MPI_Waitall (mat->num_recv, mat->recv_req, mat->status);
596  CHECK_MPI_V_ERROR (ierr);
597  ierr = MPI_Waitall (mat->num_send, mat->send_req, mat->status);
598  CHECK_MPI_V_ERROR (ierr);
599 
600  if (timeFlag)
601  {
602  t3 = MPI_Wtime ();
603  mat->time[MATVEC_MPI_TIME] += (t3 - t2);
604  }
605 
606  /* Copy local part of x into top part of recvbuf */
607  for (i = 0; i < m; i++)
608  recvbuf[i] = x[i];
609 
610  if (timeFlag)
611  {
612  tx = MPI_Wtime ();
613  mat->time[MATVEC_MPI_TIME2] += (tx - t1);
614  }
615 
616 
617  /* do the multiply */
618  for (row = 0; row < m; row++)
619  {
620  len = rp[row + 1] - rp[row];
621  ind = cval + rp[row];
622  val = aval + rp[row];
623  temp = 0.0;
624  for (i = 0; i < len; i++)
625  {
626  temp += (val[i] * recvbuf[ind[i]]);
627  }
628  b[row] = temp;
629  }
630 
631  if (timeFlag)
632  {
633  t4 = MPI_Wtime ();
634  mat->time[MATVEC_TOTAL_TIME] += (t4 - t1);
635  mat->time[MATVEC_TIME] += (t4 - t3);
636  }
637 
638 END_FUNC_DH}
639 
640 
641 /* OpenMP/single primary task version */
642 #undef __FUNC__
643 #define __FUNC__ "Mat_dhMatVec_uni_omp"
644 void
645 Mat_dhMatVec_uni_omp (Mat_dh mat, double *x, double *b)
646 {
647  START_FUNC_DH int i, row, m = mat->m;
648  int *rp = mat->rp, *cval = mat->cval;
649  double *aval = mat->aval;
650  double t1 = 0, t2 = 0;
651  bool timeFlag = mat->matvec_timing;
652 
653  if (timeFlag)
654  {
655  t1 = MPI_Wtime ();
656  }
657 
658  /* do the multiply */
659  for (row = 0; row < m; row++)
660  {
661  int len = rp[row + 1] - rp[row];
662  int *ind = cval + rp[row];
663  double *val = aval + rp[row];
664  double temp = 0.0;
665  for (i = 0; i < len; i++)
666  {
667  temp += (val[i] * x[ind[i]]);
668  }
669  b[row] = temp;
670  }
671 
672  if (timeFlag)
673  {
674  t2 = MPI_Wtime ();
675  mat->time[MATVEC_TIME] += (t2 - t1);
676  mat->time[MATVEC_TOTAL_TIME] += (t2 - t1);
677  }
678 
679 END_FUNC_DH}
680 
681 
682 /* unthreaded, single-task version */
683 #undef __FUNC__
684 #define __FUNC__ "Mat_dhMatVec_uni"
685 void
686 Mat_dhMatVec_uni (Mat_dh mat, double *x, double *b)
687 {
688  START_FUNC_DH int i, row, m = mat->m;
689  int *rp = mat->rp, *cval = mat->cval;
690  double *aval = mat->aval;
691  double t1 = 0, t2 = 0;
692  bool timeFlag = mat->matvec_timing;
693 
694  if (timeFlag)
695  t1 = MPI_Wtime ();
696 
697  for (row = 0; row < m; row++)
698  {
699  int len = rp[row + 1] - rp[row];
700  int *ind = cval + rp[row];
701  double *val = aval + rp[row];
702  double temp = 0.0;
703  for (i = 0; i < len; i++)
704  {
705  temp += (val[i] * x[ind[i]]);
706  }
707  b[row] = temp;
708  }
709 
710  if (timeFlag)
711  {
712  t2 = MPI_Wtime ();
713  mat->time[MATVEC_TIME] += (t2 - t1);
714  mat->time[MATVEC_TOTAL_TIME] += (t2 - t1);
715  }
716 
717 END_FUNC_DH}
718 
719 
720 #undef __FUNC__
721 #define __FUNC__ "Mat_dhReadNz"
722 int
723 Mat_dhReadNz (Mat_dh mat)
724 {
725  START_FUNC_DH int ierr, retval = mat->rp[mat->m];
726  int nz = retval;
727  ierr = MPI_Allreduce (&nz, &retval, 1, MPI_INT, MPI_SUM, comm_dh);
728  CHECK_MPI_ERROR (ierr);
729 END_FUNC_VAL (retval)}
730 
731 
732 
733 #if 0
734 
735 #undef __FUNC__
736 #define __FUNC__ "Mat_dhAllocate_getRow_private"
737 void
738 Mat_dhAllocate_getRow_private (Mat_dh A)
739 {
740  START_FUNC_DH int i, *rp = A->rp, len = 0;
741  int m = A->m;
742 
743  /* find longest row in matrix */
744  for (i = 0; i < m; ++i)
745  len = MAX (len, rp[i + 1] - rp[i]);
746  len *= A->bs;
747 
748  /* free any previously allocated private storage */
749  if (len > A->len_private)
750  {
751  if (A->cval_private != NULL)
752  {
753  FREE_DH (A->cval_private);
754  CHECK_V_ERROR;
755  }
756  if (A->aval_private != NULL)
757  {
758  FREE_DH (A->aval_private);
759  CHECK_V_ERROR;
760  }
761  }
762 
763  /* allocate private storage */
764  A->cval_private = (int *) MALLOC_DH (len * sizeof (int));
765  CHECK_V_ERROR;
766  A->aval_private = (double *) MALLOC_DH (len * sizeof (double));
767  CHECK_V_ERROR;
768  A->len_private = len;
769 END_FUNC_DH}
770 
771 #endif
772 
773 #undef __FUNC__
774 #define __FUNC__ "Mat_dhZeroTiming"
775 void
776 Mat_dhZeroTiming (Mat_dh mat)
777 {
778  START_FUNC_DH int i;
779 
780  for (i = 0; i < MAT_DH_BINS; ++i)
781  {
782  mat->time[i] = 0;
783  mat->time_max[i] = 0;
784  mat->time_min[i] = 0;
785  }
786 END_FUNC_DH}
787 
788 #undef __FUNC__
789 #define __FUNC__ "Mat_dhReduceTiming"
790 void
791 Mat_dhReduceTiming (Mat_dh mat)
792 {
793  START_FUNC_DH if (mat->time[MATVEC_MPI_TIME])
794  {
795  mat->time[MATVEC_RATIO] =
796  mat->time[MATVEC_TIME] / mat->time[MATVEC_MPI_TIME];
797  }
798  MPI_Allreduce (mat->time, mat->time_min, MAT_DH_BINS, MPI_DOUBLE, MPI_MIN,
799  comm_dh);
800  MPI_Allreduce (mat->time, mat->time_max, MAT_DH_BINS, MPI_DOUBLE, MPI_MAX,
801  comm_dh);
802 END_FUNC_DH}
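
/* Example: a sketch of the timing workflow, assuming the MATVEC_* bins and
   MAT_DH_BINS declared in Mat_dh.h. */
#if 0
static void
example_time_matvecs (Mat_dh A, double *x, double *y, int nIters)
{
  int i;

  Mat_dhZeroTiming (A);
  for (i = 0; i < nIters; ++i)
    Mat_dhMatVec (A, x, y);
  Mat_dhReduceTiming (A);	/* fills time_min[] / time_max[] across tasks */
  if (myid_dh == 0)
    printf ("matvec total= %g  mpi part (max over tasks)= %g\n",
	    A->time_max[MATVEC_TOTAL_TIME], A->time_max[MATVEC_MPI_TIME]);
}
#endif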
803 
804 #undef __FUNC__
805 #define __FUNC__ "Mat_dhPermute"
806 void
807 Mat_dhPermute (Mat_dh A, int *n2o, Mat_dh * Bout)
808 {
809  START_FUNC_DH Mat_dh B;
810  int i, j, *RP = A->rp, *CVAL = A->cval;
811  int *o2n, *rp, *cval, m = A->m, nz = RP[m];
812  double *aval, *AVAL = A->aval;
813 
814  Mat_dhCreate (&B);
815  CHECK_V_ERROR;
816  B->m = B->n = m;
817  *Bout = B;
818 
819  /* form inverse permutation */
820  o2n = (int *) MALLOC_DH (m * sizeof (int));
821  CHECK_V_ERROR;
822  for (i = 0; i < m; ++i)
823  o2n[n2o[i]] = i;
824 
825  /* allocate storage for permuted matrix */
826  rp = B->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
827  CHECK_V_ERROR;
828  cval = B->cval = (int *) MALLOC_DH (nz * sizeof (int));
829  CHECK_V_ERROR;
830  aval = B->aval = (double *) MALLOC_DH (nz * sizeof (double));
831  CHECK_V_ERROR;
832 
833  /* form new rp array */
834  rp[0] = 0;
835  for (i = 0; i < m; ++i)
836  {
837  int oldRow = n2o[i];
838  rp[i + 1] = RP[oldRow + 1] - RP[oldRow];
839  }
840  for (i = 1; i <= m; ++i)
841  rp[i] = rp[i] + rp[i - 1];
842 
843  for (i = 0; i < m; ++i)
844  {
845  int oldRow = n2o[i];
846  int idx = rp[i];
847  for (j = RP[oldRow]; j < RP[oldRow + 1]; ++j)
848  {
849  cval[idx] = o2n[CVAL[j]];
850  aval[idx] = AVAL[j];
851  ++idx;
852  }
853  }
854 
855  FREE_DH (o2n);
856  CHECK_V_ERROR;
857 END_FUNC_DH}
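
/* Example: n2o is a new-to-old row map, so row i of B is row n2o[i] of A,
   and column indices are remapped with the inverse (o2n); the permutation
   is symmetric, B = P*A*P'.  A sketch that reverses the row ordering: */
#if 0
static void
example_reverse_rows (Mat_dh A)
{
  int i, *n2o;
  Mat_dh B;

  n2o = (int *) MALLOC_DH (A->m * sizeof (int));
  CHECK_V_ERROR;
  for (i = 0; i < A->m; ++i)
    n2o[i] = A->m - 1 - i;	/* B row i <- A row m-1-i */

  Mat_dhPermute (A, n2o, &B);
  CHECK_V_ERROR;
  Mat_dhDestroy (B);
  CHECK_V_ERROR;
  FREE_DH (n2o);
  CHECK_V_ERROR;
}
#endif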
858 
859 
860 /*----------------------------------------------------------------------
861  * Print methods
862  *----------------------------------------------------------------------*/
863 
864 /* seq or mpi */
865 #undef __FUNC__
866 #define __FUNC__ "Mat_dhPrintGraph"
867 void
868 Mat_dhPrintGraph (Mat_dh A, SubdomainGraph_dh sg, FILE * fp)
869 {
870  START_FUNC_DH int pe, id = myid_dh;
871  int ierr;
872 
873  if (sg != NULL)
874  {
875  id = sg->o2n_sub[id];
876  }
877 
878  for (pe = 0; pe < np_dh; ++pe)
879  {
880  ierr = MPI_Barrier (comm_dh);
881  CHECK_MPI_V_ERROR (ierr);
882  if (id == pe)
883  {
884  if (sg == NULL)
885  {
886  mat_dh_print_graph_private (A->m, A->beg_row, A->rp, A->cval,
887  A->aval, NULL, NULL, NULL, fp);
888  CHECK_V_ERROR;
889  }
890  else
891  {
892  int beg_row = sg->beg_rowP[myid_dh];
893  mat_dh_print_graph_private (A->m, beg_row, A->rp, A->cval,
894  A->aval, sg->n2o_row, sg->o2n_col,
895  sg->o2n_ext, fp);
896  CHECK_V_ERROR;
897  }
898  }
899  }
900 END_FUNC_DH}
901 
902 
903 #undef __FUNC__
904 #define __FUNC__ "Mat_dhPrintRows"
905 void
906 Mat_dhPrintRows (Mat_dh A, SubdomainGraph_dh sg, FILE * fp)
907 {
908  START_FUNC_DH bool noValues;
909  int m = A->m, *rp = A->rp, *cval = A->cval;
910  double *aval = A->aval;
911 
912  noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
913  if (noValues)
914  aval = NULL;
915 
916  /*----------------------------------------------------------------
917  * case 1: print local portion of unpermuted matrix
918  *----------------------------------------------------------------*/
919  if (sg == NULL)
920  {
921  int i, j;
922  int beg_row = A->beg_row;
923 
924  fprintf (fp,
925  "\n----- A, unpermuted ------------------------------------\n");
926  for (i = 0; i < m; ++i)
927  {
928  fprintf (fp, "%i :: ", 1 + i + beg_row);
929  for (j = rp[i]; j < rp[i + 1]; ++j)
930  {
931  if (noValues)
932  {
933  fprintf (fp, "%i ", 1 + cval[j]);
934  }
935  else
936  {
937  fprintf (fp, "%i,%g ; ", 1 + cval[j], aval[j]);
938  }
939  }
940  fprintf (fp, "\n");
941  }
942  }
943 
944  /*----------------------------------------------------------------
945  * case 2: single mpi task, with multiple subdomains
946  *----------------------------------------------------------------*/
947  else if (np_dh == 1)
948  {
949  int i, k, idx = 1;
950  int oldRow;
951 
952  for (i = 0; i < sg->blocks; ++i)
953  {
954  int oldBlock = sg->n2o_sub[i];
955 
956  /* here, 'beg_row' and 'end_row' refer to rows in the
957  original ordering of A.
958  */
959  int beg_row = sg->beg_row[oldBlock];
960  int end_row = beg_row + sg->row_count[oldBlock];
961 
962  fprintf (fp, "\n");
963  fprintf (fp,
964  "\n----- A, permuted, single mpi task ------------------\n");
965  fprintf (fp, "---- new subdomain: %i; old subdomain: %i\n", i,
966  oldBlock);
967  fprintf (fp, " old beg_row: %i; new beg_row: %i\n",
968  sg->beg_row[oldBlock], sg->beg_rowP[oldBlock]);
969  fprintf (fp, " local rows in this block: %i\n",
970  sg->row_count[oldBlock]);
971  fprintf (fp, " bdry rows in this block: %i\n",
972  sg->bdry_count[oldBlock]);
973  fprintf (fp, " 1st bdry row= %i \n",
974  1 + end_row - sg->bdry_count[oldBlock]);
975 
976  for (oldRow = beg_row; oldRow < end_row; ++oldRow)
977  {
978  int len = 0, *cval;
979  double *aval;
980 
981  fprintf (fp, "%3i (old= %3i) :: ", idx, 1 + oldRow);
982  ++idx;
983  Mat_dhGetRow (A, oldRow, &len, &cval, &aval);
984  CHECK_V_ERROR;
985 
986  for (k = 0; k < len; ++k)
987  {
988  if (noValues)
989  {
990  fprintf (fp, "%i ", 1 + sg->o2n_col[cval[k]]);
991  }
992  else
993  {
994  fprintf (fp, "%i,%g ; ", 1 + sg->o2n_col[cval[k]],
995  aval[k]);
996  }
997  }
998 
999  fprintf (fp, "\n");
1000  Mat_dhRestoreRow (A, oldRow, &len, &cval, &aval);
1001  CHECK_V_ERROR;
1002  }
1003  }
1004  }
1005 
1006  /*----------------------------------------------------------------
1007  * case 3: multiple mpi tasks, one subdomain per task
1008  *----------------------------------------------------------------*/
1009  else
1010  {
1011  Hash_i_dh hash = sg->o2n_ext;
1012  int *o2n_col = sg->o2n_col, *n2o_row = sg->n2o_row;
1013  int beg_row = sg->beg_row[myid_dh];
1014  int beg_rowP = sg->beg_rowP[myid_dh];
1015  int i, j;
1016 
1017  for (i = 0; i < m; ++i)
1018  {
1019  int row = n2o_row[i];
1020  fprintf (fp, "%3i (old= %3i) :: ", 1 + i + beg_rowP,
1021  1 + row + beg_row);
1022  for (j = rp[row]; j < rp[row + 1]; ++j)
1023  {
1024  int col = cval[j];
1025 
1026  /* find permuted (old-to-new) value for the column */
1027  /* case i: column is locally owned */
1028  if (col >= beg_row && col < beg_row + m)
1029  {
1030  col = o2n_col[col - beg_row] + beg_rowP;
1031  }
1032 
1033  /* case ii: column is external */
1034  else
1035  {
1036  int tmp = col;
1037  tmp = Hash_i_dhLookup (hash, col);
1038  CHECK_V_ERROR;
1039  if (tmp == -1)
1040  {
1041  sprintf (msgBuf_dh,
1042  "nonlocal column= %i not in hash table",
1043  1 + col);
1044  SET_V_ERROR (msgBuf_dh);
1045  }
1046  else
1047  {
1048  col = tmp;
1049  }
1050  }
1051 
1052  if (noValues)
1053  {
1054  fprintf (fp, "%i ", 1 + col);
1055  }
1056  else
1057  {
1058  fprintf (fp, "%i,%g ; ", 1 + col, aval[j]);
1059  }
1060  }
1061  fprintf (fp, "\n");
1062  }
1063  }
1064 END_FUNC_DH}
1065 
1066 
1067 
1068 #undef __FUNC__
1069 #define __FUNC__ "Mat_dhPrintTriples"
1070 void
1071 Mat_dhPrintTriples (Mat_dh A, SubdomainGraph_dh sg, char *filename)
1072 {
1073  START_FUNC_DH int m = A->m, *rp = A->rp, *cval = A->cval;
1074  double *aval = A->aval;
1075  bool noValues;
1076  bool matlab;
1077  FILE *fp;
1078 
1079  noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
1080  if (noValues)
1081  aval = NULL;
1082  matlab = (Parser_dhHasSwitch (parser_dh, "-matlab"));
1083 
1084  /*----------------------------------------------------------------
1085  * case 1: unpermuted matrix, single or multiple mpi tasks
1086  *----------------------------------------------------------------*/
1087  if (sg == NULL)
1088  {
1089  int i, j, pe;
1090  int beg_row = A->beg_row;
1091  double val;
1092 
1093  for (pe = 0; pe < np_dh; ++pe)
1094  {
1095  MPI_Barrier (comm_dh);
1096  if (pe == myid_dh)
1097  {
1098  if (pe == 0)
1099  {
1100  fp = openFile_dh (filename, "w");
1101  CHECK_V_ERROR;
1102  }
1103  else
1104  {
1105  fp = openFile_dh (filename, "a");
1106  CHECK_V_ERROR;
1107  }
1108 
1109  for (i = 0; i < m; ++i)
1110  {
1111  for (j = rp[i]; j < rp[i + 1]; ++j)
1112  {
1113  if (noValues)
1114  {
1115  fprintf (fp, "%i %i\n", 1 + i + beg_row,
1116  1 + cval[j]);
1117  }
1118  else
1119  {
1120  val = aval[j];
1121  if (val == 0.0 && matlab)
1122  val = _MATLAB_ZERO_;
1123  fprintf (fp, TRIPLES_FORMAT, 1 + i + beg_row,
1124  1 + cval[j], val);
1125  }
1126  }
1127  }
1128  closeFile_dh (fp);
1129  CHECK_V_ERROR;
1130  }
1131  }
1132  }
1133 
1134  /*----------------------------------------------------------------
1135  * case 2: single mpi task, with multiple subdomains
1136  *----------------------------------------------------------------*/
1137  else if (np_dh == 1)
1138  {
1139  int i, j, k, idx = 1;
1140 
1141  fp = openFile_dh (filename, "w");
1142  CHECK_V_ERROR;
1143 
1144  for (i = 0; i < sg->blocks; ++i)
1145  {
1146  int oldBlock = sg->n2o_sub[i];
1147  int beg_row = sg->beg_rowP[oldBlock];
1148  int end_row = beg_row + sg->row_count[oldBlock];
1149 
1150  for (j = beg_row; j < end_row; ++j)
1151  {
1152  int len = 0, *cval;
1153  double *aval;
1154  int oldRow = sg->n2o_row[j];
1155 
1156  Mat_dhGetRow (A, oldRow, &len, &cval, &aval);
1157  CHECK_V_ERROR;
1158 
1159  if (noValues)
1160  {
1161  for (k = 0; k < len; ++k)
1162  {
1163  fprintf (fp, "%i %i\n", idx, 1 + sg->o2n_col[cval[k]]);
1164  }
1165  ++idx;
1166  }
1167 
1168  else
1169  {
1170  for (k = 0; k < len; ++k)
1171  {
1172  double val = aval[k];
1173  if (val == 0.0 && matlab)
1174  val = _MATLAB_ZERO_;
1175  fprintf (fp, TRIPLES_FORMAT, idx,
1176  1 + sg->o2n_col[cval[k]], val);
1177  }
1178  ++idx;
1179  }
1180  Mat_dhRestoreRow (A, oldRow, &len, &cval, &aval);
1181  CHECK_V_ERROR;
1182  }
1183  }
1184  }
1185 
1186  /*----------------------------------------------------------------
1187  * case 3: multiple mpi tasks, one subdomain per task
1188  *----------------------------------------------------------------*/
1189  else
1190  {
1191  Hash_i_dh hash = sg->o2n_ext;
1192  int *o2n_col = sg->o2n_col, *n2o_row = sg->n2o_row;
1193  int beg_row = sg->beg_row[myid_dh];
1194  int beg_rowP = sg->beg_rowP[myid_dh];
1195  int i, j, pe;
1196  int id = sg->o2n_sub[myid_dh];
1197 
1198  for (pe = 0; pe < np_dh; ++pe)
1199  {
1200  MPI_Barrier (comm_dh);
1201  if (id == pe)
1202  {
1203  if (pe == 0)
1204  {
1205  fp = openFile_dh (filename, "w");
1206  CHECK_V_ERROR;
1207  }
1208  else
1209  {
1210  fp = openFile_dh (filename, "a");
1211  CHECK_V_ERROR;
1212  }
1213 
1214  for (i = 0; i < m; ++i)
1215  {
1216  int row = n2o_row[i];
1217  for (j = rp[row]; j < rp[row + 1]; ++j)
1218  {
1219  int col = cval[j];
1220  double val = 0.0;
1221 
1222  if (aval != NULL)
1223  val = aval[j];
1224  if (val == 0.0 && matlab)
1225  val = _MATLAB_ZERO_;
1226 
1227  /* find permuted (old-to-new) value for the column */
1228  /* case i: column is locally owned */
1229  if (col >= beg_row && col < beg_row + m)
1230  {
1231  col = o2n_col[col - beg_row] + beg_rowP;
1232  }
1233 
1234  /* case ii: column is external */
1235  else
1236  {
1237  int tmp = col;
1238  tmp = Hash_i_dhLookup (hash, col);
1239  CHECK_V_ERROR;
1240  if (tmp == -1)
1241  {
1242  sprintf (msgBuf_dh,
1243  "nonlocal column= %i not in hash table",
1244  1 + col);
1245  SET_V_ERROR (msgBuf_dh);
1246  }
1247  else
1248  {
1249  col = tmp;
1250  }
1251  }
1252 
1253  if (noValues)
1254  {
1255  fprintf (fp, "%i %i\n", 1 + i + beg_rowP, 1 + col);
1256  }
1257  else
1258  {
1259  fprintf (fp, TRIPLES_FORMAT, 1 + i + beg_rowP,
1260  1 + col, val);
1261  }
1262  }
1263  }
1264  closeFile_dh (fp);
1265  CHECK_V_ERROR;
1266  }
1267  }
1268  }
1269 END_FUNC_DH}
1270 
1271 
1272 /* seq only */
1273 #undef __FUNC__
1274 #define __FUNC__ "Mat_dhPrintCSR"
1275 void
1276 Mat_dhPrintCSR (Mat_dh A, SubdomainGraph_dh sg, char *filename)
1277 {
1278  START_FUNC_DH FILE * fp;
1279 
1280  if (np_dh > 1)
1281  {
1282  SET_V_ERROR ("only implemented for a single mpi task");
1283  }
1284  if (sg != NULL)
1285  {
1286  SET_V_ERROR
1287  ("not implemented for reordered matrix (SubdomainGraph_dh should be NULL)");
1288  }
1289 
1290  fp = openFile_dh (filename, "w");
1291  CHECK_V_ERROR;
1292 
1293  /* sg was checked above, so there is no reordered case to handle here */
1294  mat_dh_print_csr_private (A->m, A->rp, A->cval, A->aval, fp);
1295  CHECK_V_ERROR;
1303  closeFile_dh (fp);
1304  CHECK_V_ERROR;
1305 END_FUNC_DH}
1306 
1307 /* seq */
1308 /* no reordering */
1309 #undef __FUNC__
1310 #define __FUNC__ "Mat_dhPrintBIN"
1311 void
1312 Mat_dhPrintBIN (Mat_dh A, SubdomainGraph_dh sg, char *filename)
1313 {
1314  START_FUNC_DH if (np_dh > 1)
1315  {
1316  SET_V_ERROR ("only implemented for a single MPI task");
1317  }
1318 /* if (n2o != NULL || o2n != NULL || hash != NULL) {
1319 */
1320  if (sg != NULL)
1321  {
1322  SET_V_ERROR ("not implemented for reordering; ensure sg=NULL");
1323  }
1324 
1325  io_dh_print_ebin_mat_private (A->m, A->beg_row, A->rp, A->cval, A->aval,
1326  NULL, NULL, NULL, filename);
1327  CHECK_V_ERROR;
1328 END_FUNC_DH}
1329 
1330 
1331 /*----------------------------------------------------------------------
1332  * Read methods
1333  *----------------------------------------------------------------------*/
1334 /* seq only */
1335 #undef __FUNC__
1336 #define __FUNC__ "Mat_dhReadCSR"
1337 void
1338 Mat_dhReadCSR (Mat_dh * mat, char *filename)
1339 {
1340  START_FUNC_DH Mat_dh A;
1341  FILE *fp;
1342 
1343  if (np_dh > 1)
1344  {
1345  SET_V_ERROR ("only implemented for a single MPI task");
1346  }
1347 
1348  fp = openFile_dh (filename, "r");
1349  CHECK_V_ERROR;
1350 
1351  Mat_dhCreate (&A);
1352  CHECK_V_ERROR;
1353  mat_dh_read_csr_private (&A->m, &A->rp, &A->cval, &A->aval, fp);
1354  CHECK_V_ERROR;
1355  A->n = A->m;
1356  *mat = A;
1357 
1358  closeFile_dh (fp);
1359  CHECK_V_ERROR;
1360 END_FUNC_DH}
1361 
1362 /* seq only */
1363 #undef __FUNC__
1364 #define __FUNC__ "Mat_dhReadTriples"
1365 void
1366 Mat_dhReadTriples (Mat_dh * mat, int ignore, char *filename)
1367 {
1368  START_FUNC_DH FILE * fp = NULL;
1369  Mat_dh A = NULL;
1370 
1371  if (np_dh > 1)
1372  {
1373  SET_V_ERROR ("only implemented for a single MPI task");
1374  }
1375 
1376  fp = openFile_dh (filename, "r");
1377  CHECK_V_ERROR;
1378 
1379  Mat_dhCreate (&A);
1380  CHECK_V_ERROR;
1381  mat_dh_read_triples_private (ignore, &A->m, &A->rp, &A->cval, &A->aval, fp);
1382  CHECK_V_ERROR;
1383  A->n = A->m;
1384  *mat = A;
1385 
1386  closeFile_dh (fp);
1387  CHECK_V_ERROR;
1388 END_FUNC_DH}
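
/* Example: a sketch of round-tripping a matrix through the triples format
   used by Mat_dhPrintTriples (one "row col value" per line, 1-based).  The
   'ignore' argument is forwarded to mat_dh_read_triples_private and appears
   to be a count of leading lines to skip (an assumption -- see
   mat_dh_private.c); the filenames here are illustrative. */
#if 0
static void
example_triples_roundtrip (void)
{
  Mat_dh A;

  Mat_dhReadTriples (&A, 0, "A.triples");	/* 0: no leading lines to skip */
  CHECK_V_ERROR;
  Mat_dhPrintTriples (A, NULL, "A.echo");
  CHECK_V_ERROR;
  Mat_dhDestroy (A);
  CHECK_V_ERROR;
}
#endif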
1389 
1390 /* here we pass the private function a filename, instead of an open file,
1391  the reason being that Euclid's binary format is more complicated;
1392  i.e., the other "Read" methods are only for a single MPI task.
1393 */
1394 #undef __FUNC__
1395 #define __FUNC__ "Mat_dhReadBIN"
1396 void
1397 Mat_dhReadBIN (Mat_dh * mat, char *filename)
1398 {
1399  START_FUNC_DH Mat_dh A;
1400 
1401  if (np_dh > 1)
1402  {
1403  SET_V_ERROR ("only implemented for a single MPI task");
1404  }
1405 
1406  Mat_dhCreate (&A);
1407  CHECK_V_ERROR;
1408  io_dh_read_ebin_mat_private (&A->m, &A->rp, &A->cval, &A->aval, filename);
1409  CHECK_V_ERROR;
1410  A->n = A->m;
1411  *mat = A;
1412 END_FUNC_DH}
1413 
1414 #undef __FUNC__
1415 #define __FUNC__ "Mat_dhTranspose"
1416 void
1417 Mat_dhTranspose (Mat_dh A, Mat_dh * Bout)
1418 {
1419  START_FUNC_DH Mat_dh B;
1420 
1421  if (np_dh > 1)
1422  {
1423  SET_V_ERROR ("only for sequential");
1424  }
1425 
1426  Mat_dhCreate (&B);
1427  CHECK_V_ERROR;
1428  *Bout = B;
1429  B->m = B->n = A->m;
1430  mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
1431  A->aval, &B->aval);
1432  CHECK_V_ERROR;
1433 END_FUNC_DH}
1434 
1435 #undef __FUNC__
1436 #define __FUNC__ "Mat_dhMakeStructurallySymmetric"
1437 void
1438 Mat_dhMakeStructurallySymmetric (Mat_dh A)
1439 {
1440  START_FUNC_DH if (np_dh > 1)
1441  {
1442  SET_V_ERROR ("only for sequential");
1443  }
1444  make_symmetric_private (A->m, &A->rp, &A->cval, &A->aval);
1445  CHECK_V_ERROR;
1446 END_FUNC_DH}
1447 
1448 void insert_diags_private (Mat_dh A, int ct);
1449 
1450 /* inserts diagonal if not explicitly present;
1451  sets diagonal value in row i to sum of absolute
1452  values of all elts in row i.
1453 */
1454 #undef __FUNC__
1455 #define __FUNC__ "Mat_dhFixDiags"
1456 void
1457 Mat_dhFixDiags (Mat_dh A)
1458 {
1459  START_FUNC_DH int i, j;
1460  int *rp = A->rp, *cval = A->cval, m = A->m;
1461  int ct = 0; /* number of missing diagonals */
1462  double *aval = A->aval;
1463 
1464  /* determine if any diagonals are missing */
1465  for (i = 0; i < m; ++i)
1466  {
1467  bool flag = true;
1468  for (j = rp[i]; j < rp[i + 1]; ++j)
1469  {
1470  int col = cval[j];
1471  if (col == i)
1472  {
1473  flag = false;
1474  break;
1475  }
1476  }
1477  if (flag)
1478  ++ct;
1479  }
1480 
1481  /* insert any missing diagonal elements */
1482  if (ct)
1483  {
1484  printf
1485  ("\nMat_dhFixDiags:: %i diags not explicitly present; inserting!\n",
1486  ct);
1487  insert_diags_private (A, ct);
1488  CHECK_V_ERROR;
1489  rp = A->rp;
1490  cval = A->cval;
1491  aval = A->aval;
1492  }
1493 
1494  /* set the value of all diagonal elements */
1495  for (i = 0; i < m; ++i)
1496  {
1497  double sum = 0.0;
1498  for (j = rp[i]; j < rp[i + 1]; ++j)
1499  {
1500  sum += fabs (aval[j]);
1501  }
1502  for (j = rp[i]; j < rp[i + 1]; ++j)
1503  {
1504  if (cval[j] == i)
1505  {
1506  aval[j] = sum;
1507  }
1508  }
1509  }
1510 END_FUNC_DH}
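
/* Worked example: if row i is stored as [ 2.0  -3.0 ] with no diagonal
   entry, Mat_dhFixDiags first inserts an explicit 0.0 diagonal, then
   overwrites every diagonal with the row's absolute sum, so the row becomes
   [ 5.0  2.0  -3.0 ].  Rows that already had a diagonal are likewise
   overwritten, and the old diagonal's magnitude is included in the sum. */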
1511 
1512 
1513 #undef __FUNC__
1514 #define __FUNC__ "insert_diags_private"
1515 void
1516 insert_diags_private (Mat_dh A, int ct)
1517 {
1518  START_FUNC_DH int *RP = A->rp, *CVAL = A->cval;
1519  int *rp, *cval, m = A->m;
1520  double *aval, *AVAL = A->aval;
1521  int nz = RP[m] + ct;
1522  int i, j, idx = 0;
1523 
1524  rp = A->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
1525  CHECK_V_ERROR;
1526  cval = A->cval = (int *) MALLOC_DH (nz * sizeof (int));
1527  CHECK_V_ERROR;
1528  aval = A->aval = (double *) MALLOC_DH (nz * sizeof (double));
1529  CHECK_V_ERROR;
1530  rp[0] = 0;
1531 
1532  for (i = 0; i < m; ++i)
1533  {
1534  bool flag = true;
1535  for (j = RP[i]; j < RP[i + 1]; ++j)
1536  {
1537  cval[idx] = CVAL[j];
1538  aval[idx] = AVAL[j];
1539  ++idx;
1540  if (CVAL[j] == i)
1541  flag = false;
1542  }
1543 
1544  if (flag)
1545  {
1546  cval[idx] = i;
1547  aval[idx] = 0.0;
1548  ++idx;
1549  }
1550  rp[i + 1] = idx;
1551  }
1552 
1553  FREE_DH (RP);
1554  CHECK_V_ERROR;
1555  FREE_DH (CVAL);
1556  CHECK_V_ERROR;
1557  FREE_DH (AVAL);
1558  CHECK_V_ERROR;
1559 
1560 END_FUNC_DH}
1561 
1562 #undef __FUNC__
1563 #define __FUNC__ "Mat_dhPrintDiags"
1564 void
1565 Mat_dhPrintDiags (Mat_dh A, FILE * fp)
1566 {
1567  START_FUNC_DH int i, j, m = A->m;
1568  int *rp = A->rp, *cval = A->cval;
1569  double *aval = A->aval;
1570 
1571  fprintf (fp,
1572  "=================== diagonal elements ====================\n");
1573  for (i = 0; i < m; ++i)
1574  {
1575  bool flag = true;
1576  for (j = rp[i]; j < rp[i + 1]; ++j)
1577  {
1578  if (cval[j] == i)
1579  {
1580  fprintf (fp, "%i %g\n", i + 1, aval[j]);
1581  flag = false;
1582  break;
1583  }
1584  }
1585  if (flag)
1586  {
1587  fprintf (fp, "%i ---------- missing\n", i + 1);
1588  }
1589  }
1590 END_FUNC_DH}
1591 
1592 
1593 #undef __FUNC__
1594 #define __FUNC__ "Mat_dhGetRow"
1595 void
1596 Mat_dhGetRow (Mat_dh B, int globalRow, int *len, int **ind, double **val)
1597 {
1598  START_FUNC_DH int row = globalRow - B->beg_row;
1599  if (row < 0 || row >= B->m)
1600  {
1601  sprintf (msgBuf_dh,
1602  "requested globalRow= %i, which is local row= %i, but only have %i rows!",
1603  globalRow, row, B->m);
1604  SET_V_ERROR (msgBuf_dh);
1605  }
1606  *len = B->rp[row + 1] - B->rp[row];
1607  if (ind != NULL)
1608  *ind = B->cval + B->rp[row];
1609  if (val != NULL)
1610  *val = B->aval + B->rp[row];
1611 END_FUNC_DH}
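
/* Example: a sketch of scanning the locally owned rows.  Note that
   Mat_dhGetRow takes a global row number (beg_row + local index) and
   returns pointers into the matrix itself, so nothing is copied and
   Mat_dhRestoreRow is a no-op. */
#if 0
static void
example_row_scan (Mat_dh A)
{
  int i, k, len, *ind;
  double *val;

  for (i = 0; i < A->m; ++i)
    {
      Mat_dhGetRow (A, A->beg_row + i, &len, &ind, &val);
      CHECK_V_ERROR;
      for (k = 0; k < len; ++k)
	printf ("A(%i,%i) = %g\n", i, ind[k], val[k]);
      Mat_dhRestoreRow (A, A->beg_row + i, &len, &ind, &val);
      CHECK_V_ERROR;
    }
}
#endif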
1612 
1613 #undef __FUNC__
1614 #define __FUNC__ "Mat_dhRestoreRow"
1615 void
1616 Mat_dhRestoreRow (Mat_dh B, int row, int *len, int **ind, double **val)
1617 {
1618 START_FUNC_DH END_FUNC_DH}
1619 
1620 #undef __FUNC__
1621 #define __FUNC__ "Mat_dhRowPermute"
1622 void
1623 Mat_dhRowPermute (Mat_dh mat)
1624 {
1625  START_FUNC_DH if (ignoreMe)
1626  SET_V_ERROR ("turned off; compilation problem on blue");
1627 
1628 #if 0
1629  int i, j, m = mat->m, nz = mat->rp[m];
1630  int *o2n, *cval;
1631  int algo = 1;
1632  double *r1, *c1;
1633  bool debug = mat->debug;
1634  bool isNatural;
1635  Mat_dh B;
1636 
1637 #if 0
1638 * = 1: Compute a row permutation of the matrix so that the permuted
1639 *      matrix has as many entries on its diagonal as possible.  The
1640 *      values on the diagonal are of arbitrary size.  HSL subroutine
1641 *      MC21A/AD is used for this.
1642 * = 2: Compute a row permutation of the matrix so that the smallest
1643 *      value on the diagonal of the permuted matrix is maximized.
1644 * = 3: As for = 2, but the algorithm differs from the one used for
1645 *      JOB = 2 and may have quite a different performance.
1646 * = 4: Compute a row permutation of the matrix so that the sum of the
1647 *      diagonal entries of the permuted matrix is maximized.
1648 * = 5: Compute a row permutation of the matrix so that the product of the
1649 *      diagonal entries of the permuted matrix is maximized, and vectors to
1650 *      scale the matrix so that the nonzero diagonal entries are one in absolute value and all off-diagonal entries are at most one in absolute value.
1651 #endif
1652  Parser_dhReadInt (parser_dh, "-rowPermute", &algo);
1653  CHECK_V_ERROR;
1654  if (algo < 1)
1655  algo = 1;
1656  if (algo > 5)
1657  algo = 1;
1658  sprintf (msgBuf_dh, "calling row permutation with algo= %i", algo);
1659  SET_INFO (msgBuf_dh);
1660 
1661  r1 = (double *) MALLOC_DH (m * sizeof (double));
1662  CHECK_V_ERROR;
1663  c1 = (double *) MALLOC_DH (m * sizeof (double));
1664  CHECK_V_ERROR;
1665  if (mat->row_perm == NULL)
1666  {
1667  mat->row_perm = o2n = (int *) MALLOC_DH (m * sizeof (int));
1668  CHECK_V_ERROR;
1669  }
1670  else
1671  {
1672  o2n = mat->row_perm;
1673  }
1674 
1675  Mat_dhTranspose (mat, &B);
1676  CHECK_V_ERROR;
1677 
1678  /* get row permutation and scaling vectors */
1679  dldperm (algo, m, nz, B->rp, B->cval, B->aval, o2n, r1, c1);
1680 
1681  /* permute column indices, then turn the matrix rightside up */
1682  cval = B->cval;
1683  for (i = 0; i < nz; ++i)
1684  cval[i] = o2n[cval[i]];
1685 
1686  /* debug block */
1687  if (debug && logFile != NULL)
1688  {
1689  fprintf (logFile, "\n-------- row permutation vector --------\n");
1690  for (i = 0; i < m; ++i)
1691  fprintf (logFile, "%i ", 1 + o2n[i]);
1692  fprintf (logFile, "\n");
1693 
1694  if (myid_dh == 0)
1695  {
1696  printf ("\n-------- row permutation vector --------\n");
1697  for (i = 0; i < m; ++i)
1698  printf ("%i ", 1 + o2n[i]);
1699  printf ("\n");
1700  }
1701  }
1702 
1703  /* check to see if permutation is non-natural */
1704  isNatural = true;
1705  for (i = 0; i < m; ++i)
1706  {
1707  if (o2n[i] != i)
1708  {
1709  isNatural = false;
1710  break;
1711  }
1712  }
1713 
1714  if (isNatural)
1715  {
1716  printf ("@@@ [%i] Mat_dhRowPermute :: got natural ordering!\n",
1717  myid_dh);
1718  }
1719  else
1720  {
1721  int *rp = B->rp, *cval = B->cval;
1722  double *aval = B->aval;
1723 
1724  if (algo == 5)
1725  {
1726  printf
1727  ("@@@ [%i] Mat_dhRowPermute :: scaling matrix rows and columns!\n",
1728  myid_dh);
1729 
1730  /* scale matrix */
1731  for (i = 0; i < m; i++)
1732  {
1733  r1[i] = exp (r1[i]);
1734  c1[i] = exp (c1[i]);
1735  }
1736  for (i = 0; i < m; i++)
1737  for (j = rp[i]; j < rp[i + 1]; j++)
1738  aval[j] *= r1[cval[j]] * c1[i];
1739  }
1740 
1741  mat_dh_transpose_reuse_private (B->m, B->rp, B->cval, B->aval,
1742  mat->rp, mat->cval, mat->aval);
1743  CHECK_V_ERROR;
1744  }
1745 
1746 
1747  Mat_dhDestroy (B);
1748  CHECK_V_ERROR;
1749  FREE_DH (r1);
1750  CHECK_V_ERROR;
1751  FREE_DH (c1);
1752  CHECK_V_ERROR;
1753 
1754 #endif
1755 END_FUNC_DH}
1756 
1757 
1758 /*==============================================================================*/
1759 #undef __FUNC__
1760 #define __FUNC__ "build_adj_lists_private"
1761 void
1762 build_adj_lists_private (Mat_dh mat, int **rpOUT, int **cvalOUT)
1763 {
1764  START_FUNC_DH int m = mat->m;
1765  int *RP = mat->rp, *CVAL = mat->cval;
1766  int nz = RP[m];
1767  int i, j, *rp, *cval, idx = 0;
1768 
1769  rp = *rpOUT = (int *) MALLOC_DH ((m + 1) * sizeof (int));
1770  CHECK_V_ERROR;
1771  cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int));
1772  CHECK_V_ERROR;
1773  rp[0] = 0;
1774 
1775  /* assume symmetry for now! */
1776  for (i = 0; i < m; ++i)
1777  {
1778  for (j = RP[i]; j < RP[i + 1]; ++j)
1779  {
1780  int col = CVAL[j];
1781  if (col != i)
1782  {
1783  cval[idx++] = col;
1784  }
1785  }
1786  rp[i + 1] = idx;
1787  }
1788 END_FUNC_DH}
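
/* Example: for the dense 2x2 matrix stored as rp = {0,2,4},
   cval = {0,1, 0,1}, the adjacency lists built above are rp = {0,1,2},
   cval = {1, 0}: the same structure with the self loops (diagonal entries)
   removed, which is the form Metis expects. */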
1789 
1790 
1791 #undef __FUNC__
1792 #define __FUNC__ "Mat_dhPartition"
1793 void
1794 Mat_dhPartition (Mat_dh mat, int blocks,
1795  int **beg_rowOUT, int **row_countOUT, int **n2oOUT,
1796  int **o2nOUT)
1797 {
1798  START_FUNC_DH
1799 #ifndef HAVE_METIS_DH
1800  if (ignoreMe)
1801  SET_V_ERROR ("not compiled for metis!");
1802 
1803 #else
1804  int *beg_row, *row_count, *n2o, *o2n, bk, new, *part;
1805  int m = mat->m;
1806  int i, cutEdgeCount;
1807  int zero = 0;	/* passed below for both wgtflag and numflag */
1808  int metisOpts[5] = { 0, 0, 0, 0, 0 };
1809  int *rp, *cval;
1810 
1811  /* allocate storage for returned arrays */
1812  beg_row = *beg_rowOUT = (int *) MALLOC_DH (blocks * sizeof (int));
1813  CHECK_V_ERROR;
1814  row_count = *row_countOUT = (int *) MALLOC_DH (blocks * sizeof (int));
1815  CHECK_V_ERROR;
1816  *n2oOUT = n2o = (int *) MALLOC_DH (m * sizeof (int));
1817  CHECK_V_ERROR;
1818  *o2nOUT = o2n = (int *) MALLOC_DH (m * sizeof (int));
1819  CHECK_V_ERROR;
1820 
1821 #if 0
1822 ===========================================================================
1823 Metis arguments:
1824 
1825   n            - number of nodes
1826   rp[], cval[] - adjacency lists
1827   NULL, NULL, 0   /* no edge or vertex weights */
1828   0               /* use zero-based numbering */
1829   blocksIN,
1830   options[5]      /* options[0] = 0:: use defaults; 1:: use options 1..4 */
1831   edgecutOUT, part[]
1832 ===========================================================================
1833 #endif
1834  /* form the graph representation that metis wants */
1835  build_adj_lists_private (mat, &rp, &cval);
1836  CHECK_V_ERROR;
1837  part = (int *) MALLOC_DH (m * sizeof (int));
1838  CHECK_V_ERROR;
1839 
1840  /* get partition vector from metis */
1841  METIS_PartGraphKway (&m, rp, cval, NULL, NULL,
1842  &zero, &zero, &blocks, metisOpts, &cutEdgeCount, part);
1843 
1844  FREE_DH (rp);
1845  CHECK_V_ERROR;
1846  FREE_DH (cval);
1847  CHECK_V_ERROR;
1848 
1849  if (mat->debug)
1850  {
1851  printf_dh ("\nmetis partitioning vector; blocks= %i\n", blocks);
1852  for (i = 0; i < m; ++i)
1853  printf_dh (" %i %i\n", i + 1, part[i]);
1854  }
1855 
1856  /* compute beg_row, row_count arrays from partition vector */
1857  for (i = 0; i < blocks; ++i)
1858  row_count[i] = 0;
1859  for (i = 0; i < m; ++i)
1860  {
1861  bk = part[i]; /* block to which row i belongs */
1862  row_count[bk] += 1;
1863  }
1864  beg_row[0] = 0;
1865  for (i = 1; i < blocks; ++i)
1866  beg_row[i] = beg_row[i - 1] + row_count[i - 1];
1867 
1868  if (mat->debug)
1869  {
1870  printf_dh ("\nrow_counts: ");
1871  for (i = 0; i < blocks; ++i)
1872  printf_dh (" %i", row_count[i]);
1873  printf_dh ("\nbeg_row: ");
1874  for (i = 0; i < blocks; ++i)
1875  printf_dh (" %i", beg_row[i] + 1);
1876  printf_dh ("\n");
1877  }
1878 
1879  /* compute permutation vector */
1880  {
1881  int *tmp = (int *) MALLOC_DH (blocks * sizeof (int));
1882  CHECK_V_ERROR;
1883  memcpy (tmp, beg_row, blocks * sizeof (int));
1884  for (i = 0; i < m; ++i)
1885  {
1886  bk = part[i]; /* block to which row i belongs */
1887  new = tmp[bk];
1888  tmp[bk] += 1;
1889  o2n[i] = new;
1890  n2o[new] = i;
1891  }
1892  FREE_DH (tmp);
1893  }
1894 
1895  FREE_DH (part);
1896  CHECK_V_ERROR;
1897 
1898 #endif
1899 
1900 END_FUNC_DH}
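
/* Example: a sketch combining Mat_dhPartition and Mat_dhPermute to make
   each block's rows contiguous (sequential use; requires HAVE_METIS_DH). */
#if 0
static void
example_partition (Mat_dh A, int blocks)
{
  int *beg_row, *row_count, *n2o, *o2n;
  Mat_dh B;

  Mat_dhPartition (A, blocks, &beg_row, &row_count, &n2o, &o2n);
  CHECK_V_ERROR;
  Mat_dhPermute (A, n2o, &B);	/* block i occupies rows beg_row[i] ..
				   beg_row[i] + row_count[i] - 1 of B */
  CHECK_V_ERROR;
  Mat_dhDestroy (B);
  CHECK_V_ERROR;

  FREE_DH (beg_row);
  FREE_DH (row_count);
  FREE_DH (n2o);
  FREE_DH (o2n);
  CHECK_V_ERROR;
}
#endif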