Factor_dh.c
/*@HEADER
// ***********************************************************************
//
// Ifpack: Object-Oriented Algebraic Preconditioner Package
// Copyright (2002) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ***********************************************************************
//@HEADER
*/

#include "Factor_dh.h"
#include "Vec_dh.h"
#include "Mat_dh.h"
#include "SubdomainGraph_dh.h"
#include "TimeLog_dh.h"
#include "Mem_dh.h"
#include "Numbering_dh.h"
#include "Hash_i_dh.h"
#include "Parser_dh.h"
#include "mat_dh_private.h"
#include "getRow_dh.h"
#include "Euclid_dh.h"
#include "io_dh.h"
/* suppress compiler complaints */
void
Factor_dh_junk ()
{
}

static void adjust_bj_private (Factor_dh mat);
static void unadjust_bj_private (Factor_dh mat);

#undef __FUNC__
#define __FUNC__ "Factor_dhCreate"
void
Factor_dhCreate (Factor_dh * mat)
{
  START_FUNC_DH struct _factor_dh *tmp;

  if (np_dh > MAX_MPI_TASKS)
    {
      SET_V_ERROR ("you must change MAX_MPI_TASKS and recompile!");
    }

  tmp = (struct _factor_dh *) MALLOC_DH (sizeof (struct _factor_dh));
  CHECK_V_ERROR;
  *mat = tmp;

  tmp->m = 0;
  tmp->n = 0;
  tmp->id = myid_dh;
  tmp->beg_row = 0;
  tmp->first_bdry = 0;
  tmp->bdry_count = 0;
  tmp->blockJacobi = false;

  tmp->rp = NULL;
  tmp->cval = NULL;
  tmp->aval = NULL;
  tmp->fill = NULL;
  tmp->diag = NULL;
  tmp->alloc = 0;

  tmp->work_y_lo = tmp->work_x_hi = NULL;
  tmp->sendbufLo = tmp->sendbufHi = NULL;
  tmp->sendindLo = tmp->sendindHi = NULL;
  tmp->num_recvLo = tmp->num_recvHi = 0;
  tmp->num_sendLo = tmp->num_sendHi = 0;
  tmp->sendlenLo = tmp->sendlenHi = 0;

  tmp->solveIsSetup = false;
  tmp->numbSolve = NULL;

  tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_Factor");

/* Factor_dhZeroTiming(tmp); CHECK_V_ERROR; */
END_FUNC_DH}
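
/* Illustrative lifecycle (a sketch, not code from this file; the
   numeric factorization step is performed elsewhere, by the Euclid_dh
   routines that fill rp/cval/aval/diag):

     Factor_dh F;
     Factor_dhInit (A, true, true, rho, myid_dh, beg_rowP, &F);
     ... factorization fills F->rp, F->cval, F->aval, F->diag ...
     Factor_dhSolveSetup (F, sg);      (once, before any solves)
     Factor_dhSolve (rhs, lhs, ctx);   (repeatedly; ctx->F == F)
     Factor_dhDestroy (F);
*/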

#undef __FUNC__
#define __FUNC__ "Factor_dhDestroy"
void
Factor_dhDestroy (Factor_dh mat)
{
  START_FUNC_DH if (mat->rp != NULL)
    {
      FREE_DH (mat->rp);
      CHECK_V_ERROR;
    }
  if (mat->cval != NULL)
    {
      FREE_DH (mat->cval);
      CHECK_V_ERROR;
    }
  if (mat->aval != NULL)
    {
      FREE_DH (mat->aval);
      CHECK_V_ERROR;
    }
  if (mat->diag != NULL)
    {
      FREE_DH (mat->diag);
      CHECK_V_ERROR;
    }
  if (mat->fill != NULL)
    {
      FREE_DH (mat->fill);
      CHECK_V_ERROR;
    }

  if (mat->work_y_lo != NULL)
    {
      FREE_DH (mat->work_y_lo);
      CHECK_V_ERROR;
    }
  if (mat->work_x_hi != NULL)
    {
      FREE_DH (mat->work_x_hi);
      CHECK_V_ERROR;
    }
  if (mat->sendbufLo != NULL)
    {
      FREE_DH (mat->sendbufLo);
      CHECK_V_ERROR;
    }
  if (mat->sendbufHi != NULL)
    {
      FREE_DH (mat->sendbufHi);
      CHECK_V_ERROR;
    }
  if (mat->sendindLo != NULL)
    {
      FREE_DH (mat->sendindLo);
      CHECK_V_ERROR;
    }
  if (mat->sendindHi != NULL)
    {
      FREE_DH (mat->sendindHi);
      CHECK_V_ERROR;
    }

  if (mat->numbSolve != NULL)
    {
      Numbering_dhDestroy (mat->numbSolve);
      CHECK_V_ERROR;
    }
  FREE_DH (mat);
  CHECK_V_ERROR;
END_FUNC_DH}


#undef __FUNC__
#define __FUNC__ "create_fake_mat_private"
static void
create_fake_mat_private (Factor_dh mat, Mat_dh * matFakeIN)
{
  START_FUNC_DH Mat_dh matFake;
  Mat_dhCreate (matFakeIN);
  CHECK_V_ERROR;
  matFake = *matFakeIN;
  matFake->m = mat->m;
  matFake->n = mat->n;
  matFake->rp = mat->rp;
  matFake->cval = mat->cval;
  matFake->aval = mat->aval;
  matFake->beg_row = mat->beg_row;
END_FUNC_DH}

#undef __FUNC__
#define __FUNC__ "destroy_fake_mat_private"
static void
destroy_fake_mat_private (Mat_dh matFake)
{
  START_FUNC_DH matFake->rp = NULL;
  matFake->cval = NULL;
  matFake->aval = NULL;
  Mat_dhDestroy (matFake);
  CHECK_V_ERROR;
END_FUNC_DH}


#undef __FUNC__
#define __FUNC__ "Factor_dhReadNz"
int
Factor_dhReadNz (Factor_dh mat)
{
  START_FUNC_DH int ierr, retval = mat->rp[mat->m];
  int nz = retval;
  ierr = MPI_Allreduce (&nz, &retval, 1, MPI_INT, MPI_SUM, comm_dh);
  CHECK_MPI_ERROR (ierr);
END_FUNC_VAL (retval)}


#undef __FUNC__
#define __FUNC__ "Factor_dhPrintRows"
void
Factor_dhPrintRows (Factor_dh mat, FILE * fp)
{
  START_FUNC_DH int beg_row = mat->beg_row;
  int m = mat->m, i, j;
  bool noValues;

  noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
  if (mat->aval == NULL)
    noValues = true;

  if (mat->blockJacobi)
    {
      adjust_bj_private (mat);
      CHECK_V_ERROR;
    }

  fprintf (fp,
           "\n----------------------- Factor_dhPrintRows ------------------\n");
  if (mat->blockJacobi)
    {
      fprintf (fp,
               "@@@ Block Jacobi ILU; adjusted values from zero-based @@@\n");
    }

  for (i = 0; i < m; ++i)
    {
      fprintf (fp, "%i :: ", 1 + i + beg_row);
      for (j = mat->rp[i]; j < mat->rp[i + 1]; ++j)
        {
          if (noValues)
            {
              fprintf (fp, "%i ", 1 + mat->cval[j]);
            }
          else
            {
              fprintf (fp, "%i,%g ; ", 1 + mat->cval[j], mat->aval[j]);
            }
        }
      fprintf (fp, "\n");
    }

  if (mat->blockJacobi)
    {
      unadjust_bj_private (mat);
      CHECK_V_ERROR;
    }
END_FUNC_DH}


#undef __FUNC__
#define __FUNC__ "Factor_dhPrintDiags"
void
Factor_dhPrintDiags (Factor_dh mat, FILE * fp)
{
  START_FUNC_DH int beg_row = mat->beg_row;
  int m = mat->m, i, pe, *diag = mat->diag;
  REAL_DH *aval = mat->aval;

  fprintf_dh (fp,
              "\n----------------------- Factor_dhPrintDiags ------------------\n");
  fprintf_dh (fp, "(grep for 'ZERO')\n");

  for (pe = 0; pe < np_dh; ++pe)
    {
      MPI_Barrier (comm_dh);
      if (mat->id == pe)
        {
          fprintf (fp, "----- subdomain: %i processor: %i\n", pe, myid_dh);
          for (i = 0; i < m; ++i)
            {
              REAL_DH val = aval[diag[i]];
              if (val)
                {
                  fprintf (fp, "%i %g\n", i + 1 + beg_row, aval[diag[i]]);
                }
              else
                {
                  fprintf (fp, "%i %g ZERO\n", i + 1 + beg_row,
                           aval[diag[i]]);
                }
            }
        }
    }
END_FUNC_DH}


#undef __FUNC__
#define __FUNC__ "Factor_dhPrintGraph"
void
Factor_dhPrintGraph (Factor_dh mat, char *filename)
{
  START_FUNC_DH FILE * fp;
  int i, j, m = mat->m, *work, *rp = mat->rp, *cval = mat->cval;

  if (np_dh > 1)
    SET_V_ERROR ("only implemented for single mpi task");

  work = (int *) MALLOC_DH (m * sizeof (int));
  CHECK_V_ERROR;

  fp = openFile_dh (filename, "w");
  CHECK_V_ERROR;

  for (i = 0; i < m; ++i)
    {
      for (j = 0; j < m; ++j)
        work[j] = 0;
      for (j = rp[i]; j < rp[i + 1]; ++j)
        work[cval[j]] = 1;

      for (j = 0; j < m; ++j)
        {
          if (work[j])
            {
              fprintf (fp, " x ");
            }
          else
            {
              fprintf (fp, "   ");
            }
        }
      fprintf (fp, "\n");
    }

  closeFile_dh (fp);
  CHECK_V_ERROR;

  FREE_DH (work);
END_FUNC_DH}


#undef __FUNC__
#define __FUNC__ "Factor_dhPrintTriples"
void
Factor_dhPrintTriples (Factor_dh mat, char *filename)
{
  START_FUNC_DH int pe, i, j;
  int m = mat->m, *rp = mat->rp;
  int beg_row = mat->beg_row;
  REAL_DH *aval = mat->aval;
  bool noValues;
  FILE *fp;

  if (mat->blockJacobi)
    {
      adjust_bj_private (mat);
      CHECK_V_ERROR;
    }

  noValues = (Parser_dhHasSwitch (parser_dh, "-noValues"));
  if (noValues)
    aval = NULL;

  for (pe = 0; pe < np_dh; ++pe)
    {
      MPI_Barrier (comm_dh);
      if (mat->id == pe)
        {
          if (pe == 0)
            {
              fp = openFile_dh (filename, "w");
              CHECK_V_ERROR;
            }
          else
            {
              fp = openFile_dh (filename, "a");
              CHECK_V_ERROR;
            }

          for (i = 0; i < m; ++i)
            {
              for (j = rp[i]; j < rp[i + 1]; ++j)
                {
                  if (noValues)
                    {
                      fprintf (fp, "%i %i\n", 1 + i + beg_row,
                               1 + mat->cval[j]);
                    }
                  else
                    {
                      fprintf (fp, TRIPLES_FORMAT,
                               1 + i + beg_row, 1 + mat->cval[j], aval[j]);
                    }
                }
            }
          closeFile_dh (fp);
          CHECK_V_ERROR;
        }
    }

  if (mat->blockJacobi)
    {
      unadjust_bj_private (mat);
      CHECK_V_ERROR;
    }
END_FUNC_DH}

/*--------------------------------------------------------------------------------
 * Functions to set up the matrix for triangular solves. These are similar to
 * MatVecSetup(), except that there are two cases: subdomains ordered lower than
 * ourselves, and subdomains ordered higher than ourselves. This SolveSetup
 * is used for Parallel ILU (PILU). The following are adapted/modified from
 * Edmond Chow's ParaSails.
 *--------------------------------------------------------------------------------*/
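
/* Sketch of the exchange performed below (a summary, not a new protocol):
     1. each task MPI_Isend's (tag 444) to every owning task the list of
        row indices it will need during the triangular solves;
     2. each task posts persistent receives (MPI_Recv_init, tag 555) for
        the values it will be sent (setup_receives_private);
     3. the matching MPI_Irecv's for the tag-444 index lists, and the
        persistent MPI_Send_init's for the tag-555 values, are created
        in setup_sends_private.
   The persistent requests are then (re)started in each Factor_dhSolve. */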

/* adapted from Edmond Chow's ParaSails */

/* 1. start receives of node data to be received from other processors;
   2. send to other processors the list of nodes this processor needs
      to receive from them.
   Returns: the number of processors from whom nodes will be received.
*/
#undef __FUNC__
#define __FUNC__ "setup_receives_private"
static int
setup_receives_private (Factor_dh mat, int *beg_rows, int *end_rows,
                        double *recvBuf, MPI_Request * req,
                        int *reqind, int reqlen, int *outlist, bool debug)
{
  START_FUNC_DH int i, j, this_pe, num_recv = 0;
  MPI_Request request;

  if (debug)
    {
      fprintf (logFile,
               "\nFACT ========================================================\n");
      fprintf (logFile, "FACT STARTING: setup_receives_private\n");
    }

  for (i = 0; i < reqlen; i = j)
    {                           /* j is set below */
      /* determine the processor that owns the row with index reqind[i] */
      this_pe = mat_find_owner (beg_rows, end_rows, reqind[i]);
      CHECK_ERROR (-1);

      /* figure out the other rows we need from this_pe */
      for (j = i + 1; j < reqlen; j++)
        {
          int idx = reqind[j];
          if (idx < beg_rows[this_pe] || idx >= end_rows[this_pe])
            {
              break;
            }
        }

      if (debug)
        {
          int k;
          fprintf (logFile, "FACT need nodes from P_%i: ", this_pe);
          for (k = i; k < j; ++k)
            fprintf (logFile, "%i ", 1 + reqind[k]);
          fprintf (logFile, "\n");
        }

      /* record the number of indices needed from this_pe */
      outlist[this_pe] = j - i;

      /* request rows in reqind[i..j-1] */
      /* Note: the receiving processor, this_pe, doesn't yet know
         about the incoming request, hence, can't set up a matching
         receive; this matching receive will be started later,
         in setup_sends_private.
       */
      MPI_Isend (reqind + i, j - i, MPI_INT, this_pe, 444, comm_dh, &request);
      MPI_Request_free (&request);

      /* set up persistent comms for receiving the values from this_pe */
      MPI_Recv_init (recvBuf + i, j - i, MPI_DOUBLE, this_pe, 555,
                     comm_dh, req + num_recv);
      ++num_recv;
    }

  END_FUNC_VAL (num_recv);
}

/*
   1. start receive to get list of nodes that this processor
      needs to send to other processors
   2. start persistent comms to send the data
*/
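
/* Note (a summary of behavior set up below, not new behavior): sends are
   split into "Lo" and "Hi" groups by the permuted subdomain ordering
   o2n_subdomain. During Factor_dhSolve, forward-solve results ('y') are
   sent to higher-ordered neighbors and backward-solve results ('x') to
   lower-ordered ones. */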
#undef __FUNC__
#define __FUNC__ "setup_sends_private"
static void
setup_sends_private (Factor_dh mat, int *inlist,
                     int *o2n_subdomain, bool debug)
{
  START_FUNC_DH int i, jLo, jHi, sendlenLo, sendlenHi, first = mat->beg_row;
  MPI_Request *requests = mat->requests, *sendReq;
  MPI_Status *statuses = mat->status;
  bool isHigher;
  int *rcvBuf;
  double *sendBuf;
  int myidNEW = o2n_subdomain[myid_dh];
  int count;

  if (debug)
    {
      fprintf (logFile, "\nFACT STARTING: setup_sends_private\n");
    }

  /* Determine size of and allocate sendbuf and sendind */
  sendlenLo = sendlenHi = 0;
  for (i = 0; i < np_dh; i++)
    {
      if (inlist[i])
        {
          if (o2n_subdomain[i] < myidNEW)
            {
              sendlenLo += inlist[i];
            }
          else
            {
              sendlenHi += inlist[i];
            }
        }
    }

  mat->sendlenLo = sendlenLo;
  mat->sendlenHi = sendlenHi;
  mat->sendbufLo = (double *) MALLOC_DH (sendlenLo * sizeof (double));
  CHECK_V_ERROR;
  mat->sendbufHi = (double *) MALLOC_DH (sendlenHi * sizeof (double));
  CHECK_V_ERROR;
  mat->sendindLo = (int *) MALLOC_DH (sendlenLo * sizeof (int));
  CHECK_V_ERROR;
  mat->sendindHi = (int *) MALLOC_DH (sendlenHi * sizeof (int));
  CHECK_V_ERROR;

  count = 0;                    /* number of calls to MPI_Irecv() */
  jLo = jHi = 0;
  mat->num_sendLo = 0;
  mat->num_sendHi = 0;
  for (i = 0; i < np_dh; i++)
    {
      if (inlist[i])
        {
          isHigher = (o2n_subdomain[i] < myidNEW) ? false : true;

          /* Post receive for the actual indices */
          if (isHigher)
            {
              rcvBuf = &mat->sendindHi[jHi];
              sendBuf = &mat->sendbufHi[jHi];
              sendReq = &mat->send_reqHi[mat->num_sendHi];
              mat->num_sendHi++;
              jHi += inlist[i];
            }
          else
            {
              rcvBuf = &mat->sendindLo[jLo];
              sendBuf = &mat->sendbufLo[jLo];
              sendReq = &mat->send_reqLo[mat->num_sendLo];
              mat->num_sendLo++;
              jLo += inlist[i];
            }

          /* matching receive, for list of unknowns that will be sent,
             during the triangular solves, from ourselves to P_i
           */
          MPI_Irecv (rcvBuf, inlist[i], MPI_INT, i, 444, comm_dh,
                     requests + count);
          ++count;

          /* Set up the send */
          MPI_Send_init (sendBuf, inlist[i], MPI_DOUBLE, i, 555, comm_dh,
                         sendReq);
        }
    }

  /* note: count = mat->num_sendLo + mat->num_sendHi */
  MPI_Waitall (count, requests, statuses);

  if (debug)
    {
      int j;
      jLo = jHi = 0;

      fprintf (logFile,
               "\nFACT columns that I must send to other subdomains:\n");
      for (i = 0; i < np_dh; i++)
        {
          if (inlist[i])
            {
              isHigher = (o2n_subdomain[i] < myidNEW) ? false : true;
              if (isHigher)
                {
                  rcvBuf = &mat->sendindHi[jHi];
                  jHi += inlist[i];
                }
              else
                {
                  rcvBuf = &mat->sendindLo[jLo];
                  jLo += inlist[i];
                }

              fprintf (logFile, "FACT send to P_%i: ", i);
              for (j = 0; j < inlist[i]; ++j)
                fprintf (logFile, "%i ", rcvBuf[j] + 1);
              fprintf (logFile, "\n");
            }
        }
    }

  /* convert global indices to local indices;
     these are all indices on this processor */
  for (i = 0; i < mat->sendlenLo; i++)
    mat->sendindLo[i] -= first;
  for (i = 0; i < mat->sendlenHi; i++)
    mat->sendindHi[i] -= first;
END_FUNC_DH}


#undef __FUNC__
#define __FUNC__ "Factor_dhSolveSetup"
void
Factor_dhSolveSetup (Factor_dh mat, SubdomainGraph_dh sg)
{
  START_FUNC_DH int *outlist, *inlist;
  int i, row, *rp = mat->rp, *cval = mat->cval;
  Numbering_dh numb;
  int m = mat->m;
  /* int firstLocalRow = mat->beg_row; */
  int *beg_rows = sg->beg_rowP, *row_count = sg->row_count, *end_rows;
  Mat_dh matFake;
  bool debug = false;
  double *recvBuf;

  if (mat->debug && logFile != NULL)
    debug = true;

  end_rows = (int *) MALLOC_DH (np_dh * sizeof (int));
  CHECK_V_ERROR;
  outlist = (int *) MALLOC_DH (np_dh * sizeof (int));
  CHECK_V_ERROR;
  inlist = (int *) MALLOC_DH (np_dh * sizeof (int));
  CHECK_V_ERROR;
  for (i = 0; i < np_dh; ++i)
    {
      inlist[i] = 0;
      outlist[i] = 0;
      end_rows[i] = beg_rows[i] + row_count[i];
    }

  /* Create Numbering object */
  create_fake_mat_private (mat, &matFake);
  CHECK_V_ERROR;
  Numbering_dhCreate (&(mat->numbSolve));
  CHECK_V_ERROR;
  numb = mat->numbSolve;
  Numbering_dhSetup (numb, matFake);
  CHECK_V_ERROR;
  destroy_fake_mat_private (matFake);
  CHECK_V_ERROR;

  if (debug)
    {
      fprintf (stderr, "Numbering_dhSetup completed\n");
    }

  /* Allocate recvbuf; recvbuf has numlocal entries saved for local part of x */
  i = m + numb->num_ext;
  mat->work_y_lo = (double *) MALLOC_DH (i * sizeof (double));
  CHECK_V_ERROR;
  mat->work_x_hi = (double *) MALLOC_DH (i * sizeof (double));
  CHECK_V_ERROR;
  if (debug)
    {
      fprintf (logFile, "FACT num_extLo= %i num_extHi= %i\n",
               numb->num_extLo, numb->num_extHi);
    }

  mat->num_recvLo = 0;
  mat->num_recvHi = 0;
  if (numb->num_extLo)
    {
      recvBuf = mat->work_y_lo + m;
      mat->num_recvLo = setup_receives_private (mat, beg_rows, end_rows,
                                                recvBuf, mat->recv_reqLo,
                                                numb->idx_extLo,
                                                numb->num_extLo, outlist,
                                                debug);
      CHECK_V_ERROR;
    }

  if (numb->num_extHi)
    {
      recvBuf = mat->work_x_hi + m + numb->num_extLo;
      mat->num_recvHi = setup_receives_private (mat, beg_rows, end_rows,
                                                recvBuf, mat->recv_reqHi,
                                                numb->idx_extHi,
                                                numb->num_extHi, outlist,
                                                debug);
      CHECK_V_ERROR;
    }

  MPI_Alltoall (outlist, 1, MPI_INT, inlist, 1, MPI_INT, comm_dh);
  /* At this point, inlist[j] contains the number of indices
     that this processor must send to P_j. Processors next need
     to exchange the actual lists of required indices; this is done
     in setup_sends_private().
   */
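  /* Worked example (illustrative numbers, not from the source): with
     np_dh = 3, if this task needs 2 externals owned by P_1 and 1 owned
     by P_2, its outlist is {0, 2, 1}. If P_1 and P_2 each need 3 of our
     rows, the MPI_Alltoall leaves inlist = {0, 3, 3} here: we must send
     3 values apiece to P_1 and P_2 during every triangular solve. */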

  setup_sends_private (mat, inlist, sg->o2n_sub, debug);
  CHECK_V_ERROR;

  /* Convert column indices in each row to local indices */
  for (row = 0; row < m; row++)
    {
      int len = rp[row + 1] - rp[row];
      int *ind = cval + rp[row];
      Numbering_dhGlobalToLocal (numb, len, ind, ind);
      CHECK_V_ERROR;
    }

  FREE_DH (outlist);
  CHECK_V_ERROR;
  FREE_DH (inlist);
  CHECK_V_ERROR;
  FREE_DH (end_rows);
  CHECK_V_ERROR;

  if (debug)
    {
      int ii, jj;

      fprintf (logFile,
               "\n--------- row/col structure, after global to local renumbering\n");
      for (ii = 0; ii < mat->m; ++ii)
        {
          fprintf (logFile, "local row %i :: ", ii + 1);
          for (jj = mat->rp[ii]; jj < mat->rp[ii + 1]; ++jj)
            {
              fprintf (logFile, "%i ", 1 + mat->cval[jj]);
            }
          fprintf (logFile, "\n");
        }
      fprintf (logFile, "\n");
      fflush (logFile);
    }
END_FUNC_DH}

/* Solve for the MPI implementation of PILU. This function is so similar
   to MatVec that I put it here, instead of with the other solves located
   in Euclid_apply.c.
*/
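
/* Order of operations in Factor_dhSolve (a summary of the code below):
     1. start persistent receives, then forward solve the interior rows;
     2. wait for 'y' values from lower-ordered subdomains, forward solve
        the boundary rows, and send boundary 'y' values to higher-ordered
        subdomains;
     3. wait for 'x' values from higher-ordered subdomains, backward solve
        the boundary rows, and send boundary 'x' values to lower-ordered
        subdomains;
     4. backward solve the interior rows and copy the result into lhs. */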
static void forward_solve_private (int m, int from, int to,
                                   int *rp, int *cval, int *diag,
                                   double *aval, double *rhs,
                                   double *work_y, bool debug);

static void backward_solve_private (int m, int from, int to,
                                    int *rp, int *cval, int *diag,
                                    double *aval, double *work_y,
                                    double *work_x, bool debug);

static int beg_rowG;


#undef __FUNC__
#define __FUNC__ "Factor_dhSolve"
void
Factor_dhSolve (double *rhs, double *lhs, Euclid_dh ctx)
{
  START_FUNC_DH Factor_dh mat = ctx->F;
  int from, to;
  int ierr, i, m = mat->m, first_bdry = mat->first_bdry;
  int offsetLo = mat->numbSolve->num_extLo;
  int offsetHi = mat->numbSolve->num_extHi;
  int *rp = mat->rp, *cval = mat->cval, *diag = mat->diag;
  double *aval = mat->aval;
  int *sendindLo = mat->sendindLo, *sendindHi = mat->sendindHi;
  int sendlenLo = mat->sendlenLo, sendlenHi = mat->sendlenHi;
  double *sendbufLo = mat->sendbufLo, *sendbufHi = mat->sendbufHi;
  double *work_y = mat->work_y_lo;
  double *work_x = mat->work_x_hi;
  bool debug = false;

  if (mat->debug && logFile != NULL)
    debug = true;
  if (debug)
    beg_rowG = ctx->F->beg_row;

/*
for (i=0; i<m+offsetLo+offsetHi; ++i) {
  work_y[i] = -99;
  work_x[i] = -99;
}
*/

  if (debug)
    {
      fprintf (logFile,
               "\n=====================================================\n");
      fprintf (logFile,
               "FACT Factor_dhSolve: num_recvLo= %i num_recvHi = %i\n",
               mat->num_recvLo, mat->num_recvHi);
    }

  /* start receives from higher and lower ordered subdomains */
  if (mat->num_recvLo)
    {
      MPI_Startall (mat->num_recvLo, mat->recv_reqLo);
    }
  if (mat->num_recvHi)
    {
      MPI_Startall (mat->num_recvHi, mat->recv_reqHi);
    }

  /*-------------------------------------------------------------
   * PART 1: Forward Solve Ly = rhs for y ('y' is called 'work')
   *-------------------------------------------------------------*/
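  /* The forward sweep computes, row by row,
       y[i] = rhs[i] - sum_{j < i} L[i][j] * y[j]
     with no division, i.e. L has an implicit unit diagonal; the entries
     of row i that precede diag[i] are the strictly-lower (L) part. */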
  /* forward triangular solve on interior nodes */
  from = 0;
  to = first_bdry;
  if (from != to)
    {
      forward_solve_private (m, from, to, rp, cval, diag, aval,
                             rhs, work_y, debug);
      CHECK_V_ERROR;
    }

  /* wait for receives from lower ordered subdomains, then
     complete forward solve on boundary nodes.
   */
  if (mat->num_recvLo)
    {
      MPI_Waitall (mat->num_recvLo, mat->recv_reqLo, mat->status);

      /* debug block */
      if (debug)
        {
          fprintf (logFile,
                   "FACT got 'y' values from lower neighbors; work buffer:\n ");
          for (i = 0; i < offsetLo; ++i)
            {
              fprintf (logFile, "%g ", work_y[m + i]);
            }
        }
    }

  /* forward triangular solve on boundary nodes */
  from = first_bdry;
  to = m;
  if (from != to)
    {
      forward_solve_private (m, from, to, rp, cval, diag, aval,
                             rhs, work_y, debug);
      CHECK_V_ERROR;
    }

  /* send boundary elements from work vector 'y' to higher ordered subdomains */
  if (mat->num_sendHi)
    {
      /* copy elements to send buffer */
      for (i = 0; i < sendlenHi; i++)
        {
          sendbufHi[i] = work_y[sendindHi[i]];
        }

      /* start the sends */
      MPI_Startall (mat->num_sendHi, mat->send_reqHi);

      /* debug block */
      if (debug)
        {
          fprintf (logFile,
                   "\nFACT sending 'y' values to higher neighbor:\nFACT ");
          for (i = 0; i < sendlenHi; i++)
            {
              fprintf (logFile, "%g ", sendbufHi[i]);
            }
          fprintf (logFile, "\n");
        }
    }

  /*----------------------------------------------------------
   * PART 2: Backward Solve
   *----------------------------------------------------------*/
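  /* The backward sweep computes
       x[i] = ( y[i] - sum_{j > i} U[i][j] * x[j] ) * aval[diag[i]]
     note the multiplication: the stored diagonal entry is applied as a
     multiplier, i.e. it holds the reciprocal of the pivot. */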
  /* wait for bdry nodes 'x' from higher-ordered processors */
  if (mat->num_recvHi)
    {
      ierr = MPI_Waitall (mat->num_recvHi, mat->recv_reqHi, mat->status);
      CHECK_MPI_V_ERROR (ierr);

      /* debug block */
      if (debug)
        {
          fprintf (logFile, "FACT got 'x' values from higher neighbors:\n ");
          for (i = m + offsetLo; i < m + offsetLo + offsetHi; ++i)
            {
              fprintf (logFile, "%g ", work_x[i]);
            }
          fprintf (logFile, "\n");
        }
    }

  /* backward solve boundary nodes */
  from = m;
  to = first_bdry;
  if (from != to)
    {
      backward_solve_private (m, from, to, rp, cval, diag, aval,
                              work_y, work_x, debug);
      CHECK_V_ERROR;
    }

  /* send boundary node elements to lower ordered subdomains */
  if (mat->num_sendLo)
    {
      /* copy elements to send buffer */
      for (i = 0; i < sendlenLo; i++)
        {
          sendbufLo[i] = work_x[sendindLo[i]];
        }

      /* start the sends */
      ierr = MPI_Startall (mat->num_sendLo, mat->send_reqLo);
      CHECK_MPI_V_ERROR (ierr);

      /* debug block */
      if (debug)
        {
          fprintf (logFile,
                   "\nFACT sending 'x' values to lower neighbor:\nFACT ");
          for (i = 0; i < sendlenLo; i++)
            {
              fprintf (logFile, "%g ", sendbufLo[i]);
            }
          fprintf (logFile, "\n");
        }
    }

  /* backward solve interior nodes */
  from = first_bdry;
  to = 0;
  if (from != to)
    {
      backward_solve_private (m, from, to, rp, cval, diag, aval,
                              work_y, work_x, debug);
      CHECK_V_ERROR;
    }

  /* copy solution from work vector to lhs vector */
  memcpy (lhs, work_x, m * sizeof (double));

  if (debug)
    {
      fprintf (logFile, "\nFACT solution: ");
      for (i = 0; i < m; ++i)
        {
          fprintf (logFile, "%g ", lhs[i]);
        }
      fprintf (logFile, "\n");
    }

  /* wait for sends to go through */
  if (mat->num_sendLo)
    {
      ierr = MPI_Waitall (mat->num_sendLo, mat->send_reqLo, mat->status);
      CHECK_MPI_V_ERROR (ierr);
    }

  if (mat->num_sendHi)
    {
      ierr = MPI_Waitall (mat->num_sendHi, mat->send_reqHi, mat->status);
      CHECK_MPI_V_ERROR (ierr);
    }
END_FUNC_DH}


#undef __FUNC__
#define __FUNC__ "forward_solve_private"
static void
forward_solve_private (int m, int from, int to, int *rp,
                       int *cval, int *diag, double *aval,
                       double *rhs, double *work_y, bool debug)
{
  START_FUNC_DH int i, j, idx;

  if (debug)
    {
      fprintf (logFile,
               "\nFACT starting forward_solve_private; from= %i; to= %i, m= %i\n",
               1 + from, 1 + to, m);
    }

/*
  if (from == 0) {
    work_y[0] = rhs[0];
    if (debug) {
      fprintf(logFile, "FACT work_y[%i] = %g\n------------\n", 1+beg_rowG, work_y[0]);
    }
  } else {
    --from;
  }
*/

  if (debug)
    {
      for (i = from; i < to; ++i)
        {
          int len = diag[i] - rp[i];
          int *col = cval + rp[i];
          double *val = aval + rp[i];
          double sum = rhs[i];

          fprintf (logFile, "FACT solving for work_y[%i] (global)\n",
                   i + 1 + beg_rowG);
          fprintf (logFile, "FACT sum = %g\n", sum);
          for (j = 0; j < len; ++j)
            {
              idx = col[j];
              sum -= (val[j] * work_y[idx]);
              fprintf (logFile,
                       "FACT sum(%g) -= val[j] (%g) * work_y[%i] (%g)\n",
                       sum, val[j], 1 + idx, work_y[idx]);
            }
          work_y[i] = sum;
          fprintf (logFile, "FACT work_y[%i] = %g\n", 1 + i + beg_rowG,
                   work_y[i]);
          fprintf (logFile, "-----------\n");
        }

      fprintf (logFile, "\nFACT work vector at end of forward solve:\n");
      for (i = 0; i < to; i++)
        fprintf (logFile, " %i %g\n", i + 1 + beg_rowG, work_y[i]);
    }
  else
    {
      for (i = from; i < to; ++i)
        {
          int len = diag[i] - rp[i];
          int *col = cval + rp[i];
          double *val = aval + rp[i];
          double sum = rhs[i];

          for (j = 0; j < len; ++j)
            {
              idx = col[j];
              sum -= (val[j] * work_y[idx]);
            }
          work_y[i] = sum;
        }
    }
END_FUNC_DH}

#undef __FUNC__
#define __FUNC__ "backward_solve_private"
static void
backward_solve_private (int m, int from, int to, int *rp,
                        int *cval, int *diag, double *aval,
                        double *work_y, double *work_x, bool debug)
{
  START_FUNC_DH int i, j, idx;

  if (debug)
    {
      fprintf (logFile,
               "\nFACT starting backward_solve_private; from= %i; to= %i, m= %i\n",
               1 + from, 1 + to, m);
      for (i = from - 1; i >= to; --i)
        {
          int len = rp[i + 1] - diag[i] - 1;
          int *col = cval + diag[i] + 1;
          double *val = aval + diag[i] + 1;
          double sum = work_y[i];
          fprintf (logFile, "FACT solving for work_x[%i]\n",
                   i + 1 + beg_rowG);

          for (j = 0; j < len; ++j)
            {
              idx = col[j];
              sum -= (val[j] * work_x[idx]);
              fprintf (logFile,
                       "FACT sum(%g) -= val[j] (%g) * work_x[idx] (%g)\n",
                       sum, val[j], work_x[idx]);
            }
          work_x[i] = sum * aval[diag[i]];
          fprintf (logFile, "FACT work_x[%i] = %g\n", 1 + i, work_x[i]);
          fprintf (logFile, "----------\n");
        }
    }
  else
    {
      for (i = from - 1; i >= to; --i)
        {
          int len = rp[i + 1] - diag[i] - 1;
          int *col = cval + diag[i] + 1;
          double *val = aval + diag[i] + 1;
          double sum = work_y[i];

          for (j = 0; j < len; ++j)
            {
              idx = col[j];
              sum -= (val[j] * work_x[idx]);
            }
          work_x[i] = sum * aval[diag[i]];
        }
    }
END_FUNC_DH}

#undef __FUNC__
#define __FUNC__ "Factor_dhInit"
void
Factor_dhInit (void *A, bool fillFlag, bool avalFlag,
               double rho, int id, int beg_rowP, Factor_dh * Fout)
{
  START_FUNC_DH int m, n, beg_row, alloc;
  Factor_dh F;

  EuclidGetDimensions (A, &beg_row, &m, &n);
  CHECK_V_ERROR;
  alloc = rho * m;
  Factor_dhCreate (&F);
  CHECK_V_ERROR;

  *Fout = F;
  F->m = m;
  F->n = n;
  F->beg_row = beg_rowP;
  F->id = id;
  F->alloc = alloc;

  F->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int));
  CHECK_V_ERROR;
  F->rp[0] = 0;
  F->cval = (int *) MALLOC_DH (alloc * sizeof (int));
  CHECK_V_ERROR;
  F->diag = (int *) MALLOC_DH (m * sizeof (int));
  CHECK_V_ERROR;
  if (fillFlag)
    {
      F->fill = (int *) MALLOC_DH (alloc * sizeof (int));
      CHECK_V_ERROR;
    }
  if (avalFlag)
    {
      F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH));
      CHECK_V_ERROR;
    }
END_FUNC_DH}

#undef __FUNC__
#define __FUNC__ "Factor_dhReallocate"
void
Factor_dhReallocate (Factor_dh F, int used, int additional)
{
  START_FUNC_DH int alloc = F->alloc;

  if (used + additional > F->alloc)
    {
      int *tmpI;
      while (alloc < used + additional)
        alloc *= 2;
      F->alloc = alloc;
      tmpI = F->cval;
      F->cval = (int *) MALLOC_DH (alloc * sizeof (int));
      CHECK_V_ERROR;
      memcpy (F->cval, tmpI, used * sizeof (int));
      FREE_DH (tmpI);
      CHECK_V_ERROR;
      if (F->fill != NULL)
        {
          tmpI = F->fill;
          F->fill = (int *) MALLOC_DH (alloc * sizeof (int));
          CHECK_V_ERROR;
          memcpy (F->fill, tmpI, used * sizeof (int));
          FREE_DH (tmpI);
          CHECK_V_ERROR;
        }
      if (F->aval != NULL)
        {
          REAL_DH *tmpF = F->aval;
          F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH));
          CHECK_V_ERROR;
          memcpy (F->aval, tmpF, used * sizeof (REAL_DH));
          FREE_DH (tmpF);
          CHECK_V_ERROR;
        }
    }
END_FUNC_DH}
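
/* Capacity grows geometrically (doubling until it covers used +
   additional), so repeated small Reallocate calls cost amortized O(1)
   copies per entry. For example, alloc = 1000 with used + additional =
   4500 doubles three times: 1000 -> 2000 -> 4000 -> 8000. */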

#undef __FUNC__
#define __FUNC__ "Factor_dhTranspose"
void
Factor_dhTranspose (Factor_dh A, Factor_dh * Bout)
{
  START_FUNC_DH Factor_dh B;

  if (np_dh > 1)
    {
      SET_V_ERROR ("only for sequential");
    }

  Factor_dhCreate (&B);
  CHECK_V_ERROR;
  *Bout = B;
  B->m = B->n = A->m;
  if (A->aval == NULL)  /* test A's values; B->aval is always NULL just after Create */
    {
      mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
                                A->aval, NULL);
      CHECK_V_ERROR;
    }
  else
    {
      mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval,
                                A->aval, &B->aval);
      CHECK_V_ERROR;
    }
END_FUNC_DH}

/* this could be done using OpenMP, but I took it out for now */
#undef __FUNC__
#define __FUNC__ "Factor_dhSolveSeq"
void
Factor_dhSolveSeq (double *rhs, double *lhs, Euclid_dh ctx)
{
  START_FUNC_DH Factor_dh F = ctx->F;
  int *rp, *cval, *diag;
  int i, j, *vi, nz, m;
  REAL_DH *aval, *work;
  /* REAL_DH *scale; */
  REAL_DH *v, sum;
  bool debug = false;

  if (F == NULL)
    {
      SET_V_ERROR ("ctx->F is NULL");
    }

  if (F->debug && logFile != NULL)
    debug = true;

  m = F->m;
  rp = F->rp;
  cval = F->cval;
  aval = F->aval;
  diag = F->diag;
  /* scale = ctx->scale; */
  work = ctx->work;

  if (debug)
    {
      fprintf (logFile,
               "\nFACT ============================================================\n");
      fprintf (logFile, "FACT starting Factor_dhSolveSeq\n");

      /* forward solve lower triangle */
      fprintf (logFile, "\nFACT STARTING FORWARD SOLVE\n------------\n");
      work[0] = rhs[0];
      fprintf (logFile, "FACT work[0] = %g\n------------\n", work[0]);
      for (i = 1; i < m; i++)
        {
          v = aval + rp[i];
          vi = cval + rp[i];
          nz = diag[i] - rp[i];
          fprintf (logFile, "FACT solving for work[%i]\n", i + 1);
          sum = rhs[i];
          for (j = 0; j < nz; ++j)
            {
              sum -= (v[j] * work[vi[j]]);
              fprintf (logFile,
                       "FACT sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n",
                       sum, v[j], work[vi[j]]);
            }
          work[i] = sum;
          fprintf (logFile, "FACT work[%i] = %g\n------------\n", 1 + i,
                   work[i]);
        }

      fprintf (logFile, "\nFACT work vector at end of forward solve:\n");
      for (i = 0; i < m; i++)
        fprintf (logFile, " %i %g\n", i + 1, work[i]);

      /* backward solve upper triangular boundaries (sequential) */
      fprintf (logFile, "\nFACT STARTING BACKWARD SOLVE\n--------------\n");
      for (i = m - 1; i >= 0; i--)
        {
          v = aval + diag[i] + 1;
          vi = cval + diag[i] + 1;
          nz = rp[i + 1] - diag[i] - 1;
          fprintf (logFile, "FACT solving for lhs[%i]\n", i + 1);
          sum = work[i];
          for (j = 0; j < nz; ++j)
            {
              sum -= (v[j] * work[vi[j]]);
              fprintf (logFile,
                       "FACT sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n",
                       sum, v[j], work[vi[j]]);
            }
          lhs[i] = work[i] = sum * aval[diag[i]];
          fprintf (logFile, "FACT lhs[%i] = %g\n------------\n", 1 + i,
                   lhs[i]);
        }

      fprintf (logFile, "\nFACT solution: ");
      for (i = 0; i < m; ++i)
        fprintf (logFile, "%g ", lhs[i]);
      fprintf (logFile, "\n");
    }
  else
    {
      /* forward solve lower triangle */
      work[0] = rhs[0];
      for (i = 1; i < m; i++)
        {
          v = aval + rp[i];
          vi = cval + rp[i];
          nz = diag[i] - rp[i];
          sum = rhs[i];
          while (nz--)
            sum -= (*v++ * work[*vi++]);
          work[i] = sum;
        }

      /* backward solve upper triangular boundaries (sequential) */
      for (i = m - 1; i >= 0; i--)
        {
          v = aval + diag[i] + 1;
          vi = cval + diag[i] + 1;
          nz = rp[i + 1] - diag[i] - 1;
          sum = work[i];
          while (nz--)
            sum -= (*v++ * work[*vi++]);
          lhs[i] = work[i] = sum * aval[diag[i]];
        }
    }
END_FUNC_DH}

/*---------------------------------------------------------------
 * next two are used by Factor_dhPrintXXX methods
 *---------------------------------------------------------------*/
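
/* For block Jacobi factors the stored column indices are local
   (zero-based within the subdomain); adjust_bj_private() temporarily
   shifts them by beg_row so the print routines emit global indices,
   and unadjust_bj_private() undoes the shift. E.g. local column 3 in a
   subdomain whose rows begin at global row 100 prints as 104 (the print
   routines also add 1 for one-based output). */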

#undef __FUNC__
#define __FUNC__ "adjust_bj_private"
static void
adjust_bj_private (Factor_dh mat)
{
  START_FUNC_DH int i;
  int nz = mat->rp[mat->m];
  int beg_row = mat->beg_row;
  for (i = 0; i < nz; ++i)
    mat->cval[i] += beg_row;
END_FUNC_DH}

#undef __FUNC__
#define __FUNC__ "unadjust_bj_private"
static void
unadjust_bj_private (Factor_dh mat)
{
  START_FUNC_DH int i;
  int nz = mat->rp[mat->m];
  int beg_row = mat->beg_row;
  for (i = 0; i < nz; ++i)
    mat->cval[i] -= beg_row;
END_FUNC_DH}

#undef __FUNC__
#define __FUNC__ "Factor_dhMaxPivotInverse"
double
Factor_dhMaxPivotInverse (Factor_dh mat)
{
  START_FUNC_DH int i, m = mat->m, *diags = mat->diag;
  REAL_DH *aval = mat->aval;
  double minGlobal = 0.0, min = fabs (aval[diags[0]]);
  double retval;

  for (i = 0; i < m; ++i)
    min = MIN (min, fabs (aval[diags[i]]));
  if (np_dh == 1)
    {
      minGlobal = min;
    }
  else
    {
      MPI_Reduce (&min, &minGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, comm_dh);
    }

  if (minGlobal == 0)
    {
      retval = 0;
    }
  else
    {
      retval = 1.0 / minGlobal;
    }
END_FUNC_VAL (retval)}

#undef __FUNC__
#define __FUNC__ "Factor_dhMaxValue"
double
Factor_dhMaxValue (Factor_dh mat)
{
  START_FUNC_DH double maxGlobal = 0.0, max = 0.0;
  int i, nz = mat->rp[mat->m];
  REAL_DH *aval = mat->aval;

  for (i = 0; i < nz; ++i)
    {
      max = MAX (max, fabs (aval[i]));
    }

  if (np_dh == 1)
    {
      maxGlobal = max;
    }
  else
    {
      MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh);
    }
END_FUNC_VAL (maxGlobal)}


#undef __FUNC__
#define __FUNC__ "Factor_dhCondEst"
double
Factor_dhCondEst (Factor_dh mat, Euclid_dh ctx)
{
  START_FUNC_DH double max = 0.0, maxGlobal = 0.0;
  double *x;
  int i, m = mat->m;
  Vec_dh lhs, rhs;

  Vec_dhCreate (&lhs);
  CHECK_ERROR (-1);
  Vec_dhInit (lhs, m);
  CHECK_ERROR (-1);
  Vec_dhDuplicate (lhs, &rhs);
  CHECK_ERROR (-1);
  Vec_dhSet (rhs, 1.0);
  CHECK_ERROR (-1);
  Euclid_dhApply (ctx, rhs->vals, lhs->vals);
  CHECK_ERROR (-1);

  x = lhs->vals;
  for (i = 0; i < m; ++i)
    {
      max = MAX (max, fabs (x[i]));
    }

  /* free the temporary work vectors */
  Vec_dhDestroy (lhs);
  CHECK_ERROR (-1);
  Vec_dhDestroy (rhs);
  CHECK_ERROR (-1);

  if (np_dh == 1)
    {
      maxGlobal = max;
    }
  else
    {
      MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh);
    }
END_FUNC_VAL (maxGlobal)}