IFPACK  Development
 All Classes Namespaces Files Functions Variables Enumerations Friends Pages
Euclid_dh.c
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #include "Euclid_dh.h"
44 #include "Mem_dh.h"
45 #include "Mat_dh.h"
46 #include "Vec_dh.h"
47 #include "Factor_dh.h"
48 #include "getRow_dh.h"
49 #include "ilu_dh.h"
50 #include "Parser_dh.h"
51 #include "SortedList_dh.h"
52 #include "SubdomainGraph_dh.h"
53 #include "ExternalRows_dh.h"
54 #include "krylov_dh.h"
55 
56 static void get_runtime_params_private (Euclid_dh ctx);
57 static void invert_diagonals_private (Euclid_dh ctx);
58 static void compute_rho_private (Euclid_dh ctx);
59 static void factor_private (Euclid_dh ctx);
60 /* static void discard_indices_private(Euclid_dh ctx); */
61 static void reduce_timings_private (Euclid_dh ctx);
62 
63 #undef __FUNC__
64 #define __FUNC__ "Euclid_dhCreate"
65 void
66 Euclid_dhCreate (Euclid_dh * ctxOUT)
67 {
68  START_FUNC_DH
69  struct _mpi_interface_dh *ctx =
70  (struct _mpi_interface_dh *)
71  MALLOC_DH (sizeof (struct _mpi_interface_dh));
72  CHECK_V_ERROR;
73  *ctxOUT = ctx;
74 
75  ctx->isSetup = false;
76 
77  ctx->rho_init = 2.0;
78  ctx->rho_final = 0.0;
79 
80  ctx->m = 0;
81  ctx->n = 0;
82  ctx->rhs = NULL;
83  ctx->A = NULL;
84  ctx->F = NULL;
85  ctx->sg = NULL;
86 
87  ctx->scale = NULL;
88  ctx->isScaled = false;
89  ctx->work = NULL;
90  ctx->work2 = NULL;
91  ctx->from = 0;
92  ctx->to = 0;
93 
94  strcpy (ctx->algo_par, "pilu");
95  strcpy (ctx->algo_ilu, "iluk");
96  ctx->level = 1;
97  ctx->droptol = DEFAULT_DROP_TOL;
98  ctx->sparseTolA = 0.0;
99  ctx->sparseTolF = 0.0;
100  ctx->pivotMin = 0.0;
101  ctx->pivotFix = PIVOT_FIX_DEFAULT;
102  ctx->maxVal = 0.0;
103 
104  ctx->slist = NULL;
105  ctx->extRows = NULL;
106 
107  strcpy (ctx->krylovMethod, "bicgstab");
108  ctx->maxIts = 200;
109  ctx->rtol = 1e-5;
110  ctx->atol = 1e-50;
111  ctx->its = 0;
112  ctx->itsTotal = 0;
113  ctx->setupCount = 0;
114  ctx->logging = 0;
115  ctx->printStats = (Parser_dhHasSwitch (parser_dh, "-printStats"));
116 
117  {
118  int i;
119  for (i = 0; i < TIMING_BINS; ++i)
120  ctx->timing[i] = 0.0;
121  for (i = 0; i < STATS_BINS; ++i)
122  ctx->stats[i] = 0.0;
123  }
124  ctx->timingsWereReduced = false;
125 
126  ++ref_counter;
127 END_FUNC_DH}
128 
129 
130 #undef __FUNC__
131 #define __FUNC__ "Euclid_dhDestroy"
132 void
133 Euclid_dhDestroy (Euclid_dh ctx)
134 {
135  START_FUNC_DH
136  if (Parser_dhHasSwitch (parser_dh, "-eu_stats") || ctx->logging)
137  {
138  /* insert switch so memory report will also be printed */
139  Parser_dhInsert (parser_dh, "-eu_mem", "1");
140  CHECK_V_ERROR;
141  Euclid_dhPrintHypreReport (ctx, stdout);
142  CHECK_V_ERROR;
143  }
144 
145  if (ctx->setupCount > 1 && ctx->printStats)
146  {
147  Euclid_dhPrintStatsShorter (ctx, stdout);
148  CHECK_V_ERROR;
149  }
150 
151  if (ctx->F != NULL)
152  {
153  Factor_dhDestroy (ctx->F);
154  CHECK_V_ERROR;
155  }
156  if (ctx->sg != NULL)
157  {
158  SubdomainGraph_dhDestroy (ctx->sg);
159  CHECK_V_ERROR;
160  }
161  if (ctx->scale != NULL)
162  {
163  FREE_DH (ctx->scale);
164  CHECK_V_ERROR;
165  }
166  if (ctx->work != NULL)
167  {
168  FREE_DH (ctx->work);
169  CHECK_V_ERROR;
170  }
171  if (ctx->work2 != NULL)
172  {
173  FREE_DH (ctx->work2);
174  CHECK_V_ERROR;
175  }
176  if (ctx->slist != NULL)
177  {
178  SortedList_dhDestroy (ctx->slist);
179  CHECK_V_ERROR;
180  }
181  if (ctx->extRows != NULL)
182  {
183  ExternalRows_dhDestroy (ctx->extRows);
184  CHECK_V_ERROR;
185  }
186  FREE_DH (ctx);
187  CHECK_V_ERROR;
188 
189  --ref_counter;
190 END_FUNC_DH}
191 
192 
193 /* on entry, "A" must have been set. If this context is being
194  reused, user must ensure ???
195 */
196 #undef __FUNC__
197 #define __FUNC__ "Euclid_dhSetup"
198 void
199 Euclid_dhSetup (Euclid_dh ctx)
200 {
201  START_FUNC_DH int m, n, beg_row;
202  double t1;
203  bool isSetup = ctx->isSetup;
204  bool bj = false;
205 
206  /*----------------------------------------------------
207  * If Euclid was previously setup, print summary of
208  * what happened during previous setup/solve
209  *----------------------------------------------------*/
210  if (ctx->setupCount && ctx->printStats)
211  {
212  Euclid_dhPrintStatsShorter (ctx, stdout);
213  CHECK_V_ERROR;
214  ctx->its = 0;
215  }
216 
217  /*----------------------------------------------------
218  * zero array for statistical reporting
219  *----------------------------------------------------*/
220  {
221  int i;
222  for (i = 0; i < STATS_BINS; ++i)
223  ctx->stats[i] = 0.0;
224  }
225 
226  /*----------------------------------------------------
227  * internal timing
228  *----------------------------------------------------*/
229  ctx->timing[SOLVE_START_T] = MPI_Wtime ();
230  /* sum timing from last linear solve cycle, if any */
231  ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
232  ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
233 
234  if (ctx->F != NULL)
235  {
236  Factor_dhDestroy (ctx->F);
237  CHECK_V_ERROR;
238  ctx->F = NULL;
239  }
240 
241  if (ctx->A == NULL)
242  {
243  SET_V_ERROR ("must set ctx->A before calling init");
244  }
245  EuclidGetDimensions (ctx->A, &beg_row, &m, &n);
246  CHECK_V_ERROR;
247  ctx->m = m;
248  ctx->n = n;
249 
250  if (Parser_dhHasSwitch (parser_dh, "-print_size"))
251  {
252  printf_dh
253  ("setting up linear system; global rows: %i local rows: %i (on P_0)\n",
254  n, m);
255  }
256 
257  sprintf (msgBuf_dh, "localRow= %i; globalRows= %i; beg_row= %i", m, n,
258  beg_row);
259  SET_INFO (msgBuf_dh);
260 
261  bj = Parser_dhHasSwitch (parser_dh, "-bj");
262 
263  /*------------------------------------------------------------------------
264  * Setup the SubdomainGraph, which contains connectivity and
265  * and permutation information. If this context is being reused,
266  * this may already have been done; if being resused, the underlying
267  * subdomain graph cannot change (user's responsibility?)
268  *------------------------------------------------------------------------*/
269  if (ctx->sg == NULL)
270  {
271  int blocks = np_dh;
272  t1 = MPI_Wtime ();
273  if (np_dh == 1)
274  {
275  Parser_dhReadInt (parser_dh, "-blocks", &blocks);
276  CHECK_V_ERROR;
277  SubdomainGraph_dhCreate (&(ctx->sg));
278  CHECK_V_ERROR;
279  SubdomainGraph_dhInit (ctx->sg, blocks, bj, ctx->A);
280  CHECK_V_ERROR;
281  }
282  else
283  {
284  SubdomainGraph_dhCreate (&(ctx->sg));
285  CHECK_V_ERROR;
286  SubdomainGraph_dhInit (ctx->sg, -1, bj, ctx->A);
287  CHECK_V_ERROR;
288  }
289  ctx->timing[SUB_GRAPH_T] += (MPI_Wtime () - t1);
290  }
291 
292 
293 /* SubdomainGraph_dhDump(ctx->sg, "SG.dump"); CHECK_V_ERROR; */
294 
295  /*----------------------------------------------------
296  * for debugging
297  *----------------------------------------------------*/
298  if (Parser_dhHasSwitch (parser_dh, "-doNotFactor"))
299  {
300  goto END_OF_FUNCTION;
301  }
302 
303 
304  /*----------------------------------------------------
305  * query parser for runtime parameters
306  *----------------------------------------------------*/
307  if (!isSetup)
308  {
309  get_runtime_params_private (ctx);
310  CHECK_V_ERROR;
311  }
312  if (!strcmp (ctx->algo_par, "bj"))
313  bj = false;
314 
315  /*---------------------------------------------------------
316  * allocate and initialize storage for row-scaling
317  * (ctx->isScaled is set in get_runtime_params_private(); )
318  *---------------------------------------------------------*/
319  if (ctx->scale == NULL)
320  {
321  ctx->scale = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
322  CHECK_V_ERROR;
323  }
324  {
325  int i;
326  for (i = 0; i < m; ++i)
327  ctx->scale[i] = 1.0;
328  }
329 
330  /*------------------------------------------------------------------
331  * allocate work vectors; used in factorization and triangular solves;
332  *------------------------------------------------------------------*/
333  if (ctx->work == NULL)
334  {
335  ctx->work = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
336  CHECK_V_ERROR;
337  }
338  if (ctx->work2 == NULL)
339  {
340  ctx->work2 = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
341  CHECK_V_ERROR;
342  }
343 
344  /*-----------------------------------------------------------------
345  * perform the incomplete factorization (this should be, at least
346  * for higher level ILUK, the most time-intensive portion of setup)
347  *-----------------------------------------------------------------*/
348  t1 = MPI_Wtime ();
349  factor_private (ctx);
350  CHECK_V_ERROR;
351  ctx->timing[FACTOR_T] += (MPI_Wtime () - t1);
352 
353  /*--------------------------------------------------------------
354  * invert diagonals, for faster triangular solves
355  *--------------------------------------------------------------*/
356  if (strcmp (ctx->algo_par, "none"))
357  {
358  invert_diagonals_private (ctx);
359  CHECK_V_ERROR;
360  }
361 
362  /*--------------------------------------------------------------
363  * compute rho_final: global ratio of nzF/nzA
364  * also, if -sparseA > 0, compute ratio of nzA
365  * used in factorization
366  *--------------------------------------------------------------*/
367  /* for some reason compute_rho_private() was expensive, so now it's
368  an option, unless there's only one mpi task.
369  */
370  if (Parser_dhHasSwitch (parser_dh, "-computeRho") || np_dh == 1)
371  {
372  if (strcmp (ctx->algo_par, "none"))
373  {
374  t1 = MPI_Wtime ();
375  compute_rho_private (ctx);
376  CHECK_V_ERROR;
377  ctx->timing[COMPUTE_RHO_T] += (MPI_Wtime () - t1);
378  }
379  }
380 
381  /*--------------------------------------------------------------
382  * if using PILU, set up persistent comms and global-to-local
383  * number scheme, for efficient triangular solves.
384  * (Thanks to Edmond Chow for these algorithmic ideas.)
385  *--------------------------------------------------------------*/
386 
387  if (!strcmp (ctx->algo_par, "pilu") && np_dh > 1)
388  {
389  t1 = MPI_Wtime ();
390  Factor_dhSolveSetup (ctx->F, ctx->sg);
391  CHECK_V_ERROR;
392  ctx->timing[SOLVE_SETUP_T] += (MPI_Wtime () - t1);
393  }
394 
395 END_OF_FUNCTION:;
396 
397  /*-------------------------------------------------------
398  * internal timing
399  *-------------------------------------------------------*/
400  ctx->timing[SETUP_T] += (MPI_Wtime () - ctx->timing[SOLVE_START_T]);
401  ctx->setupCount += 1;
402 
403  ctx->isSetup = true;
404 
405 END_FUNC_DH}
406 
407 
408 #undef __FUNC__
409 #define __FUNC__ "get_runtime_params_private"
410 void
411 get_runtime_params_private (Euclid_dh ctx)
412 {
413  START_FUNC_DH char *tmp;
414 
415  /* params for use of internal solvers */
416  Parser_dhReadInt (parser_dh, "-maxIts", &(ctx->maxIts));
417  Parser_dhReadDouble (parser_dh, "-rtol", &(ctx->rtol));
418  Parser_dhReadDouble (parser_dh, "-atol", &(ctx->atol));
419 
420  /* parallelization strategy (bj, pilu, none) */
421  tmp = NULL;
422  Parser_dhReadString (parser_dh, "-par", &tmp);
423  if (tmp != NULL)
424  {
425  strcpy (ctx->algo_par, tmp);
426  }
427  if (Parser_dhHasSwitch (parser_dh, "-bj"))
428  {
429  strcpy (ctx->algo_par, "bj");
430  }
431 
432 
433  /* factorization parameters */
434  Parser_dhReadDouble (parser_dh, "-rho", &(ctx->rho_init));
435  /* inital storage allocation for factor */
436  Parser_dhReadInt (parser_dh, "-level", &ctx->level);
437  Parser_dhReadInt (parser_dh, "-pc_ilu_levels", &ctx->level);
438 
439  if (Parser_dhHasSwitch (parser_dh, "-ilut"))
440  {
441  Parser_dhReadDouble (parser_dh, "-ilut", &ctx->droptol);
442  ctx->isScaled = true;
443  strcpy (ctx->algo_ilu, "ilut");
444  }
445 
446  /* make sure both algo_par and algo_ilu are set to "none,"
447  if at least one is.
448  */
449  if (!strcmp (ctx->algo_par, "none"))
450  {
451  strcpy (ctx->algo_ilu, "none");
452  }
453  else if (!strcmp (ctx->algo_ilu, "none"))
454  {
455  strcpy (ctx->algo_par, "none");
456  }
457 
458 
459  Parser_dhReadDouble (parser_dh, "-sparseA", &(ctx->sparseTolA));
460  /* sparsify A before factoring */
461  Parser_dhReadDouble (parser_dh, "-sparseF", &(ctx->sparseTolF));
462  /* sparsify after factoring */
463  Parser_dhReadDouble (parser_dh, "-pivotMin", &(ctx->pivotMin));
464  /* adjust pivots if smaller than this */
465  Parser_dhReadDouble (parser_dh, "-pivotFix", &(ctx->pivotFix));
466  /* how to adjust pivots */
467 
468  /* set row scaling for mandatory cases */
469  if (ctx->sparseTolA || !strcmp (ctx->algo_ilu, "ilut"))
470  {
471  ctx->isScaled = true;
472  }
473 
474  /* solve method */
475  tmp = NULL;
476  Parser_dhReadString (parser_dh, "-ksp_type", &tmp);
477  if (tmp != NULL)
478  {
479  strcpy (ctx->krylovMethod, tmp);
480 
481  if (!strcmp (ctx->krylovMethod, "bcgs"))
482  {
483  strcpy (ctx->krylovMethod, "bicgstab");
484  }
485  }
486 END_FUNC_DH}
487 
488 #undef __FUNC__
489 #define __FUNC__ "invert_diagonals_private"
490 void
491 invert_diagonals_private (Euclid_dh ctx)
492 {
493  START_FUNC_DH REAL_DH * aval = ctx->F->aval;
494  int *diag = ctx->F->diag;
495  if (aval == NULL || diag == NULL)
496  {
497  SET_INFO ("can't invert diags; either F->aval or F->diag is NULL");
498  }
499  else
500  {
501  int i, m = ctx->F->m;
502  for (i = 0; i < m; ++i)
503  {
504  aval[diag[i]] = 1.0 / aval[diag[i]];
505  }
506  }
507 END_FUNC_DH}
508 
509 
510 /* records rho_final (max for all processors) */
511 #undef __FUNC__
512 #define __FUNC__ "compute_rho_private"
513 void
514 compute_rho_private (Euclid_dh ctx)
515 {
516  START_FUNC_DH if (ctx->F != NULL)
517  {
518  double bufLocal[3], bufGlobal[3];
519  int m = ctx->m;
520 
521  ctx->stats[NZF_STATS] = (double) ctx->F->rp[m];
522  bufLocal[0] = ctx->stats[NZA_STATS]; /* nzA */
523  bufLocal[1] = ctx->stats[NZF_STATS]; /* nzF */
524  bufLocal[2] = ctx->stats[NZA_USED_STATS]; /* nzA used */
525 
526  if (np_dh == 1)
527  {
528  bufGlobal[0] = bufLocal[0];
529  bufGlobal[1] = bufLocal[1];
530  bufGlobal[2] = bufLocal[2];
531  }
532  else
533  {
534  MPI_Reduce (bufLocal, bufGlobal, 3, MPI_DOUBLE, MPI_SUM, 0,
535  comm_dh);
536  }
537 
538  if (myid_dh == 0)
539  {
540 
541  /* compute rho */
542  if (bufGlobal[0] && bufGlobal[1])
543  {
544  ctx->rho_final = bufGlobal[1] / bufGlobal[0];
545  }
546  else
547  {
548  ctx->rho_final = -1;
549  }
550 
551  /* compute ratio of nonzeros in A that were used */
552  if (bufGlobal[0] && bufGlobal[2])
553  {
554  ctx->stats[NZA_RATIO_STATS] =
555  100.0 * bufGlobal[2] / bufGlobal[0];
556  }
557  else
558  {
559  ctx->stats[NZA_RATIO_STATS] = 100.0;
560  }
561  }
562  }
563 END_FUNC_DH}
564 
565 #undef __FUNC__
566 #define __FUNC__ "factor_private"
567 void
568 factor_private (Euclid_dh ctx)
569 {
570  START_FUNC_DH
571  /*-------------------------------------------------------------
572  * special case, for testing/debugging: no preconditioning
573  *-------------------------------------------------------------*/
574  if (!strcmp (ctx->algo_par, "none"))
575  {
576  goto DO_NOTHING;
577  }
578 
579  /*-------------------------------------------------------------
580  * Initialize object to hold factor.
581  *-------------------------------------------------------------*/
582  {
583  int br = 0;
584  int id = np_dh;
585  if (ctx->sg != NULL)
586  {
587  br = ctx->sg->beg_rowP[myid_dh];
588  id = ctx->sg->o2n_sub[myid_dh];
589  }
590  Factor_dhInit (ctx->A, true, true, ctx->rho_init, id, br, &(ctx->F));
591  CHECK_V_ERROR;
592  ctx->F->bdry_count = ctx->sg->bdry_count[myid_dh];
593  ctx->F->first_bdry = ctx->F->m - ctx->F->bdry_count;
594  if (!strcmp (ctx->algo_par, "bj"))
595  ctx->F->blockJacobi = true;
596  if (Parser_dhHasSwitch (parser_dh, "-bj"))
597  ctx->F->blockJacobi = true;
598  }
599 
600  /*-------------------------------------------------------------
601  * single mpi task with single or multiple subdomains
602  *-------------------------------------------------------------*/
603  if (np_dh == 1)
604  {
605 
606  /* ILU(k) factorization */
607  if (!strcmp (ctx->algo_ilu, "iluk"))
608  {
609  ctx->from = 0;
610  ctx->to = ctx->m;
611 
612  /* only for debugging: use ilu_mpi_pilu */
613  if (Parser_dhHasSwitch (parser_dh, "-mpi"))
614  {
615  if (ctx->sg != NULL && ctx->sg->blocks > 1)
616  {
617  SET_V_ERROR
618  ("only use -mpi, which invokes ilu_mpi_pilu(), for np = 1 and -blocks 1");
619  }
620  iluk_mpi_pilu (ctx);
621  CHECK_V_ERROR;
622  }
623 
624  /* "normal" operation */
625  else
626  {
627  iluk_seq_block (ctx);
628  CHECK_V_ERROR;
629  /* note: iluk_seq_block() performs block jacobi iluk if ctx->algo_par == bj. */
630  }
631  }
632 
633  /* ILUT factorization */
634  else if (!strcmp (ctx->algo_ilu, "ilut"))
635  {
636  ctx->from = 0;
637  ctx->to = ctx->m;
638  ilut_seq (ctx);
639  CHECK_V_ERROR;
640  }
641 
642  /* all other factorization methods */
643  else
644  {
645  sprintf (msgBuf_dh, "factorization method: %s is not implemented",
646  ctx->algo_ilu);
647  SET_V_ERROR (msgBuf_dh);
648  }
649  }
650 
651  /*-------------------------------------------------------------
652  * multiple mpi tasks with multiple subdomains
653  *-------------------------------------------------------------*/
654  else
655  {
656  /* block jacobi */
657  if (!strcmp (ctx->algo_par, "bj"))
658  {
659  ctx->from = 0;
660  ctx->to = ctx->m;
661  iluk_mpi_bj (ctx);
662  CHECK_V_ERROR;
663  }
664 
665  /* iluk */
666  else if (!strcmp (ctx->algo_ilu, "iluk"))
667  {
668  bool bj = ctx->F->blockJacobi; /* for debugging */
669 
670  /* printf_dh("\n@@@ starting ilu_mpi_pilu @@@\n"); */
671 
672  SortedList_dhCreate (&(ctx->slist));
673  CHECK_V_ERROR;
674  SortedList_dhInit (ctx->slist, ctx->sg);
675  CHECK_V_ERROR;
676  ExternalRows_dhCreate (&(ctx->extRows));
677  CHECK_V_ERROR;
678  ExternalRows_dhInit (ctx->extRows, ctx);
679  CHECK_V_ERROR;
680 
681  /* factor interior rows */
682  ctx->from = 0;
683  ctx->to = ctx->F->first_bdry;
684 
685 /*
686 if (Parser_dhHasSwitch(parser_dh, "-test")) {
687  printf("[%i] Euclid_dh :: TESTING ilu_seq\n", myid_dh);
688  iluk_seq(ctx); CHECK_V_ERROR;
689 } else {
690  iluk_mpi_pilu(ctx); CHECK_V_ERROR;
691 }
692 */
693 
694  iluk_seq (ctx);
695  CHECK_V_ERROR;
696 
697  /* get external rows from lower ordered neighbors in the
698  subdomain graph; these rows are needed for factoring
699  this subdomain's boundary rows.
700  */
701  if (!bj)
702  {
703  ExternalRows_dhRecvRows (ctx->extRows);
704  CHECK_V_ERROR;
705  }
706 
707  /* factor boundary rows */
708  ctx->from = ctx->F->first_bdry;
709  ctx->to = ctx->F->m;
710  iluk_mpi_pilu (ctx);
711  CHECK_V_ERROR;
712 
713  /* send this processor's boundary rows to higher ordered
714  neighbors in the subdomain graph.
715  */
716  if (!bj)
717  {
718  ExternalRows_dhSendRows (ctx->extRows);
719  CHECK_V_ERROR;
720  }
721 
722  /* discard column indices in factor if they would alter
723  the subdomain graph (any such elements are in upper
724  triangular portion of the row)
725  */
726 
727 
728  SortedList_dhDestroy (ctx->slist);
729  CHECK_V_ERROR;
730  ctx->slist = NULL;
731  ExternalRows_dhDestroy (ctx->extRows);
732  CHECK_V_ERROR;
733  ctx->extRows = NULL;
734  }
735 
736  /* all other factorization methods */
737  else
738  {
739  sprintf (msgBuf_dh, "factorization method: %s is not implemented",
740  ctx->algo_ilu);
741  SET_V_ERROR (msgBuf_dh);
742  }
743  }
744 
745 DO_NOTHING:;
746 
747 END_FUNC_DH}
748 
749 #if 0
750 
751 #undef __FUNC__
752 #define __FUNC__ "discard_indices_private"
753 void
754 discard_indices_private (Euclid_dh ctx)
755 {
756  START_FUNC_DH
757 #if 0
758  int *rp = ctx->F->rp, *cval = ctx->F->cval;
759  double *aval = ctx->F->aval;
760  int m = F->m, *nabors = ctx->nabors, nc = ctx->naborCount;
761  int i, j, k, idx, count = 0, start_of_row;
762  int beg_row = ctx->beg_row, end_row = beg_row + m;
763  int *diag = ctx->F->diag;
764 
765  /* if col is not locally owned, and doesn't belong to a
766  * nabor in the (original) subdomain graph, we need to discard
767  * the column index and associated value. First, we'll flag all
768  * such indices for deletion.
769  */
770  for (i = 0; i < m; ++i)
771  {
772  for (j = rp[i]; j < rp[i + 1]; ++j)
773  {
774  int col = cval[j];
775  if (col < beg_row || col >= end_row)
776  {
777  bool flag = true;
778  int owner = find_owner_private_mpi (ctx, col);
779  CHECK_V_ERROR;
780 
781  for (k = 0; k < nc; ++k)
782  {
783  if (nabors[k] == owner)
784  {
785  flag = false;
786  break;
787  }
788  }
789 
790  if (flag)
791  {
792  cval[j] = -1;
793  ++count;
794  }
795  }
796  }
797  }
798 
799  sprintf (msgBuf_dh,
800  "deleting %i indices that would alter the subdomain graph", count);
801  SET_INFO (msgBuf_dh);
802 
803  /* Second, perform the actual deletion */
804  idx = 0;
805  start_of_row = 0;
806  for (i = 0; i < m; ++i)
807  {
808  for (j = start_of_row; j < rp[i + 1]; ++j)
809  {
810  int col = cval[j];
811  double val = aval[j];
812  if (col != -1)
813  {
814  cval[idx] = col;
815  aval[idx] = val;
816  ++idx;
817  }
818  }
819  start_of_row = rp[i + 1];
820  rp[i + 1] = idx;
821  }
822 
823  /* rebuild diagonal pointers */
824  for (i = 0; i < m; ++i)
825  {
826  for (j = rp[i]; j < rp[i + 1]; ++j)
827  {
828  if (cval[j] == i + beg_row)
829  {
830  diag[i] = j;
831  break;
832  }
833  }
834  }
835 #endif
836 END_FUNC_DH}
837 #endif
838 
839 #undef __FUNC__
840 #define __FUNC__ "Euclid_dhSolve"
841 void
842 Euclid_dhSolve (Euclid_dh ctx, Vec_dh x, Vec_dh b, int *its)
843 {
844  START_FUNC_DH int itsOUT;
845  Mat_dh A = (Mat_dh) ctx->A;
846 
847  if (!strcmp (ctx->krylovMethod, "cg"))
848  {
849  cg_euclid (A, ctx, x->vals, b->vals, &itsOUT);
850  ERRCHKA;
851  }
852  else if (!strcmp (ctx->krylovMethod, "bicgstab"))
853  {
854  bicgstab_euclid (A, ctx, x->vals, b->vals, &itsOUT);
855  ERRCHKA;
856  }
857  else
858  {
859  sprintf (msgBuf_dh, "unknown krylov solver: %s", ctx->krylovMethod);
860  SET_V_ERROR (msgBuf_dh);
861  }
862  *its = itsOUT;
863 END_FUNC_DH}
864 
865 #undef __FUNC__
866 #define __FUNC__ "Euclid_dhPrintStats"
867 void
868 Euclid_dhPrintStats (Euclid_dh ctx, FILE * fp)
869 {
870  START_FUNC_DH double *timing;
871  int nz;
872 
873  nz = Factor_dhReadNz (ctx->F);
874  CHECK_V_ERROR;
875  timing = ctx->timing;
876 
877  /* add in timing from lasst setup (if any) */
878  ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
879  ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
880 
881  reduce_timings_private (ctx);
882  CHECK_V_ERROR;
883 
884  fprintf_dh (fp,
885  "\n==================== Euclid report (start) ====================\n");
886  fprintf_dh (fp, "\nruntime parameters\n");
887  fprintf_dh (fp, "------------------\n");
888  fprintf_dh (fp, " setups: %i\n", ctx->setupCount);
889  fprintf_dh (fp, " tri solves: %i\n", ctx->itsTotal);
890  fprintf_dh (fp, " parallelization method: %s\n", ctx->algo_par);
891  fprintf_dh (fp, " factorization method: %s\n", ctx->algo_ilu);
892  fprintf_dh (fp, " matrix was row scaled: %i\n", ctx->isScaled);
893 
894  fprintf_dh (fp, " matrix row count: %i\n", ctx->n);
895  fprintf_dh (fp, " nzF: %i\n", nz);
896  fprintf_dh (fp, " rho: %g\n", ctx->rho_final);
897  fprintf_dh (fp, " level: %i\n", ctx->level);
898  fprintf_dh (fp, " sparseA: %g\n", ctx->sparseTolA);
899 
900  fprintf_dh (fp, "\nEuclid timing report\n");
901  fprintf_dh (fp, "--------------------\n");
902  fprintf_dh (fp, " solves total: %0.2f (see docs)\n",
903  timing[TOTAL_SOLVE_T]);
904  fprintf_dh (fp, " tri solves: %0.2f\n", timing[TRI_SOLVE_T]);
905  fprintf_dh (fp, " setups: %0.2f\n", timing[SETUP_T]);
906  fprintf_dh (fp, " subdomain graph setup: %0.2f\n",
907  timing[SUB_GRAPH_T]);
908  fprintf_dh (fp, " factorization: %0.2f\n", timing[FACTOR_T]);
909  fprintf_dh (fp, " solve setup: %0.2f\n",
910  timing[SOLVE_SETUP_T]);
911  fprintf_dh (fp, " rho: %0.2f\n",
912  ctx->timing[COMPUTE_RHO_T]);
913  fprintf_dh (fp, " misc (should be small): %0.2f\n",
914  timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
915  timing[SOLVE_SETUP_T] +
916  timing[COMPUTE_RHO_T]));
917 
918  if (ctx->sg != NULL)
919  {
920  SubdomainGraph_dhPrintStats (ctx->sg, fp);
921  CHECK_V_ERROR;
922  SubdomainGraph_dhPrintRatios (ctx->sg, fp);
923  CHECK_V_ERROR;
924  }
925 
926 
927  fprintf_dh (fp, "\nApplicable if Euclid's internal solvers were used:\n");
928  fprintf_dh (fp, "---------------------------------------------------\n");
929  fprintf_dh (fp, " solve method: %s\n", ctx->krylovMethod);
930  fprintf_dh (fp, " maxIts: %i\n", ctx->maxIts);
931  fprintf_dh (fp, " rtol: %g\n", ctx->rtol);
932  fprintf_dh (fp, " atol: %g\n", ctx->atol);
933  fprintf_dh (fp,
934  "\n==================== Euclid report (end) ======================\n");
935 END_FUNC_DH}
936 
937 
938 /* nzA ratio and rho refer to most recent solve, if more than
939  one solve (call to Setup) was performed. Other stats
940  are cumulative.
941 */
942 #undef __FUNC__
943 #define __FUNC__ "Euclid_dhPrintStatsShort"
944 void
945 Euclid_dhPrintStatsShort (Euclid_dh ctx, double setup, double solve,
946  FILE * fp)
947 {
948  START_FUNC_DH double *timing = ctx->timing;
949  /* double *stats = ctx->stats; */
950  /* double setup_factor; */
951  /* double setup_other; */
952  double apply_total;
953  double apply_per_it;
954  /* double nzUsedRatio; */
955  double perIt;
956  int blocks = np_dh;
957 
958  if (np_dh == 1)
959  blocks = ctx->sg->blocks;
960 
961  reduce_timings_private (ctx);
962  CHECK_V_ERROR;
963 
964  /* setup_factor = timing[FACTOR_T]; */
965  /* setup_other = timing[SETUP_T] - setup_factor; */
966  apply_total = timing[TRI_SOLVE_T];
967  apply_per_it = apply_total / (double) ctx->its;
968  /* nzUsedRatio = stats[NZA_RATIO_STATS]; */
969  perIt = solve / (double) ctx->its;
970 
971  fprintf_dh (fp, "\n");
972  fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
973  "method", "subdms", "level", "its", "setup", "solve", "total",
974  "perIt", "perIt", "rows");
975  fprintf_dh (fp,
976  "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n");
977  fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.4f %6.5f %6g XXX\n", ctx->algo_par, /* parallelization strategy [pilu, bj] */
978  blocks, /* number of subdomains */
979  ctx->level, /* level, for ILU(k) */
980  ctx->its, /* iterations */
981  setup, /* total setup time, from caller */
982  solve, /* total setup time, from caller */
983  setup + solve, /* total time, from caller */
984  perIt, /* time per iteration, solver+precond. */
985  apply_per_it, /* time per iteration, solver+precond. */
986  (double) ctx->n /* global unknnowns */
987  );
988 
989 
990 
991 
992 #if 0
993  fprintf_dh (fp, "\n");
994  fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
995  "", "", "", "", "", "setup", "setup", "", "", "", "", "", "");
996 
997  fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
998  "method", "subdms", "level", "its", "total", "factor",
999  "other", "apply", "perIt", "rho", "A_tol", "A_%", "rows");
1000  fprintf_dh (fp,
1001  "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n");
1002 
1003 
1004  fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.2f %6.4f %6.1f %6g %6.2f %6g XXX\n", ctx->algo_par, /* parallelization strategy [pilu, bj] */
1005  blocks, /* number of subdomains */
1006  ctx->level, /* level, for ILU(k) */
1007  ctx->its, /* iterations */
1008  setup, /* total setup time, from caller */
1009  solve, /* total setup time, from caller */
1010  setup_factor, /* pc solve: factorization */
1011  setup_other, /* pc setup: other */
1012  apply_total, /* triangular solve time */
1013  apply_per_it, /* time for one triangular solve */
1014  ctx->rho_final, /* rho */
1015  ctx->sparseTolA, /* sparseA tolerance */
1016  nzUsedRatio, /* percent of A that was used */
1017  (double) ctx->n /* global unknnowns */
1018  );
1019 #endif
1020 
1021 #if 0
1022  /* special: for scalability studies */
1023  fprintf_dh (fp, "\n%6s %6s %6s %6s %6s %6s WW\n", "method", "level",
1024  "subGph", "factor", "solveS", "perIt");
1025  fprintf_dh (fp, "------ ----- ----- ----- ----- ----- WW\n");
1026  fprintf_dh (fp, "%6s %6i %6.2f %6.2f %6.2f %6.4f WWW\n",
1027  ctx->algo_par,
1028  ctx->level,
1029  timing[SUB_GRAPH_T],
1030  timing[FACTOR_T], timing[SOLVE_SETUP_T], apply_per_it);
1031 #endif
1032 END_FUNC_DH}
1033 
1034 
1035 /* its during last solve; rho; nzaUsed */
1036 #undef __FUNC__
1037 #define __FUNC__ "Euclid_dhPrintStatsShorter"
1038 void
1039 Euclid_dhPrintStatsShorter (Euclid_dh ctx, FILE * fp)
1040 {
1041  START_FUNC_DH double *stats = ctx->stats;
1042 
1043  int its = ctx->its;
1044  double rho = ctx->rho_final;
1045  double nzUsedRatio = stats[NZA_RATIO_STATS];
1046 
1047 
1048  fprintf_dh (fp, "\nStats from last linear solve: YY\n");
1049  fprintf_dh (fp, "%6s %6s %6s YY\n", "its", "rho", "A_%");
1050  fprintf_dh (fp, " ----- ----- ----- YY\n");
1051  fprintf_dh (fp, "%6i %6.2f %6.2f YYY\n", its, rho, nzUsedRatio);
1052 
1053 END_FUNC_DH}
1054 
1055 #undef __FUNC__
1056 #define __FUNC__ "Euclid_dhPrintScaling"
1057 void
1058 Euclid_dhPrintScaling (Euclid_dh ctx, FILE * fp)
1059 {
1060  START_FUNC_DH int i, m = ctx->m;
1061 
1062  if (m > 10)
1063  m = 10;
1064 
1065  if (ctx->scale == NULL)
1066  {
1067  SET_V_ERROR ("ctx->scale is NULL; was Euclid_dhSetup() called?");
1068  }
1069 
1070  fprintf (fp, "\n---------- 1st %i row scaling values:\n", m);
1071  for (i = 0; i < m; ++i)
1072  {
1073  fprintf (fp, " %i %g \n", i + 1, ctx->scale[i]);
1074  }
1075 END_FUNC_DH}
1076 
1077 
1078 #undef __FUNC__
1079 #define __FUNC__ "reduce_timings_private"
1080 void
1081 reduce_timings_private (Euclid_dh ctx)
1082 {
1083  START_FUNC_DH if (np_dh > 1)
1084  {
1085  double bufOUT[TIMING_BINS];
1086 
1087  memcpy (bufOUT, ctx->timing, TIMING_BINS * sizeof (double));
1088  MPI_Reduce (bufOUT, ctx->timing, TIMING_BINS, MPI_DOUBLE, MPI_MAX, 0,
1089  comm_dh);
1090  }
1091 
1092  ctx->timingsWereReduced = true;
1093 END_FUNC_DH}
1094 
1095 #undef __FUNC__
1096 #define __FUNC__ "Euclid_dhPrintHypreReport"
1097 void
1098 Euclid_dhPrintHypreReport (Euclid_dh ctx, FILE * fp)
1099 {
1100  START_FUNC_DH double *timing;
1101  int nz;
1102 
1103  nz = Factor_dhReadNz (ctx->F);
1104  CHECK_V_ERROR;
1105  timing = ctx->timing;
1106 
1107  /* add in timing from lasst setup (if any) */
1108  ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
1109  ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
1110 
1111  reduce_timings_private (ctx);
1112  CHECK_V_ERROR;
1113 
1114  if (myid_dh == 0)
1115  {
1116 
1117  fprintf (fp,
1118  "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (start)\n");
1119  fprintf_dh (fp, "\nruntime parameters\n");
1120  fprintf_dh (fp, "------------------\n");
1121  fprintf_dh (fp, " setups: %i\n", ctx->setupCount);
1122  fprintf_dh (fp, " tri solves: %i\n", ctx->itsTotal);
1123  fprintf_dh (fp, " parallelization method: %s\n", ctx->algo_par);
1124  fprintf_dh (fp, " factorization method: %s\n", ctx->algo_ilu);
1125  if (!strcmp (ctx->algo_ilu, "iluk"))
1126  {
1127  fprintf_dh (fp, " level: %i\n", ctx->level);
1128  }
1129 
1130  if (ctx->isScaled)
1131  {
1132  fprintf_dh (fp, " matrix was row scaled\n");
1133  }
1134 
1135  fprintf_dh (fp, " global matrix row count: %i\n", ctx->n);
1136  fprintf_dh (fp, " nzF: %i\n", nz);
1137  fprintf_dh (fp, " rho: %g\n", ctx->rho_final);
1138  fprintf_dh (fp, " sparseA: %g\n", ctx->sparseTolA);
1139 
1140  fprintf_dh (fp, "\nEuclid timing report\n");
1141  fprintf_dh (fp, "--------------------\n");
1142  fprintf_dh (fp, " solves total: %0.2f (see docs)\n",
1143  timing[TOTAL_SOLVE_T]);
1144  fprintf_dh (fp, " tri solves: %0.2f\n", timing[TRI_SOLVE_T]);
1145  fprintf_dh (fp, " setups: %0.2f\n", timing[SETUP_T]);
1146  fprintf_dh (fp, " subdomain graph setup: %0.2f\n",
1147  timing[SUB_GRAPH_T]);
1148  fprintf_dh (fp, " factorization: %0.2f\n",
1149  timing[FACTOR_T]);
1150  fprintf_dh (fp, " solve setup: %0.2f\n",
1151  timing[SOLVE_SETUP_T]);
1152  fprintf_dh (fp, " rho: %0.2f\n",
1153  ctx->timing[COMPUTE_RHO_T]);
1154  fprintf_dh (fp, " misc (should be small): %0.2f\n",
1155  timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] +
1156  timing[SOLVE_SETUP_T] +
1157  timing[COMPUTE_RHO_T]));
1158 
1159  if (ctx->sg != NULL)
1160  {
1161  SubdomainGraph_dhPrintStats (ctx->sg, fp);
1162  CHECK_V_ERROR;
1163  SubdomainGraph_dhPrintRatios (ctx->sg, fp);
1164  CHECK_V_ERROR;
1165  }
1166 
1167  fprintf (fp,
1168  "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (end)\n");
1169 
1170  }
1171 
1172 END_FUNC_DH}
1173 
1174 #undef __FUNC__
1175 #define __FUNC__ "Euclid_dhPrintTestData"
1176 void
1177 Euclid_dhPrintTestData (Euclid_dh ctx, FILE * fp)
1178 {
1179  START_FUNC_DH
1180  /* Print data that should remain that will hopefully
1181  remain the same for any platform.
1182  Possibly "tri solves" may change . . .
1183  */
1184  if (myid_dh == 0)
1185  {
1186  fprintf (fp, " setups: %i\n", ctx->setupCount);
1187  fprintf (fp, " tri solves: %i\n", ctx->its);
1188  fprintf (fp, " parallelization method: %s\n", ctx->algo_par);
1189  fprintf (fp, " factorization method: %s\n", ctx->algo_ilu);
1190  fprintf (fp, " level: %i\n", ctx->level);
1191  fprintf (fp, " row scaling: %i\n", ctx->isScaled);
1192  }
1193  SubdomainGraph_dhPrintRatios (ctx->sg, fp);
1194  CHECK_V_ERROR;
1195 END_FUNC_DH}
Definition: Mat_dh.h:62
Definition: Vec_dh.h:52