IFPACK  Development
 All Classes Namespaces Files Functions Variables Enumerations Friends Pages
ilu_seq.c
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 /* to do: re-integrate fix-smalll-pivots */
44 
45 #include "ilu_dh.h"
46 #include "Mem_dh.h"
47 #include "Parser_dh.h"
48 #include "Euclid_dh.h"
49 #include "getRow_dh.h"
50 #include "Factor_dh.h"
51 #include "SubdomainGraph_dh.h"
52 
53 static bool check_constraint_private (Euclid_dh ctx, int b, int j);
54 
55 static int symbolic_row_private (int localRow,
56  int *list, int *marker, int *tmpFill,
57  int len, int *CVAL, double *AVAL,
58  int *o2n_col, Euclid_dh ctx, bool debug);
59 
60 static int numeric_row_private (int localRow,
61  int len, int *CVAL, double *AVAL,
62  REAL_DH * work, int *o2n_col, Euclid_dh ctx,
63  bool debug);
64 
65 
66 #undef __FUNC__
67 #define __FUNC__ "compute_scaling_private"
68 void
69 compute_scaling_private (int row, int len, double *AVAL, Euclid_dh ctx)
70 {
71  START_FUNC_DH double tmp = 0.0;
72  int j;
73 
74  for (j = 0; j < len; ++j)
75  tmp = MAX (tmp, fabs (AVAL[j]));
76  if (tmp)
77  {
78  ctx->scale[row] = 1.0 / tmp;
79  }
80 END_FUNC_DH}
81 
82 #if 0
83 
84 /* not used ? */
85 #undef __FUNC__
86 #define __FUNC__ "fixPivot_private"
87 double
88 fixPivot_private (int row, int len, float *vals)
89 {
90  START_FUNC_DH int i;
91  float max = 0.0;
92  bool debug = false;
93 
94  for (i = 0; i < len; ++i)
95  {
96  float tmp = fabs (vals[i]);
97  max = MAX (max, tmp);
98  }
99 END_FUNC_VAL (max * ctxPrivate->pivotFix)}
100 
101 #endif
102 
103 
104 
105 
106 #undef __FUNC__
107 #define __FUNC__ "iluk_seq"
108 void
109 iluk_seq (Euclid_dh ctx)
110 {
111  START_FUNC_DH int *rp, *cval, *diag;
112  int *CVAL;
113  int i, j, len, count, col, idx = 0;
114  int *list, *marker, *fill, *tmpFill;
115  int temp, m, from = ctx->from, to = ctx->to;
116  int *n2o_row, *o2n_col, beg_row, beg_rowP;
117  double *AVAL;
118  REAL_DH *work, *aval;
119  Factor_dh F = ctx->F;
120  SubdomainGraph_dh sg = ctx->sg;
121  bool debug = false;
122 
123  if (logFile != NULL && Parser_dhHasSwitch (parser_dh, "-debug_ilu"))
124  debug = true;
125 
126  m = F->m;
127  rp = F->rp;
128  cval = F->cval;
129  fill = F->fill;
130  diag = F->diag;
131  aval = F->aval;
132  work = ctx->work;
133  count = rp[from];
134 
135  if (sg == NULL)
136  {
137  SET_V_ERROR ("subdomain graph is NULL");
138  }
139 
140  n2o_row = ctx->sg->n2o_row;
141  o2n_col = ctx->sg->o2n_col;
142  beg_row = ctx->sg->beg_row[myid_dh];
143  beg_rowP = ctx->sg->beg_rowP[myid_dh];
144 
145  /* allocate and initialize working space */
146  list = (int *) MALLOC_DH ((m + 1) * sizeof (int));
147  CHECK_V_ERROR;
148  marker = (int *) MALLOC_DH (m * sizeof (int));
149  CHECK_V_ERROR;
150  tmpFill = (int *) MALLOC_DH (m * sizeof (int));
151  CHECK_V_ERROR;
152  for (i = 0; i < m; ++i)
153  marker[i] = -1;
154 
155  /* working space for values */
156  for (i = 0; i < m; ++i)
157  work[i] = 0.0;
158 
159 /* printf_dh("====================== starting iluk_seq; level= %i\n\n", ctx->level);
160 */
161 
162 
163  /*---------- main loop ----------*/
164 
165  for (i = from; i < to; ++i)
166  {
167  int row = n2o_row[i]; /* local row number */
168  int globalRow = row + beg_row; /* global row number */
169 
170 /*fprintf(logFile, "--------------------------------- localRow= %i\n", 1+i);
171 */
172 
173  if (debug)
174  {
175  fprintf (logFile,
176  "ILU_seq ================================= starting local row: %i, (global= %i) level= %i\n",
177  i + 1, i + 1 + sg->beg_rowP[myid_dh], ctx->level);
178  }
179 
180  EuclidGetRow (ctx->A, globalRow, &len, &CVAL, &AVAL);
181  CHECK_V_ERROR;
182 
183  /* compute scaling value for row(i) */
184  if (ctx->isScaled)
185  {
186  compute_scaling_private (i, len, AVAL, ctx);
187  CHECK_V_ERROR;
188  }
189 
190  /* Compute symbolic factor for row(i);
191  this also performs sparsification
192  */
193  count = symbolic_row_private (i, list, marker, tmpFill,
194  len, CVAL, AVAL, o2n_col, ctx, debug);
195  CHECK_V_ERROR;
196 
197  /* Ensure adequate storage; reallocate, if necessary. */
198  if (idx + count > F->alloc)
199  {
200  Factor_dhReallocate (F, idx, count);
201  CHECK_V_ERROR;
202  SET_INFO ("REALLOCATED from ilu_seq");
203  cval = F->cval;
204  fill = F->fill;
205  aval = F->aval;
206  }
207 
208  /* Copy factored symbolic row to permanent storage */
209  col = list[m];
210  while (count--)
211  {
212  cval[idx] = col;
213  fill[idx] = tmpFill[col];
214  ++idx;
215 /*fprintf(logFile, " col= %i\n", 1+col);
216 */
217  col = list[col];
218  }
219 
220  /* add row-pointer to start of next row. */
221  rp[i + 1] = idx;
222 
223  /* Insert pointer to diagonal */
224  temp = rp[i];
225  while (cval[temp] != i)
226  ++temp;
227  diag[i] = temp;
228 
229 /*fprintf(logFile, " diag[i]= %i\n", diag);
230 */
231 
232  /* compute numeric factor for current row */
233  numeric_row_private (i, len, CVAL, AVAL, work, o2n_col, ctx, debug);
234  CHECK_V_ERROR EuclidRestoreRow (ctx->A, globalRow, &len, &CVAL, &AVAL);
235  CHECK_V_ERROR;
236 
237  /* Copy factored numeric row to permanent storage,
238  and re-zero work vector
239  */
240  if (debug)
241  {
242  fprintf (logFile, "ILU_seq: ");
243  for (j = rp[i]; j < rp[i + 1]; ++j)
244  {
245  col = cval[j];
246  aval[j] = work[col];
247  work[col] = 0.0;
248  fprintf (logFile, "%i,%i,%g ; ", 1 + cval[j], fill[j], aval[j]);
249  fflush (logFile);
250  }
251  fprintf (logFile, "\n");
252  }
253  else
254  {
255  for (j = rp[i]; j < rp[i + 1]; ++j)
256  {
257  col = cval[j];
258  aval[j] = work[col];
259  work[col] = 0.0;
260  }
261  }
262 
263  /* check for zero diagonal */
264  if (!aval[diag[i]])
265  {
266  sprintf (msgBuf_dh, "zero diagonal in local row %i", i + 1);
267  SET_V_ERROR (msgBuf_dh);
268  }
269  }
270 
271  FREE_DH (list);
272  CHECK_V_ERROR;
273  FREE_DH (tmpFill);
274  CHECK_V_ERROR;
275  FREE_DH (marker);
276  CHECK_V_ERROR;
277 
278  /* adjust column indices back to global */
279  if (beg_rowP)
280  {
281  int start = rp[from];
282  int stop = rp[to];
283  for (i = start; i < stop; ++i)
284  cval[i] += beg_rowP;
285  }
286 
287  /* for debugging: this is so the Print methods will work, even if
288  F hasn't been fully factored
289  */
290  for (i = to + 1; i < m; ++i)
291  rp[i] = 0;
292 
293 END_FUNC_DH}
294 
295 
296 #undef __FUNC__
297 #define __FUNC__ "iluk_seq_block"
298 void
299 iluk_seq_block (Euclid_dh ctx)
300 {
301  START_FUNC_DH int *rp, *cval, *diag;
302  int *CVAL;
303  int h, i, j, len, count, col, idx = 0;
304  int *list, *marker, *fill, *tmpFill;
305  int temp, m;
306  int *n2o_row, *o2n_col, *beg_rowP, *n2o_sub, blocks;
307  int *row_count, *dummy = NULL, dummy2[1];
308  double *AVAL;
309  REAL_DH *work, *aval;
310  Factor_dh F = ctx->F;
311  SubdomainGraph_dh sg = ctx->sg;
312  bool bj = false, constrained = false;
313  int discard = 0;
314  int gr = -1; /* globalRow */
315  bool debug = false;
316 
317  if (logFile != NULL && Parser_dhHasSwitch (parser_dh, "-debug_ilu"))
318  debug = true;
319 
320 /*fprintf(stderr, "====================== starting iluk_seq_block; level= %i\n\n", ctx->level);
321 */
322 
323  if (!strcmp (ctx->algo_par, "bj"))
324  bj = true;
325  constrained = !Parser_dhHasSwitch (parser_dh, "-unconstrained");
326 
327  m = F->m;
328  rp = F->rp;
329  cval = F->cval;
330  fill = F->fill;
331  diag = F->diag;
332  aval = F->aval;
333  work = ctx->work;
334 
335  if (sg != NULL)
336  {
337  n2o_row = sg->n2o_row;
338  o2n_col = sg->o2n_col;
339  row_count = sg->row_count;
340  /* beg_row = sg->beg_row ; */
341  beg_rowP = sg->beg_rowP;
342  n2o_sub = sg->n2o_sub;
343  blocks = sg->blocks;
344  }
345 
346  else
347  {
348  dummy = (int *) MALLOC_DH (m * sizeof (int));
349  CHECK_V_ERROR;
350  for (i = 0; i < m; ++i)
351  dummy[i] = i;
352  n2o_row = dummy;
353  o2n_col = dummy;
354  dummy2[0] = m;
355  row_count = dummy2;
356  /* beg_row = 0; */
357  beg_rowP = dummy;
358  n2o_sub = dummy;
359  blocks = 1;
360  }
361 
362  /* allocate and initialize working space */
363  list = (int *) MALLOC_DH ((m + 1) * sizeof (int));
364  CHECK_V_ERROR;
365  marker = (int *) MALLOC_DH (m * sizeof (int));
366  CHECK_V_ERROR;
367  tmpFill = (int *) MALLOC_DH (m * sizeof (int));
368  CHECK_V_ERROR;
369  for (i = 0; i < m; ++i)
370  marker[i] = -1;
371 
372  /* working space for values */
373  for (i = 0; i < m; ++i)
374  work[i] = 0.0;
375 
376  /*---------- main loop ----------*/
377 
378  for (h = 0; h < blocks; ++h)
379  {
380  /* 1st and last row in current block, with respect to A */
381  int curBlock = n2o_sub[h];
382  int first_row = beg_rowP[curBlock];
383  int end_row = first_row + row_count[curBlock];
384 
385  if (debug)
386  {
387  fprintf (logFile, "\n\nILU_seq BLOCK: %i @@@@@@@@@@@@@@@ \n",
388  curBlock);
389  }
390 
391  for (i = first_row; i < end_row; ++i)
392  {
393  int row = n2o_row[i];
394  ++gr;
395 
396  if (debug)
397  {
398  fprintf (logFile,
399  "ILU_seq global: %i local: %i =================================\n",
400  1 + gr, 1 + i - first_row);
401  }
402 
403 /*prinft("first_row= %i end_row= %i\n", first_row, end_row);
404 */
405 
406  EuclidGetRow (ctx->A, row, &len, &CVAL, &AVAL);
407  CHECK_V_ERROR;
408 
409  /* compute scaling value for row(i) */
410  if (ctx->isScaled)
411  {
412  compute_scaling_private (i, len, AVAL, ctx);
413  CHECK_V_ERROR;
414  }
415 
416  /* Compute symbolic factor for row(i);
417  this also performs sparsification
418  */
419  count = symbolic_row_private (i, list, marker, tmpFill,
420  len, CVAL, AVAL, o2n_col, ctx, debug);
421  CHECK_V_ERROR;
422 
423  /* Ensure adequate storage; reallocate, if necessary. */
424  if (idx + count > F->alloc)
425  {
426  Factor_dhReallocate (F, idx, count);
427  CHECK_V_ERROR;
428  SET_INFO ("REALLOCATED from ilu_seq");
429  cval = F->cval;
430  fill = F->fill;
431  aval = F->aval;
432  }
433 
434  /* Copy factored symbolic row to permanent storage */
435  col = list[m];
436  while (count--)
437  {
438 
439  /* constrained pilu */
440  if (constrained && !bj)
441  {
442  if (col >= first_row && col < end_row)
443  {
444  cval[idx] = col;
445  fill[idx] = tmpFill[col];
446  ++idx;
447  }
448  else
449  {
450  if (check_constraint_private (ctx, curBlock, col))
451  {
452  cval[idx] = col;
453  fill[idx] = tmpFill[col];
454  ++idx;
455  }
456  else
457  {
458  ++discard;
459  }
460  }
461  col = list[col];
462  }
463 
464  /* block jacobi case */
465  else if (bj)
466  {
467  if (col >= first_row && col < end_row)
468  {
469  cval[idx] = col;
470  fill[idx] = tmpFill[col];
471  ++idx;
472  }
473  else
474  {
475  ++discard;
476  }
477  col = list[col];
478  }
479 
480  /* general case */
481  else
482  {
483  cval[idx] = col;
484  fill[idx] = tmpFill[col];
485  ++idx;
486  col = list[col];
487  }
488  }
489 
490  /* add row-pointer to start of next row. */
491  rp[i + 1] = idx;
492 
493  /* Insert pointer to diagonal */
494  temp = rp[i];
495  while (cval[temp] != i)
496  ++temp;
497  diag[i] = temp;
498 
499  /* compute numeric factor for current row */
500  numeric_row_private (i, len, CVAL, AVAL, work, o2n_col, ctx, debug);
501  CHECK_V_ERROR EuclidRestoreRow (ctx->A, row, &len, &CVAL, &AVAL);
502  CHECK_V_ERROR;
503 
504  /* Copy factored numeric row to permanent storage,
505  and re-zero work vector
506  */
507  if (debug)
508  {
509  fprintf (logFile, "ILU_seq: ");
510  for (j = rp[i]; j < rp[i + 1]; ++j)
511  {
512  col = cval[j];
513  aval[j] = work[col];
514  work[col] = 0.0;
515  fprintf (logFile, "%i,%i,%g ; ", 1 + cval[j], fill[j],
516  aval[j]);
517  }
518  fprintf (logFile, "\n");
519  }
520 
521  /* normal operation */
522  else
523  {
524  for (j = rp[i]; j < rp[i + 1]; ++j)
525  {
526  col = cval[j];
527  aval[j] = work[col];
528  work[col] = 0.0;
529  }
530  }
531 
532  /* check for zero diagonal */
533  if (!aval[diag[i]])
534  {
535  sprintf (msgBuf_dh, "zero diagonal in local row %i", i + 1);
536  SET_V_ERROR (msgBuf_dh);
537  }
538  }
539  }
540 
541 /* printf("bj= %i constrained= %i discarded= %i\n", bj, constrained, discard); */
542 
543  if (dummy != NULL)
544  {
545  FREE_DH (dummy);
546  CHECK_V_ERROR;
547  }
548  FREE_DH (list);
549  CHECK_V_ERROR;
550  FREE_DH (tmpFill);
551  CHECK_V_ERROR;
552  FREE_DH (marker);
553  CHECK_V_ERROR;
554 
555 END_FUNC_DH}
556 
557 
558 
559 /* Computes ILU(K) factor of a single row; returns fill
560  count for the row. Explicitly inserts diag if not already
561  present. On return, all column indices are local
562  (i.e, referenced to 0).
563 */
564 #undef __FUNC__
565 #define __FUNC__ "symbolic_row_private"
566 int
567 symbolic_row_private (int localRow,
568  int *list, int *marker, int *tmpFill,
569  int len, int *CVAL, double *AVAL,
570  int *o2n_col, Euclid_dh ctx, bool debug)
571 {
572  START_FUNC_DH int level = ctx->level, m = ctx->F->m;
573  int *cval = ctx->F->cval, *diag = ctx->F->diag, *rp = ctx->F->rp;
574  int *fill = ctx->F->fill;
575  int count = 0;
576  int j, node, tmp, col, head;
577  int fill1, fill2, beg_row;
578  double val;
579  double thresh = ctx->sparseTolA;
580  REAL_DH scale;
581 
582  scale = ctx->scale[localRow];
583  ctx->stats[NZA_STATS] += (double) len;
584  beg_row = ctx->sg->beg_row[myid_dh];
585 
586  /* Insert col indices in linked list, and values in work vector.
587  * List[m] points to the first (smallest) col in the linked list.
588  * Column values are adjusted from global to local numbering.
589  */
590  list[m] = m;
591  for (j = 0; j < len; ++j)
592  {
593  tmp = m;
594  col = *CVAL++;
595  col -= beg_row; /* adjust to zero based */
596  col = o2n_col[col]; /* permute the column */
597  val = *AVAL++;
598  val *= scale; /* scale the value */
599 
600  if (fabs (val) > thresh || col == localRow)
601  { /* sparsification */
602  ++count;
603  while (col > list[tmp])
604  tmp = list[tmp];
605  list[col] = list[tmp];
606  list[tmp] = col;
607  tmpFill[col] = 0;
608  marker[col] = localRow;
609  }
610  }
611 
612  /* insert diag if not already present */
613  if (marker[localRow] != localRow)
614  {
615  tmp = m;
616  while (localRow > list[tmp])
617  tmp = list[tmp];
618  list[localRow] = list[tmp];
619  list[tmp] = localRow;
620  tmpFill[localRow] = 0;
621  marker[localRow] = localRow;
622  ++count;
623  }
624  ctx->stats[NZA_USED_STATS] += (double) count;
625 
626  /* update row from previously factored rows */
627  head = m;
628  if (level > 0)
629  {
630  while (list[head] < localRow)
631  {
632  node = list[head];
633  fill1 = tmpFill[node];
634 
635  if (debug)
636  {
637  fprintf (logFile, "ILU_seq sf updating from row: %i\n",
638  1 + node);
639  }
640 
641  if (fill1 < level)
642  {
643  for (j = diag[node] + 1; j < rp[node + 1]; ++j)
644  {
645  col = cval[j];
646  fill2 = fill1 + fill[j] + 1;
647 
648  if (fill2 <= level)
649  {
650  /* if newly discovered fill entry, mark it as discovered;
651  * if entry has level <= K, add it to the linked-list.
652  */
653  if (marker[col] < localRow)
654  {
655  tmp = head;
656  marker[col] = localRow;
657  tmpFill[col] = fill2;
658  while (col > list[tmp])
659  tmp = list[tmp];
660  list[col] = list[tmp];
661  list[tmp] = col;
662  ++count; /* increment fill count */
663  }
664 
665  /* if previously-discovered fill, update the entry's level. */
666  else
667  {
668  tmpFill[col] =
669  (fill2 < tmpFill[col]) ? fill2 : tmpFill[col];
670  }
671  }
672  }
673  } /* fill1 < level */
674  head = list[head]; /* advance to next item in linked list */
675  }
676  }
677 END_FUNC_VAL (count)}
678 
679 
680 #undef __FUNC__
681 #define __FUNC__ "numeric_row_private"
682 int
683 numeric_row_private (int localRow,
684  int len, int *CVAL, double *AVAL,
685  REAL_DH * work, int *o2n_col, Euclid_dh ctx, bool debug)
686 {
687  START_FUNC_DH double pc, pv, multiplier;
688  int j, k, col, row;
689  int *rp = ctx->F->rp, *cval = ctx->F->cval;
690  int *diag = ctx->F->diag;
691  int beg_row;
692  double val;
693  REAL_DH *aval = ctx->F->aval, scale;
694 
695  scale = ctx->scale[localRow];
696  beg_row = ctx->sg->beg_row[myid_dh];
697 
698  /* zero work vector */
699  /* note: indices in col[] are already permuted. */
700  for (j = rp[localRow]; j < rp[localRow + 1]; ++j)
701  {
702  col = cval[j];
703  work[col] = 0.0;
704  }
705 
706  /* init work vector with values from A */
707  /* (note: some values may be na due to sparsification; this is O.K.) */
708  for (j = 0; j < len; ++j)
709  {
710  col = *CVAL++;
711  col -= beg_row;
712  val = *AVAL++;
713  col = o2n_col[col]; /* note: we permute the indices from A */
714  work[col] = val * scale;
715  }
716 
717 
718 
719 /*fprintf(stderr, "local row= %i\n", 1+localRow);
720 */
721 
722 
723  for (j = rp[localRow]; j < diag[localRow]; ++j)
724  {
725  row = cval[j]; /* previously factored row */
726  pc = work[row];
727 
728 
729  pv = aval[diag[row]]; /* diagonal of previously factored row */
730 
731 /*
732 if (pc == 0.0 || pv == 0.0) {
733 fprintf(stderr, "pv= %g; pc= %g\n", pv, pc);
734 }
735 */
736 
737  if (pc != 0.0 && pv != 0.0)
738  {
739  multiplier = pc / pv;
740  work[row] = multiplier;
741 
742  if (debug)
743  {
744  fprintf (logFile,
745  "ILU_seq nf updating from row: %i; multiplier= %g\n",
746  1 + row, multiplier);
747  }
748 
749  for (k = diag[row] + 1; k < rp[row + 1]; ++k)
750  {
751  col = cval[k];
752  work[col] -= (multiplier * aval[k]);
753  }
754  }
755  else
756  {
757  if (debug)
758  {
759  fprintf (logFile,
760  "ILU_seq nf NO UPDATE from row %i; pc = %g; pv = %g\n",
761  1 + row, pc, pv);
762  }
763  }
764  }
765 
766  /* check for zero or too small of a pivot */
767 #if 0
768  if (fabs (work[i]) <= pivotTol)
769  {
770  /* yuck! assume row scaling, and just stick in a value */
771  aval[diag[i]] = pivotFix;
772  }
773 #endif
774 
775 END_FUNC_VAL (0)}
776 
777 
778 /*-----------------------------------------------------------------------*
779  * ILUT starts here
780  *-----------------------------------------------------------------------*/
781 int ilut_row_private (int localRow, int *list, int *o2n_col, int *marker,
782  int len, int *CVAL, double *AVAL,
783  REAL_DH * work, Euclid_dh ctx, bool debug);
784 
785 #undef __FUNC__
786 #define __FUNC__ "ilut_seq"
787 void
788 ilut_seq (Euclid_dh ctx)
789 {
790  START_FUNC_DH int *rp, *cval, *diag, *CVAL;
791  int i, len, count, col, idx = 0;
792  int *list, *marker;
793  int temp, m, from, to;
794  int *n2o_row, *o2n_col, beg_row, beg_rowP;
795  double *AVAL, droptol;
796  REAL_DH *work, *aval, val;
797  Factor_dh F = ctx->F;
798  SubdomainGraph_dh sg = ctx->sg;
799  bool debug = false;
800 
801  if (logFile != NULL && Parser_dhHasSwitch (parser_dh, "-debug_ilu"))
802  debug = true;
803 
804  m = F->m;
805  rp = F->rp;
806  cval = F->cval;
807  diag = F->diag;
808  aval = F->aval;
809  work = ctx->work;
810  from = ctx->from;
811  to = ctx->to;
812  count = rp[from];
813  droptol = ctx->droptol;
814 
815  if (sg == NULL)
816  {
817  SET_V_ERROR ("subdomain graph is NULL");
818  }
819 
820  n2o_row = ctx->sg->n2o_row;
821  o2n_col = ctx->sg->o2n_col;
822  beg_row = ctx->sg->beg_row[myid_dh];
823  beg_rowP = ctx->sg->beg_rowP[myid_dh];
824 
825 
826  /* allocate and initialize working space */
827  list = (int *) MALLOC_DH ((m + 1) * sizeof (int));
828  CHECK_V_ERROR;
829  marker = (int *) MALLOC_DH (m * sizeof (int));
830  CHECK_V_ERROR;
831  for (i = 0; i < m; ++i)
832  marker[i] = -1;
833  rp[0] = 0;
834 
835  /* working space for values */
836  for (i = 0; i < m; ++i)
837  work[i] = 0.0;
838 
839  /* ----- main loop start ----- */
840  for (i = from; i < to; ++i)
841  {
842  int row = n2o_row[i]; /* local row number */
843  int globalRow = row + beg_row; /* global row number */
844  EuclidGetRow (ctx->A, globalRow, &len, &CVAL, &AVAL);
845  CHECK_V_ERROR;
846 
847  /* compute scaling value for row(i) */
848  compute_scaling_private (i, len, AVAL, ctx);
849  CHECK_V_ERROR;
850 
851  /* compute factor for row i */
852  count = ilut_row_private (i, list, o2n_col, marker,
853  len, CVAL, AVAL, work, ctx, debug);
854  CHECK_V_ERROR;
855 
856  EuclidRestoreRow (ctx->A, globalRow, &len, &CVAL, &AVAL);
857  CHECK_V_ERROR;
858 
859  /* Ensure adequate storage; reallocate, if necessary. */
860  if (idx + count > F->alloc)
861  {
862  Factor_dhReallocate (F, idx, count);
863  CHECK_V_ERROR;
864  SET_INFO ("REALLOCATED from ilu_seq");
865  cval = F->cval;
866  aval = F->aval;
867  }
868 
869  /* Copy factored row to permanent storage,
870  apply 2nd drop test,
871  and re-zero work vector
872  */
873  col = list[m];
874  while (count--)
875  {
876  val = work[col];
877  if (col == i || fabs (val) > droptol)
878  {
879  cval[idx] = col;
880  aval[idx++] = val;
881  work[col] = 0.0;
882  }
883  col = list[col];
884  }
885 
886  /* add row-pointer to start of next row. */
887  rp[i + 1] = idx;
888 
889  /* Insert pointer to diagonal */
890  temp = rp[i];
891  while (cval[temp] != i)
892  ++temp;
893  diag[i] = temp;
894 
895  /* check for zero diagonal */
896  if (!aval[diag[i]])
897  {
898  sprintf (msgBuf_dh, "zero diagonal in local row %i", i + 1);
899  SET_V_ERROR (msgBuf_dh);
900  }
901  } /* --------- main loop end --------- */
902 
903  /* adjust column indices back to global */
904  if (beg_rowP)
905  {
906  int start = rp[from];
907  int stop = rp[to];
908  for (i = start; i < stop; ++i)
909  cval[i] += beg_rowP;
910  }
911 
912  FREE_DH (list);
913  FREE_DH (marker);
914 END_FUNC_DH}
915 
916 
917 #undef __FUNC__
918 #define __FUNC__ "ilut_row_private"
919 int
920 ilut_row_private (int localRow, int *list, int *o2n_col, int *marker,
921  int len, int *CVAL, double *AVAL,
922  REAL_DH * work, Euclid_dh ctx, bool debug)
923 {
924  START_FUNC_DH Factor_dh F = ctx->F;
925  int j, col, m = ctx->m, *rp = F->rp, *cval = F->cval;
926  int tmp, *diag = F->diag;
927  int head;
928  int count = 0, beg_row;
929  double val;
930  double mult, *aval = F->aval;
931  double scale, pv, pc;
932  double droptol = ctx->droptol;
933  double thresh = ctx->sparseTolA;
934 
935  scale = ctx->scale[localRow];
936  ctx->stats[NZA_STATS] += (double) len;
937  beg_row = ctx->sg->beg_row[myid_dh];
938 
939 
940  /* Insert col indices in linked list, and values in work vector.
941  * List[m] points to the first (smallest) col in the linked list.
942  * Column values are adjusted from global to local numbering.
943  */
944  list[m] = m;
945  for (j = 0; j < len; ++j)
946  {
947  tmp = m;
948  col = *CVAL++;
949  col -= beg_row; /* adjust to zero based */
950  col = o2n_col[col]; /* permute the column */
951  val = *AVAL++;
952  val *= scale; /* scale the value */
953 
954  if (fabs (val) > thresh || col == localRow)
955  { /* sparsification */
956  ++count;
957  while (col > list[tmp])
958  tmp = list[tmp];
959  list[col] = list[tmp];
960  list[tmp] = col;
961  work[col] = val;
962  marker[col] = localRow;
963  }
964  }
965 
966  /* insert diag if not already present */
967  if (marker[localRow] != localRow)
968  {
969  tmp = m;
970  while (localRow > list[tmp])
971  tmp = list[tmp];
972  list[localRow] = list[tmp];
973  list[tmp] = localRow;
974  marker[localRow] = localRow;
975  ++count;
976  }
977 
978  /* update current row from previously factored rows */
979  head = m;
980  while (list[head] < localRow)
981  {
982  int row = list[head];
983 
984  /* get the multiplier, and apply 1st drop tolerance test */
985  pc = work[row];
986  if (pc != 0.0)
987  {
988  pv = aval[diag[row]]; /* diagonal (pivot) of previously factored row */
989  mult = pc / pv;
990 
991  /* update localRow from previously factored "row" */
992  if (fabs (mult) > droptol)
993  {
994  work[row] = mult;
995 
996  for (j = diag[row] + 1; j < rp[row + 1]; ++j)
997  {
998  col = cval[j];
999  work[col] -= (mult * aval[j]);
1000 
1001  /* if col isn't already present in the linked-list, insert it. */
1002  if (marker[col] < localRow)
1003  {
1004  marker[col] = localRow; /* mark the column as known fill */
1005  tmp = head; /* insert in list [this and next 3 lines] */
1006  while (col > list[tmp])
1007  tmp = list[tmp];
1008  list[col] = list[tmp];
1009  list[tmp] = col;
1010  ++count; /* increment fill count */
1011  }
1012  }
1013  }
1014  }
1015  head = list[head]; /* advance to next item in linked list */
1016  }
1017 
1018 END_FUNC_VAL (count)}
1019 
1020 
1021 #undef __FUNC__
1022 #define __FUNC__ "check_constraint_private"
1023 bool
1024 check_constraint_private (Euclid_dh ctx, int p1, int j)
1025 {
1026  START_FUNC_DH bool retval = false;
1027  int i, p2;
1028  int *nabors, count;
1029  SubdomainGraph_dh sg = ctx->sg;
1030 
1031  if (sg == NULL)
1032  {
1033  SET_ERROR (-1, "ctx->sg == NULL");
1034  }
1035 
1036  p2 = SubdomainGraph_dhFindOwner (ctx->sg, j, true);
1037 
1038 
1039  nabors = sg->adj + sg->ptrs[p1];
1040  count = sg->ptrs[p1 + 1] - sg->ptrs[p1];
1041 
1042 /*
1043 printf("p1= %i, p2= %i; p1's nabors: ", p1, p2);
1044 for (i=0; i<count; ++i) printf("%i ", nabors[i]);
1045 printf("\n");
1046 */
1047 
1048  for (i = 0; i < count; ++i)
1049  {
1050 /* printf(" @@@ next nabor= %i\n", nabors[i]);
1051 */
1052  if (nabors[i] == p2)
1053  {
1054  retval = true;
1055  break;
1056  }
1057  }
1058 
1059 END_FUNC_VAL (retval)}