Amesos Package Browser (Single Doxygen Collection)  Development
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
amesos_klu_factor.c
Go to the documentation of this file.
1 /* ========================================================================== */
2 /* === KLU_factor =========================================================== */
3 /* ========================================================================== */
4 
5 /* Factor the matrix, after ordering and analyzing it with KLU_analyze
6  * or KLU_analyze_given.
7  */
8 
9 #include "amesos_klu_internal.h"
10 
11 /* ========================================================================== */
12 /* === KLU_factor2 ========================================================== */
13 /* ========================================================================== */
14 
15 static void factor2
16 (
17  /* inputs, not modified */
18  Int Ap [ ], /* size n+1, column pointers */
19  Int Ai [ ], /* size nz, row indices */
20  Entry Ax [ ],
21  KLU_symbolic *Symbolic,
22 
23  /* inputs, modified on output: */
24  KLU_numeric *Numeric,
25  KLU_common *Common
26 )
27 {
28  double lsize ;
29  double *Lnz, *Rs ;
30  Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Pblock, *Pinv, *Iwork,
31  *Lip, *Uip, *Llen, *Ulen ;
32  Entry *Offx, *X, s, *Udiag ;
33  Unit **LUbx ;
34  Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, lnz, unz, p, newrow,
35  nblocks, poff, nzoff, lnz_block, unz_block, scale, max_lnz_block,
36  max_unz_block ;
37 
38  /* ---------------------------------------------------------------------- */
39  /* initializations */
40  /* ---------------------------------------------------------------------- */
41 
42  /* get the contents of the Symbolic object */
43  n = Symbolic->n ;
44  P = Symbolic->P ;
45  Q = Symbolic->Q ;
46  R = Symbolic->R ;
47  Lnz = Symbolic->Lnz ;
48  nblocks = Symbolic->nblocks ;
49  nzoff = Symbolic->nzoff ;
50 
51  Pnum = Numeric->Pnum ;
52  Offp = Numeric->Offp ;
53  Offi = Numeric->Offi ;
54  Offx = (Entry *) Numeric->Offx ;
55 
56  Lip = Numeric->Lip ;
57  Uip = Numeric->Uip ;
58  Llen = Numeric->Llen ;
59  Ulen = Numeric->Ulen ;
60  LUbx = (Unit **) Numeric->LUbx ;
61  Udiag = Numeric->Udiag ;
62 
63  Rs = Numeric->Rs ;
64  Pinv = Numeric->Pinv ;
65  X = (Entry *) Numeric->Xwork ; /* X is of size n */
66  Iwork = Numeric->Iwork ; /* 5*maxblock for KLU_factor */
67  /* 1*maxblock for Pblock */
68  Pblock = Iwork + 5*((size_t) Symbolic->maxblock) ;
69  Common->nrealloc = 0 ;
70  scale = Common->scale ;
71  max_lnz_block = 1 ;
72  max_unz_block = 1 ;
73 
74  /* compute the inverse of P from symbolic analysis. Will be updated to
75  * become the inverse of the numerical factorization when the factorization
76  * is done, for use in KLU_refactor */
77 #ifndef NDEBUG
78  for (k = 0 ; k < n ; k++)
79  {
80  Pinv [k] = EMPTY ;
81  }
82 #endif
83  for (k = 0 ; k < n ; k++)
84  {
85  ASSERT (P [k] >= 0 && P [k] < n) ;
86  Pinv [P [k]] = k ;
87  }
88 #ifndef NDEBUG
89  for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
90 #endif
91 
92  lnz = 0 ;
93  unz = 0 ;
94  Common->noffdiag = 0 ;
95  Offp [0] = 0 ;
96 
97  /* ---------------------------------------------------------------------- */
98  /* optionally check input matrix and compute scale factors */
99  /* ---------------------------------------------------------------------- */
100 
101  if (scale >= 0)
102  {
103  /* use Pnum as workspace. NOTE: scale factors are not yet permuted
104  * according to the final pivot row ordering, so Rs [oldrow] is the
105  * scale factor for A (oldrow,:), for the user's matrix A. Pnum is
106  * used as workspace in KLU_scale. When the factorization is done,
107  * the scale factors are permuted according to the final pivot row
108  * permutation, so that Rs [k] is the scale factor for the kth row of
109  * A(p,q) where p and q are the final row and column permutations. */
110  KLU_scale (scale, n, Ap, Ai, (double *) Ax, Rs, Pnum, Common) ;
111  if (Common->status < KLU_OK)
112  {
113  /* matrix is invalid */
114  return ;
115  }
116  }
117 
118 #ifndef NDEBUG
119  if (scale > 0)
120  {
121  for (k = 0 ; k < n ; k++) PRINTF (("Rs [%d] %g\n", k, Rs [k])) ;
122  }
123 #endif
124 
125  /* ---------------------------------------------------------------------- */
126  /* factor each block using klu */
127  /* ---------------------------------------------------------------------- */
128 
129  for (block = 0 ; block < nblocks ; block++)
130  {
131 
132  /* ------------------------------------------------------------------ */
133  /* the block is from rows/columns k1 to k2-1 */
134  /* ------------------------------------------------------------------ */
135 
136  k1 = R [block] ;
137  k2 = R [block+1] ;
138  nk = k2 - k1 ;
139  PRINTF (("FACTOR BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
140 
141  if (nk == 1)
142  {
143 
144  /* -------------------------------------------------------------- */
145  /* singleton case */
146  /* -------------------------------------------------------------- */
147 
148  poff = Offp [k1] ;
149  oldcol = Q [k1] ;
150  pend = Ap [oldcol+1] ;
151  CLEAR (s) ;
152 
153  if (scale <= 0)
154  {
155  /* no scaling */
156  for (p = Ap [oldcol] ; p < pend ; p++)
157  {
158  oldrow = Ai [p] ;
159  newrow = Pinv [oldrow] ;
160  if (newrow < k1)
161  {
162  Offi [poff] = oldrow ;
163  Offx [poff] = Ax [p] ;
164  poff++ ;
165  }
166  else
167  {
168  ASSERT (newrow == k1) ;
169  PRINTF (("singleton block %d", block)) ;
170  PRINT_ENTRY (Ax [p]) ;
171  s = Ax [p] ;
172  }
173  }
174  }
175  else
176  {
177  /* row scaling. NOTE: scale factors are not yet permuted
178  * according to the pivot row permutation, so Rs [oldrow] is
179  * used below. When the factorization is done, the scale
180  * factors are permuted, so that Rs [newrow] will be used in
181  * klu_solve, klu_tsolve, and klu_rgrowth */
182  for (p = Ap [oldcol] ; p < pend ; p++)
183  {
184  oldrow = Ai [p] ;
185  newrow = Pinv [oldrow] ;
186  if (newrow < k1)
187  {
188  Offi [poff] = oldrow ;
189  /* Offx [poff] = Ax [p] / Rs [oldrow] ; */
190  SCALE_DIV_ASSIGN (Offx [poff], Ax [p], Rs [oldrow]) ;
191  poff++ ;
192  }
193  else
194  {
195  ASSERT (newrow == k1) ;
196  PRINTF (("singleton block %d ", block)) ;
197  PRINT_ENTRY (Ax[p]) ;
198  SCALE_DIV_ASSIGN (s, Ax [p], Rs [oldrow]) ;
199  }
200  }
201  }
202 
203  Udiag [k1] = s ;
204 
205  if (IS_ZERO (s))
206  {
207  /* singular singleton */
208  Common->status = KLU_SINGULAR ;
209  Common->numerical_rank = k1 ;
210  Common->singular_col = oldcol ;
211  if (Common->halt_if_singular)
212  {
213  return ;
214  }
215  }
216 
217  Offp [k1+1] = poff ;
218  Pnum [k1] = P [k1] ;
219  lnz++ ;
220  unz++ ;
221 
222  }
223  else
224  {
225 
226  /* -------------------------------------------------------------- */
227  /* construct and factorize the kth block */
228  /* -------------------------------------------------------------- */
229 
230  if (Lnz [block] < 0)
231  {
232  /* COLAMD was used - no estimate of fill-in */
233  /* use 10 times the nnz in A, plus n */
234  lsize = -(Common->initmem) ;
235  }
236  else
237  {
238  lsize = Common->initmem_amd * Lnz [block] + nk ;
239  }
240 
241  /* allocates 1 arrays: LUbx [block] */
242  Numeric->LUsize [block] = KLU_kernel_factor (nk, Ap, Ai, Ax, Q,
243  lsize, &LUbx [block], Udiag + k1, Llen + k1, Ulen + k1,
244  Lip + k1, Uip + k1, Pblock, &lnz_block, &unz_block,
245  X, Iwork, k1, Pinv, Rs, Offp, Offi, Offx, Common) ;
246 
247  if (Common->status < KLU_OK ||
248  (Common->status == KLU_SINGULAR && Common->halt_if_singular))
249  {
250  /* out of memory, invalid inputs, or singular */
251  return ;
252  }
253 
254  PRINTF (("\n----------------------- L %d:\n", block)) ;
255  ASSERT (KLU_valid_LU (nk, TRUE, Lip+k1, Llen+k1, LUbx [block])) ;
256  PRINTF (("\n----------------------- U %d:\n", block)) ;
257  ASSERT (KLU_valid_LU (nk, FALSE, Uip+k1, Ulen+k1, LUbx [block])) ;
258 
259  /* -------------------------------------------------------------- */
260  /* get statistics */
261  /* -------------------------------------------------------------- */
262 
263  lnz += lnz_block ;
264  unz += unz_block ;
265  max_lnz_block = MAX (max_lnz_block, lnz_block) ;
266  max_unz_block = MAX (max_unz_block, unz_block) ;
267 
268  if (Lnz [block] == EMPTY)
269  {
270  /* revise estimate for subsequent factorization */
271  Lnz [block] = MAX (lnz_block, unz_block) ;
272  }
273 
274  /* -------------------------------------------------------------- */
275  /* combine the klu row ordering with the symbolic pre-ordering */
276  /* -------------------------------------------------------------- */
277 
278  PRINTF (("Pnum, 1-based:\n")) ;
279  for (k = 0 ; k < nk ; k++)
280  {
281  ASSERT (k + k1 < n) ;
282  ASSERT (Pblock [k] + k1 < n) ;
283  Pnum [k + k1] = P [Pblock [k] + k1] ;
284  PRINTF (("Pnum (%d + %d + 1 = %d) = %d + 1 = %d\n",
285  k, k1, k+k1+1, Pnum [k+k1], Pnum [k+k1]+1)) ;
286  }
287 
288  /* the local pivot row permutation Pblock is no longer needed */
289  }
290  }
291  ASSERT (nzoff == Offp [n]) ;
292  PRINTF (("\n------------------- Off diagonal entries:\n")) ;
293  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
294 
295  Numeric->lnz = lnz ;
296  Numeric->unz = unz ;
297  Numeric->max_lnz_block = max_lnz_block ;
298  Numeric->max_unz_block = max_unz_block ;
299 
300  /* compute the inverse of Pnum */
301 #ifndef NDEBUG
302  for (k = 0 ; k < n ; k++)
303  {
304  Pinv [k] = EMPTY ;
305  }
306 #endif
307  for (k = 0 ; k < n ; k++)
308  {
309  ASSERT (Pnum [k] >= 0 && Pnum [k] < n) ;
310  Pinv [Pnum [k]] = k ;
311  }
312 #ifndef NDEBUG
313  for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
314 #endif
315 
316  /* permute scale factors Rs according to pivotal row order */
317  if (scale > 0)
318  {
319  for (k = 0 ; k < n ; k++)
320  {
321  REAL (X [k]) = Rs [Pnum [k]] ;
322  }
323  for (k = 0 ; k < n ; k++)
324  {
325  Rs [k] = REAL (X [k]) ;
326  }
327  }
328 
329  PRINTF (("\n------------------- Off diagonal entries, old:\n")) ;
330  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
331 
332  /* apply the pivot row permutations to the off-diagonal entries */
333  for (p = 0 ; p < nzoff ; p++)
334  {
335  ASSERT (Offi [p] >= 0 && Offi [p] < n) ;
336  Offi [p] = Pinv [Offi [p]] ;
337  }
338 
339  PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
340  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
341 
342 #ifndef NDEBUG
343  {
344  PRINTF (("\n ############# KLU_BTF_FACTOR done, nblocks %d\n",nblocks));
345  Entry ss, *Udiag = Numeric->Udiag ;
346  for (block = 0 ; block < nblocks && Common->status == KLU_OK ; block++)
347  {
348  k1 = R [block] ;
349  k2 = R [block+1] ;
350  nk = k2 - k1 ;
351  PRINTF (("\n======================KLU_factor output: k1 %d k2 %d nk %d\n",k1,k2,nk)) ;
352  if (nk == 1)
353  {
354  PRINTF (("singleton ")) ;
355  /* ENTRY_PRINT (singleton [block]) ; */
356  ss = Udiag [k1] ;
357  PRINT_ENTRY (ss) ;
358  }
359  else
360  {
361  Int *Lip, *Uip, *Llen, *Ulen ;
362  Unit *LU ;
363  Lip = Numeric->Lip + k1 ;
364  Llen = Numeric->Llen + k1 ;
365  LU = (Unit *) Numeric->LUbx [block] ;
366  PRINTF (("\n---- L block %d\n", block));
367  ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
368  Uip = Numeric->Uip + k1 ;
369  Ulen = Numeric->Ulen + k1 ;
370  PRINTF (("\n---- U block %d\n", block)) ;
371  ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
372  }
373  }
374  }
375 #endif
376 }
377 
378 
379 
380 /* ========================================================================== */
381 /* === KLU_factor =========================================================== */
382 /* ========================================================================== */
383 
384 KLU_numeric *KLU_factor /* returns NULL if error, or a valid
385  KLU_numeric object if successful */
386 (
387  /* --- inputs --- */
388  Int Ap [ ], /* size n+1, column pointers */
389  Int Ai [ ], /* size nz, row indices */
390  double Ax [ ],
391  KLU_symbolic *Symbolic,
392  /* -------------- */
393  KLU_common *Common
394 )
395 {
396  Int n, nzoff, nblocks, maxblock, k, ok = TRUE ;
397  Int *R ;
398  KLU_numeric *Numeric ;
399  size_t n1, nzoff1, s, b6, n3 ;
400 #ifdef KLU_ENABLE_OPENMP
401  int num_threads;
402 #endif
403 
404  if (Common == NULL)
405  {
406  return (NULL) ;
407  }
408  Common->status = KLU_OK ;
409  Common->numerical_rank = EMPTY ;
410  Common->singular_col = EMPTY ;
411 
412  /* ---------------------------------------------------------------------- */
413  /* get the contents of the Symbolic object */
414  /* ---------------------------------------------------------------------- */
415 
416  /* check for a valid Symbolic object */
417  if (Symbolic == NULL)
418  {
419  Common->status = KLU_INVALID ;
420  return (NULL) ;
421  }
422 
423  n = Symbolic->n ;
424  nzoff = Symbolic->nzoff ;
425  nblocks = Symbolic->nblocks ;
426  maxblock = Symbolic->maxblock ;
427  R = Symbolic->R ;
428  PRINTF (("KLU_factor: n %d nzoff %d nblocks %d maxblock %d\n",
429  n, nzoff, nblocks, maxblock)) ;
430 
431  /* ---------------------------------------------------------------------- */
432  /* get control parameters and make sure they are in the proper range */
433  /* ---------------------------------------------------------------------- */
434 
435  Common->initmem_amd = MAX (1.0, Common->initmem_amd) ;
436  Common->initmem = MAX (1.0, Common->initmem) ;
437  Common->tol = MIN (Common->tol, 1.0) ;
438  Common->tol = MAX (0.0, Common->tol) ;
439  Common->memgrow = MAX (1.0, Common->memgrow) ;
440 
441  /* ---------------------------------------------------------------------- */
442  /* allocate the Numeric object */
443  /* ---------------------------------------------------------------------- */
444 
445  /* this will not cause size_t overflow (already checked by KLU_symbolic) */
446  n1 = ((size_t) n) + 1 ;
447  nzoff1 = ((size_t) nzoff) + 1 ;
448 
449  Numeric = KLU_malloc (sizeof (KLU_numeric), 1, Common) ;
450  if (Common->status < KLU_OK)
451  {
452  /* out of memory */
453  Common->status = KLU_OUT_OF_MEMORY ;
454  return (NULL) ;
455  }
456  Numeric->n = n ;
457  Numeric->nblocks = nblocks ;
458  Numeric->nzoff = nzoff ;
459  Numeric->Pnum = KLU_malloc (n, sizeof (Int), Common) ;
460  Numeric->Offp = KLU_malloc (n1, sizeof (Int), Common) ;
461  Numeric->Offi = KLU_malloc (nzoff1, sizeof (Int), Common) ;
462  Numeric->Offx = KLU_malloc (nzoff1, sizeof (Entry), Common) ;
463 
464  Numeric->Lip = KLU_malloc (n, sizeof (Int), Common) ;
465  Numeric->Uip = KLU_malloc (n, sizeof (Int), Common) ;
466  Numeric->Llen = KLU_malloc (n, sizeof (Int), Common) ;
467  Numeric->Ulen = KLU_malloc (n, sizeof (Int), Common) ;
468 
469  Numeric->LUsize = KLU_malloc (nblocks, sizeof (size_t), Common) ;
470 
471  Numeric->LUbx = KLU_malloc (nblocks, sizeof (Unit *), Common) ;
472  if (Numeric->LUbx != NULL)
473  {
474  for (k = 0 ; k < nblocks ; k++)
475  {
476  Numeric->LUbx [k] = NULL ;
477  }
478  }
479 
480  Numeric->Udiag = KLU_malloc (n, sizeof (Entry), Common) ;
481 
482  if (Common->scale > 0)
483  {
484  Numeric->Rs = KLU_malloc (n, sizeof (double), Common) ;
485  }
486  else
487  {
488  /* no scaling */
489  Numeric->Rs = NULL ;
490  }
491 
492  Numeric->Pinv = KLU_malloc (n, sizeof (Int), Common) ;
493 
494  /* allocate permanent workspace for factorization and solve. Note that the
495  * solver will use an Xwork of size 4n, whereas the factorization codes use
496  * an Xwork of size n and integer space (Iwork) of size 6n. KLU_condest
497  * uses an Xwork of size 2n. Total size is:
498  *
499  * n*sizeof(Entry) + max (6*maxblock*sizeof(Int), 3*n*sizeof(Entry))
500  *
501  * If OpenMP is enabled the solve uses an Xwork of size num_threads * 4n.
502  */
503 #ifdef KLU_ENABLE_OPENMP
504 #pragma omp parallel
505  num_threads = omp_get_num_threads();
506  s = KLU_mult_size_t (n, num_threads * sizeof (Entry), &ok) ;
507  n3 = KLU_mult_size_t (n, 3 * num_threads * sizeof (Entry), &ok) ;
508 #else
509  s = KLU_mult_size_t (n, sizeof (Entry), &ok) ;
510  n3 = KLU_mult_size_t (n, 3 * sizeof (Entry), &ok) ;
511 #endif
512  b6 = KLU_mult_size_t (maxblock, 6 * sizeof (Int), &ok) ;
513  Numeric->worksize = KLU_add_size_t (s, MAX (n3, b6), &ok) ;
514  Numeric->Work = KLU_malloc (Numeric->worksize, 1, Common) ;
515  Numeric->Xwork = Numeric->Work ;
516  Numeric->Iwork = (Int *) ((Entry *) Numeric->Xwork + n) ;
517  if (!ok || Common->status < KLU_OK)
518  {
519  /* out of memory or problem too large */
520  Common->status = ok ? KLU_OUT_OF_MEMORY : KLU_TOO_LARGE ;
521  KLU_free_numeric (&Numeric, Common) ;
522  return (NULL) ;
523  }
524 
525  /* ---------------------------------------------------------------------- */
526  /* factorize the blocks */
527  /* ---------------------------------------------------------------------- */
528 
529  factor2 (Ap, Ai, (Entry *) Ax, Symbolic, Numeric, Common) ;
530 
531  /* ---------------------------------------------------------------------- */
532  /* return or free the Numeric object */
533  /* ---------------------------------------------------------------------- */
534 
535  if (Common->status < KLU_OK)
536  {
537  /* out of memory or inputs invalid */
538  KLU_free_numeric (&Numeric, Common) ;
539  }
540  else if (Common->status == KLU_SINGULAR)
541  {
542  if (Common->halt_if_singular)
543  {
544  /* Matrix is singular, and the Numeric object is only partially
545  * defined because we halted early. This is the default case for
546  * a singular matrix. */
547  KLU_free_numeric (&Numeric, Common) ;
548  }
549  }
550  else if (Common->status == KLU_OK)
551  {
552  /* successful non-singular factorization */
553  Common->numerical_rank = n ;
554  Common->singular_col = n ;
555  }
556  return (Numeric) ;
557 }
#define KLU_kernel_factor
#define REAL
#define KLU_INVALID
#define PRINT_ENTRY(a)
#define EMPTY
#define KLU_symbolic
#define Int
#define FALSE
#define KLU_add_size_t
#define P(k)
#define KLU_mult_size_t
double Unit
#define MAX(a, b)
#define KLU_OUT_OF_MEMORY
#define NULL
#define KLU_free_numeric
#define CLEAR(c)
#define KLU_TOO_LARGE
#define ASSERT(expression)
#define KLU_numeric
#define KLU_valid_LU
#define KLU_malloc
#define IS_ZERO(x)
static void factor2(Int Ap[], Int Ai[], Entry Ax[], KLU_symbolic *Symbolic, KLU_numeric *Numeric, KLU_common *Common)
KLU_numeric * KLU_factor(Int Ap[], Int Ai[], double Ax[], KLU_symbolic *Symbolic, KLU_common *Common)
#define SCALE_DIV_ASSIGN(a, c, s)
#define KLU_scale
#define KLU_valid
#define KLU_common
#define MIN(a, b)
#define KLU_SINGULAR
#define Entry
#define PRINTF(params)
int n
#define TRUE
#define KLU_OK