Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2_factor.hpp
1 /* ========================================================================== */
2 /* === KLU_factor =========================================================== */
3 /* ========================================================================== */
4 // @HEADER
5 // ***********************************************************************
6 //
7 // KLU2: A Direct Linear Solver package
8 // Copyright 2011 Sandia Corporation
9 //
10 // Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11 // U.S. Government retains certain rights in this software.
12 //
13 // This library is free software; you can redistribute it and/or modify
14 // it under the terms of the GNU Lesser General Public License as
15 // published by the Free Software Foundation; either version 2.1 of the
16 // License, or (at your option) any later version.
17 //
18 // This library is distributed in the hope that it will be useful, but
19 // WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 // Lesser General Public License for more details.
22 //
23 // You should have received a copy of the GNU Lesser General Public
24 // License along with this library; if not, write to the Free Software
25 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 // USA
27 // Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28 //
29 // KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30 // University of Florida. The Authors of KLU are Timothy A. Davis and
31 // Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32 // information for KLU.
33 //
34 // ***********************************************************************
35 // @HEADER
36 
37 /* Factor the matrix, after ordering and analyzing it with KLU_analyze
38  * or KLU_analyze_given.
39  */
40 
41 #ifndef KLU2_FACTOR_HPP
42 #define KLU2_FACTOR_HPP
43 
44 #include "klu2_internal.h"
45 #include "klu2.hpp"
46 #include "klu2_memory.hpp"
47 #include "klu2_scale.hpp"
48 
49 
50 /* ========================================================================== */
51 /* === KLU_factor2 ========================================================== */
52 /* ========================================================================== */
53 
54 template <typename Entry, typename Int>
55 static void factor2
56 (
57  /* inputs, not modified */
58  Int Ap [ ], /* size n+1, column pointers */
59  Int Ai [ ], /* size nz, row indices */
60  Entry Ax [ ],
61  KLU_symbolic<Entry, Int> *Symbolic,
62 
63  /* inputs, modified on output: */
64  KLU_numeric<Entry, Int> *Numeric,
65  KLU_common<Entry, Int> *Common
66 )
67 {
68  double lsize ;
69  double *Lnz, *Rs ;
70  Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Pblock, *Pinv, *Iwork,
71  *Lip, *Uip, *Llen, *Ulen ;
72  Entry *Offx, *X, s, *Udiag ;
73  Unit **LUbx ;
74  Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, lnz, unz, p, newrow,
75  nblocks, poff, nzoff, lnz_block, unz_block, scale, max_lnz_block,
76  max_unz_block ;
77 
78  /* ---------------------------------------------------------------------- */
79  /* initializations */
80  /* ---------------------------------------------------------------------- */
81 
82  /* get the contents of the Symbolic object */
83  n = Symbolic->n ;
84  P = Symbolic->P ;
85  Q = Symbolic->Q ;
86  R = Symbolic->R ;
87  Lnz = Symbolic->Lnz ;
88  nblocks = Symbolic->nblocks ;
89  nzoff = Symbolic->nzoff ;
90 
91  Pnum = Numeric->Pnum ;
92  Offp = Numeric->Offp ;
93  Offi = Numeric->Offi ;
94  Offx = (Entry *) Numeric->Offx ;
95 
96  Lip = Numeric->Lip ;
97  Uip = Numeric->Uip ;
98  Llen = Numeric->Llen ;
99  Ulen = Numeric->Ulen ;
100  LUbx = (Unit **) Numeric->LUbx ;
101  Udiag = (Entry *) Numeric->Udiag ;
102 
103  Rs = Numeric->Rs ;
104  Pinv = Numeric->Pinv ;
105  X = (Entry *) Numeric->Xwork ; /* X is of size n */
106  Iwork = Numeric->Iwork ; /* 5*maxblock for KLU_factor */
107  /* 1*maxblock for Pblock */
108  Pblock = Iwork + 5*((size_t) Symbolic->maxblock) ;
109  Common->nrealloc = 0 ;
110  scale = Common->scale ;
111  max_lnz_block = 1 ;
112  max_unz_block = 1 ;
113 
114  /* compute the inverse of P from symbolic analysis. Will be updated to
115  * become the inverse of the numerical factorization when the factorization
116  * is done, for use in KLU_refactor */
117 #ifndef NDEBUGKLU2
118  for (k = 0 ; k < n ; k++)
119  {
120  Pinv [k] = EMPTY ;
121  }
122 #endif
123  for (k = 0 ; k < n ; k++)
124  {
125  ASSERT (P [k] >= 0 && P [k] < n) ;
126  Pinv [P [k]] = k ;
127  }
128 #ifndef NDEBUGKLU2
129  for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
130 #endif
131 
132  lnz = 0 ;
133  unz = 0 ;
134  Common->noffdiag = 0 ;
135  Offp [0] = 0 ;
136 
137  /* ---------------------------------------------------------------------- */
138  /* optionally check input matrix and compute scale factors */
139  /* ---------------------------------------------------------------------- */
140 
141  if (scale >= 0)
142  {
143  /* use Pnum as workspace. NOTE: scale factors are not yet permuted
144  * according to the final pivot row ordering, so Rs [oldrow] is the
145  * scale factor for A (oldrow,:), for the user's matrix A. Pnum is
146  * used as workspace in KLU_scale. When the factorization is done,
147  * the scale factors are permuted according to the final pivot row
148  * permutation, so that Rs [k] is the scale factor for the kth row of
149  * A(p,q) where p and q are the final row and column permutations. */
150  /*KLU_scale (scale, n, Ap, Ai, (double *) Ax, Rs, Pnum, Common) ;*/
151  KLU_scale (scale, n, Ap, Ai, Ax, Rs, Pnum, Common) ;
152  if (Common->status < KLU_OK)
153  {
154  /* matrix is invalid */
155  return ;
156  }
157  }
158 
159 #ifndef NDEBUGKLU2
160  if (scale > 0)
161  {
162  for (k = 0 ; k < n ; k++) PRINTF (("Rs [%d] %g\n", k, Rs [k])) ;
163  }
164 #endif
165 
166  /* ---------------------------------------------------------------------- */
167  /* factor each block using klu */
168  /* ---------------------------------------------------------------------- */
169 
170  for (block = 0 ; block < nblocks ; block++)
171  {
172 
173  /* ------------------------------------------------------------------ */
174  /* the block is from rows/columns k1 to k2-1 */
175  /* ------------------------------------------------------------------ */
176 
177  k1 = R [block] ;
178  k2 = R [block+1] ;
179  nk = k2 - k1 ;
180  PRINTF (("FACTOR BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
181 
182  if (nk == 1)
183  {
184 
185  /* -------------------------------------------------------------- */
186  /* singleton case */
187  /* -------------------------------------------------------------- */
188 
189  poff = Offp [k1] ;
190  oldcol = Q [k1] ;
191  pend = Ap [oldcol+1] ;
192  CLEAR (s) ;
193 
194  if (scale <= 0)
195  {
196  /* no scaling */
197  for (p = Ap [oldcol] ; p < pend ; p++)
198  {
199  oldrow = Ai [p] ;
200  newrow = Pinv [oldrow] ;
201  if (newrow < k1)
202  {
203  Offi [poff] = oldrow ;
204  Offx [poff] = Ax [p] ;
205  poff++ ;
206  }
207  else
208  {
209  ASSERT (newrow == k1) ;
210  PRINTF (("singleton block %d", block)) ;
211  PRINT_ENTRY (Ax [p]) ;
212  s = Ax [p] ;
213  }
214  }
215  }
216  else
217  {
218  /* row scaling. NOTE: scale factors are not yet permuted
219  * according to the pivot row permutation, so Rs [oldrow] is
220  * used below. When the factorization is done, the scale
221  * factors are permuted, so that Rs [newrow] will be used in
222  * klu_solve, klu_tsolve, and klu_rgrowth */
223  for (p = Ap [oldcol] ; p < pend ; p++)
224  {
225  oldrow = Ai [p] ;
226  newrow = Pinv [oldrow] ;
227  if (newrow < k1)
228  {
229  Offi [poff] = oldrow ;
230  /* Offx [poff] = Ax [p] / Rs [oldrow] ; */
231  SCALE_DIV_ASSIGN (Offx [poff], Ax [p], Rs [oldrow]) ;
232  poff++ ;
233  }
234  else
235  {
236  ASSERT (newrow == k1) ;
237  PRINTF (("singleton block %d ", block)) ;
238  PRINT_ENTRY (Ax[p]) ;
239  SCALE_DIV_ASSIGN (s, Ax [p], Rs [oldrow]) ;
240  }
241  }
242  }
243 
244  Udiag [k1] = s ;
245 
246  if (IS_ZERO (s))
247  {
248  /* singular singleton */
249  Common->status = KLU_SINGULAR ;
250  Common->numerical_rank = k1 ;
251  Common->singular_col = oldcol ;
252  if (Common->halt_if_singular)
253  {
254  return ;
255  }
256  }
257 
258  Offp [k1+1] = poff ;
259  Pnum [k1] = P [k1] ;
260  lnz++ ;
261  unz++ ;
262 
263  }
264  else
265  {
266 
267  /* -------------------------------------------------------------- */
268  /* construct and factorize the kth block */
269  /* -------------------------------------------------------------- */
270 
271  if (Lnz [block] < 0)
272  {
273  /* COLAMD was used - no estimate of fill-in */
274  /* use 10 times the nnz in A, plus n */
275  lsize = -(Common->initmem) ;
276  }
277  else
278  {
279  lsize = Common->initmem_amd * Lnz [block] + nk ;
280  }
281 
282  /* allocates 1 arrays: LUbx [block] */
283  Numeric->LUsize [block] = KLU_kernel_factor (nk, Ap, Ai, Ax, Q,
284  lsize, &LUbx [block], Udiag + k1, Llen + k1, Ulen + k1,
285  Lip + k1, Uip + k1, Pblock, &lnz_block, &unz_block,
286  X, Iwork, k1, Pinv, Rs, Offp, Offi, Offx, Common) ;
287 
288  if (Common->status < KLU_OK ||
289  (Common->status == KLU_SINGULAR && Common->halt_if_singular))
290  {
291  /* out of memory, invalid inputs, or singular */
292  return ;
293  }
294 
295  PRINTF (("\n----------------------- L %d:\n", block)) ;
296  ASSERT (KLU_valid_LU (nk, TRUE, Lip+k1, Llen+k1, LUbx [block])) ;
297  PRINTF (("\n----------------------- U %d:\n", block)) ;
298  ASSERT (KLU_valid_LU (nk, FALSE, Uip+k1, Ulen+k1, LUbx [block])) ;
299 
300  /* -------------------------------------------------------------- */
301  /* get statistics */
302  /* -------------------------------------------------------------- */
303 
304  lnz += lnz_block ;
305  unz += unz_block ;
306  max_lnz_block = MAX (max_lnz_block, lnz_block) ;
307  max_unz_block = MAX (max_unz_block, unz_block) ;
308 
309  if (Lnz [block] == EMPTY)
310  {
311  /* revise estimate for subsequent factorization */
312  Lnz [block] = MAX (lnz_block, unz_block) ;
313  }
314 
315  /* -------------------------------------------------------------- */
316  /* combine the klu row ordering with the symbolic pre-ordering */
317  /* -------------------------------------------------------------- */
318 
319  PRINTF (("Pnum, 1-based:\n")) ;
320  for (k = 0 ; k < nk ; k++)
321  {
322  ASSERT (k + k1 < n) ;
323  ASSERT (Pblock [k] + k1 < n) ;
324  Pnum [k + k1] = P [Pblock [k] + k1] ;
325  PRINTF (("Pnum (%d + %d + 1 = %d) = %d + 1 = %d\n",
326  k, k1, k+k1+1, Pnum [k+k1], Pnum [k+k1]+1)) ;
327  }
328 
329  /* the local pivot row permutation Pblock is no longer needed */
330  }
331  }
332  ASSERT (nzoff == Offp [n]) ;
333  PRINTF (("\n------------------- Off diagonal entries:\n")) ;
334  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
335 
336  Numeric->lnz = lnz ;
337  Numeric->unz = unz ;
338  Numeric->max_lnz_block = max_lnz_block ;
339  Numeric->max_unz_block = max_unz_block ;
340 
341  /* compute the inverse of Pnum */
342 #ifndef NDEBUGKLU2
343  for (k = 0 ; k < n ; k++)
344  {
345  Pinv [k] = EMPTY ;
346  }
347 #endif
348  for (k = 0 ; k < n ; k++)
349  {
350  ASSERT (Pnum [k] >= 0 && Pnum [k] < n) ;
351  Pinv [Pnum [k]] = k ;
352  }
353 #ifndef NDEBUGKLU2
354  for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
355 #endif
356 
357  /* permute scale factors Rs according to pivotal row order */
358  if (scale > 0)
359  {
360  for (k = 0 ; k < n ; k++)
361  {
362  /* TODO : Check. REAL(X[k]) Can be just X[k] */
363  /* REAL (X [k]) = Rs [Pnum [k]] ; */
364  X [k] = Rs [Pnum [k]] ;
365  }
366  for (k = 0 ; k < n ; k++)
367  {
368  Rs [k] = REAL (X [k]) ;
369  }
370  }
371 
372  PRINTF (("\n------------------- Off diagonal entries, old:\n")) ;
373  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
374 
375  /* apply the pivot row permutations to the off-diagonal entries */
376  for (p = 0 ; p < nzoff ; p++)
377  {
378  ASSERT (Offi [p] >= 0 && Offi [p] < n) ;
379  Offi [p] = Pinv [Offi [p]] ;
380  }
381 
382  PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
383  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
384 
385 #ifndef NDEBUGKLU2
386  {
387  PRINTF (("\n ############# KLU_BTF_FACTOR done, nblocks %d\n",nblocks));
388  Entry ss, *Udiag = Numeric->Udiag ;
389  for (block = 0 ; block < nblocks && Common->status == KLU_OK ; block++)
390  {
391  k1 = R [block] ;
392  k2 = R [block+1] ;
393  nk = k2 - k1 ;
394  PRINTF (("\n======================KLU_factor output: k1 %d k2 %d nk %d\n",k1,k2,nk)) ;
395  if (nk == 1)
396  {
397  PRINTF (("singleton ")) ;
398  /* ENTRY_PRINT (singleton [block]) ; */
399  ss = Udiag [k1] ;
400  PRINT_ENTRY (ss) ;
401  }
402  else
403  {
404  Int *Lip, *Uip, *Llen, *Ulen ;
405  Unit *LU ;
406  Lip = Numeric->Lip + k1 ;
407  Llen = Numeric->Llen + k1 ;
408  LU = (Unit *) Numeric->LUbx [block] ;
409  PRINTF (("\n---- L block %d\n", block));
410  ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
411  Uip = Numeric->Uip + k1 ;
412  Ulen = Numeric->Ulen + k1 ;
413  PRINTF (("\n---- U block %d\n", block)) ;
414  ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
415  }
416  }
417  }
418 #endif
419 }
420 
421 
422 
423 /* ========================================================================== */
424 /* === KLU_factor =========================================================== */
425 /* ========================================================================== */
426 
427 template <typename Entry, typename Int>
428 KLU_numeric<Entry, Int> *KLU_factor /* returns NULL if error, or a valid
429  KLU_numeric object if successful */
430 (
431  /* --- inputs --- */
432  Int Ap [ ], /* size n+1, column pointers */
433  Int Ai [ ], /* size nz, row indices */
434  /* TODO : Checked, switch from double to Entry , still make sure works in all cases */
435  Entry Ax [ ],
436  KLU_symbolic<Entry, Int> *Symbolic,
437  /* -------------- */
438  KLU_common<Entry, Int> *Common
439 )
440 {
441  Int n, nzoff, nblocks, maxblock, k, ok = TRUE ;
442 
443  KLU_numeric<Entry, Int> *Numeric ;
444  size_t n1, nzoff1, s, b6, n3 ;
445 
446  if (Common == NULL)
447  {
448  return (NULL) ;
449  }
450  Common->status = KLU_OK ;
451  Common->numerical_rank = EMPTY ;
452  Common->singular_col = EMPTY ;
453 
454  /* ---------------------------------------------------------------------- */
455  /* get the contents of the Symbolic object */
456  /* ---------------------------------------------------------------------- */
457 
458  /* check for a valid Symbolic object */
459  if (Symbolic == NULL)
460  {
461  Common->status = KLU_INVALID ;
462  return (NULL) ;
463  }
464 
465  n = Symbolic->n ;
466  nzoff = Symbolic->nzoff ;
467  nblocks = Symbolic->nblocks ;
468  maxblock = Symbolic->maxblock ;
469 
470  PRINTF (("KLU_factor: n %d nzoff %d nblocks %d maxblock %d\n",
471  n, nzoff, nblocks, maxblock)) ;
472 
473  /* ---------------------------------------------------------------------- */
474  /* get control parameters and make sure they are in the proper range */
475  /* ---------------------------------------------------------------------- */
476 
477  Common->initmem_amd = MAX (1.0, Common->initmem_amd) ;
478  Common->initmem = MAX (1.0, Common->initmem) ;
479  Common->tol = MIN (Common->tol, 1.0) ;
480  Common->tol = MAX (0.0, Common->tol) ;
481  Common->memgrow = MAX (1.0, Common->memgrow) ;
482 
483  /* ---------------------------------------------------------------------- */
484  /* allocate the Numeric object */
485  /* ---------------------------------------------------------------------- */
486 
487  /* this will not cause size_t overflow (already checked by KLU_symbolic) */
488  n1 = ((size_t) n) + 1 ;
489  nzoff1 = ((size_t) nzoff) + 1 ;
490 
491  Numeric = (KLU_numeric<Entry, Int> *) KLU_malloc (sizeof (KLU_numeric<Entry, Int>), 1, Common) ;
492  if (Common->status < KLU_OK)
493  {
494  /* out of memory */
495  Common->status = KLU_OUT_OF_MEMORY ;
496  return (NULL) ;
497  }
498  Numeric->n = n ;
499  Numeric->nblocks = nblocks ;
500  Numeric->nzoff = nzoff ;
501  Numeric->Pnum = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
502  Numeric->Offp = (Int *) KLU_malloc (n1, sizeof (Int), Common) ;
503  Numeric->Offi = (Int *) KLU_malloc (nzoff1, sizeof (Int), Common) ;
504  Numeric->Offx = KLU_malloc (nzoff1, sizeof (Entry), Common) ;
505 
506  Numeric->Lip = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
507  Numeric->Uip = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
508  Numeric->Llen = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
509  Numeric->Ulen = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
510 
511  Numeric->LUsize = (size_t *) KLU_malloc (nblocks, sizeof (size_t), Common) ;
512 
513  Numeric->LUbx = (void **) KLU_malloc (nblocks, sizeof (Unit *), Common) ;
514  if (Numeric->LUbx != NULL)
515  {
516  for (k = 0 ; k < nblocks ; k++)
517  {
518  Numeric->LUbx [k] = NULL ;
519  }
520  }
521 
522  Numeric->Udiag = KLU_malloc (n, sizeof (Entry), Common) ;
523 
524  if (Common->scale > 0)
525  {
526  Numeric->Rs = (double *) KLU_malloc (n, sizeof (double), Common) ;
527  }
528  else
529  {
530  /* no scaling */
531  Numeric->Rs = NULL ;
532  }
533 
534  Numeric->Pinv = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
535 
536  /* allocate permanent workspace for factorization and solve. Note that the
537  * solver will use an Xwork of size 4n, whereas the factorization codes use
538  * an Xwork of size n and integer space (Iwork) of size 6n. KLU_condest
539  * uses an Xwork of size 2n. Total size is:
540  *
541  * n*sizeof(Entry) + max (6*maxblock*sizeof(Int), 3*n*sizeof(Entry))
542  */
543  s = KLU_mult_size_t (n, sizeof (Entry), &ok) ;
544  n3 = KLU_mult_size_t (n, 3 * sizeof (Entry), &ok) ;
545  b6 = KLU_mult_size_t (maxblock, 6 * sizeof (Int), &ok) ;
546  Numeric->worksize = KLU_add_size_t (s, MAX (n3, b6), &ok) ;
547  Numeric->Work = KLU_malloc (Numeric->worksize, 1, Common) ;
548  Numeric->Xwork = Numeric->Work ;
549  Numeric->Iwork = (Int *) ((Entry *) Numeric->Xwork + n) ;
550  if (!ok || Common->status < KLU_OK)
551  {
552  /* out of memory or problem too large */
553  Common->status = ok ? KLU_OUT_OF_MEMORY : KLU_TOO_LARGE ;
554  KLU_free_numeric (&Numeric, Common) ;
555  return (NULL) ;
556  }
557 
558  /* ---------------------------------------------------------------------- */
559  /* factorize the blocks */
560  /* ---------------------------------------------------------------------- */
561 
562  factor2 (Ap, Ai, (Entry *) Ax, Symbolic, Numeric, Common) ;
563 
564  /* ---------------------------------------------------------------------- */
565  /* return or free the Numeric object */
566  /* ---------------------------------------------------------------------- */
567 
568  if (Common->status < KLU_OK)
569  {
570  /* out of memory or inputs invalid */
571  KLU_free_numeric (&Numeric, Common) ;
572  }
573  else if (Common->status == KLU_SINGULAR)
574  {
575  if (Common->halt_if_singular)
576  {
577  /* Matrix is singular, and the Numeric object is only partially
578  * defined because we halted early. This is the default case for
579  * a singular matrix. */
580  KLU_free_numeric (&Numeric, Common) ;
581  }
582  }
583  else if (Common->status == KLU_OK)
584  {
585  /* successful non-singular factorization */
586  Common->numerical_rank = n ;
587  Common->singular_col = n ;
588  }
589  return (Numeric) ;
590 }
591 
592 #endif
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.