Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2_factor.hpp
1 /* ========================================================================== */
2 /* === KLU_factor =========================================================== */
3 /* ========================================================================== */
4 // @HEADER
5 // *****************************************************************************
6 // KLU2: A Direct Linear Solver package
7 //
8 // Copyright 2011 NTESS and the KLU2 contributors.
9 // SPDX-License-Identifier: LGPL-2.1-or-later
10 // *****************************************************************************
11 // @HEADER
12 
13 /* Factor the matrix, after ordering and analyzing it with KLU_analyze
14  * or KLU_analyze_given.
15  */
16 
17 #ifndef KLU2_FACTOR_HPP
18 #define KLU2_FACTOR_HPP
19 
20 #include "klu2_internal.h"
21 #include "klu2.hpp"
22 #include "klu2_memory.hpp"
23 #include "klu2_scale.hpp"
24 
25 
26 /* ========================================================================== */
27 /* === KLU_factor2 ========================================================== */
28 /* ========================================================================== */
29 
30 template <typename Entry, typename Int>
31 static void factor2
32 (
33  /* inputs, not modified */
34  Int Ap [ ], /* size n+1, column pointers */
35  Int Ai [ ], /* size nz, row indices */
36  Entry Ax [ ],
37  KLU_symbolic<Entry, Int> *Symbolic,
38 
39  /* inputs, modified on output: */
40  KLU_numeric<Entry, Int> *Numeric,
41  KLU_common<Entry, Int> *Common
42 )
43 {
44  double lsize ;
45  double *Lnz, *Rs ;
46  Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Pblock, *Pinv, *Iwork,
47  *Lip, *Uip, *Llen, *Ulen ;
48  Entry *Offx, *X, s, *Udiag ;
49  Unit **LUbx ;
50  Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, lnz, unz, p, newrow,
51  nblocks, poff, nzoff, lnz_block, unz_block, scale, max_lnz_block,
52  max_unz_block ;
53 
54  /* ---------------------------------------------------------------------- */
55  /* initializations */
56  /* ---------------------------------------------------------------------- */
57 
58  /* get the contents of the Symbolic object */
59  n = Symbolic->n ;
60  P = Symbolic->P ;
61  Q = Symbolic->Q ;
62  R = Symbolic->R ;
63  Lnz = Symbolic->Lnz ;
64  nblocks = Symbolic->nblocks ;
65  nzoff = Symbolic->nzoff ;
66 
67  Pnum = Numeric->Pnum ;
68  Offp = Numeric->Offp ;
69  Offi = Numeric->Offi ;
70  Offx = (Entry *) Numeric->Offx ;
71 
72  Lip = Numeric->Lip ;
73  Uip = Numeric->Uip ;
74  Llen = Numeric->Llen ;
75  Ulen = Numeric->Ulen ;
76  LUbx = (Unit **) Numeric->LUbx ;
77  Udiag = (Entry *) Numeric->Udiag ;
78 
79  Rs = Numeric->Rs ;
80  Pinv = Numeric->Pinv ;
81  X = (Entry *) Numeric->Xwork ; /* X is of size n */
82  Iwork = Numeric->Iwork ; /* 5*maxblock for KLU_factor */
83  /* 1*maxblock for Pblock */
84  Pblock = Iwork + 5*((size_t) Symbolic->maxblock) ;
85  Common->nrealloc = 0 ;
86  scale = Common->scale ;
87  max_lnz_block = 1 ;
88  max_unz_block = 1 ;
89 
90  /* compute the inverse of P from symbolic analysis. Will be updated to
91  * become the inverse of the numerical factorization when the factorization
92  * is done, for use in KLU_refactor */
93 #ifndef NDEBUGKLU2
94  for (k = 0 ; k < n ; k++)
95  {
96  Pinv [k] = AMESOS2_KLU2_EMPTY ;
97  }
98 #endif
99  for (k = 0 ; k < n ; k++)
100  {
101  ASSERT (P [k] >= 0 && P [k] < n) ;
102  Pinv [P [k]] = k ;
103  }
104 #ifndef NDEBUGKLU2
105  for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != AMESOS2_KLU2_EMPTY) ;
106 #endif
107 
108  lnz = 0 ;
109  unz = 0 ;
110  Common->noffdiag = 0 ;
111  Offp [0] = 0 ;
112 
113  /* ---------------------------------------------------------------------- */
114  /* optionally check input matrix and compute scale factors */
115  /* ---------------------------------------------------------------------- */
116 
117  if (scale >= 0)
118  {
119  /* use Pnum as workspace. NOTE: scale factors are not yet permuted
120  * according to the final pivot row ordering, so Rs [oldrow] is the
121  * scale factor for A (oldrow,:), for the user's matrix A. Pnum is
122  * used as workspace in KLU_scale. When the factorization is done,
123  * the scale factors are permuted according to the final pivot row
124  * permutation, so that Rs [k] is the scale factor for the kth row of
125  * A(p,q) where p and q are the final row and column permutations. */
126  /*KLU_scale (scale, n, Ap, Ai, (double *) Ax, Rs, Pnum, Common) ;*/
127  KLU_scale (scale, n, Ap, Ai, Ax, Rs, Pnum, Common) ;
128  if (Common->status < KLU_OK)
129  {
130  /* matrix is invalid */
131  return ;
132  }
133  }
134 
135 #ifndef NDEBUGKLU2
136  if (scale > 0)
137  {
138  for (k = 0 ; k < n ; k++) PRINTF (("Rs [%d] %g\n", k, Rs [k])) ;
139  }
140 #endif
141 
142  /* ---------------------------------------------------------------------- */
143  /* factor each block using klu */
144  /* ---------------------------------------------------------------------- */
145 
146  for (block = 0 ; block < nblocks ; block++)
147  {
148 
149  /* ------------------------------------------------------------------ */
150  /* the block is from rows/columns k1 to k2-1 */
151  /* ------------------------------------------------------------------ */
152 
153  k1 = R [block] ;
154  k2 = R [block+1] ;
155  nk = k2 - k1 ;
156  PRINTF (("FACTOR BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
157 
158  if (nk == 1)
159  {
160 
161  /* -------------------------------------------------------------- */
162  /* singleton case */
163  /* -------------------------------------------------------------- */
164 
165  poff = Offp [k1] ;
166  oldcol = Q [k1] ;
167  pend = Ap [oldcol+1] ;
168  CLEAR (s) ;
169 
170  if (scale <= 0)
171  {
172  /* no scaling */
173  for (p = Ap [oldcol] ; p < pend ; p++)
174  {
175  oldrow = Ai [p] ;
176  newrow = Pinv [oldrow] ;
177  if (newrow < k1)
178  {
179  Offi [poff] = oldrow ;
180  Offx [poff] = Ax [p] ;
181  poff++ ;
182  }
183  else
184  {
185  ASSERT (newrow == k1) ;
186  PRINTF (("singleton block %d", block)) ;
187  PRINT_ENTRY (Ax [p]) ;
188  s = Ax [p] ;
189  }
190  }
191  }
192  else
193  {
194  /* row scaling. NOTE: scale factors are not yet permuted
195  * according to the pivot row permutation, so Rs [oldrow] is
196  * used below. When the factorization is done, the scale
197  * factors are permuted, so that Rs [newrow] will be used in
198  * klu_solve, klu_tsolve, and klu_rgrowth */
199  for (p = Ap [oldcol] ; p < pend ; p++)
200  {
201  oldrow = Ai [p] ;
202  newrow = Pinv [oldrow] ;
203  if (newrow < k1)
204  {
205  Offi [poff] = oldrow ;
206  /* Offx [poff] = Ax [p] / Rs [oldrow] ; */
207  SCALE_DIV_ASSIGN (Offx [poff], Ax [p], Rs [oldrow]) ;
208  poff++ ;
209  }
210  else
211  {
212  ASSERT (newrow == k1) ;
213  PRINTF (("singleton block %d ", block)) ;
214  PRINT_ENTRY (Ax[p]) ;
215  SCALE_DIV_ASSIGN (s, Ax [p], Rs [oldrow]) ;
216  }
217  }
218  }
219 
220  Udiag [k1] = s ;
221 
222  if (IS_ZERO (s))
223  {
224  /* singular singleton */
225  Common->status = KLU_SINGULAR ;
226  Common->numerical_rank = k1 ;
227  Common->singular_col = oldcol ;
228  if (Common->halt_if_singular)
229  {
230  return ;
231  }
232  }
233 
234  Offp [k1+1] = poff ;
235  Pnum [k1] = P [k1] ;
236  lnz++ ;
237  unz++ ;
238 
239  }
240  else
241  {
242 
243  /* -------------------------------------------------------------- */
244  /* construct and factorize the kth block */
245  /* -------------------------------------------------------------- */
246 
247  if (Lnz [block] < 0)
248  {
249  /* COLAMD was used - no estimate of fill-in */
250  /* use 10 times the nnz in A, plus n */
251  lsize = -(Common->initmem) ;
252  }
253  else
254  {
255  lsize = Common->initmem_amd * Lnz [block] + nk ;
256  }
257 
258  /* allocates 1 arrays: LUbx [block] */
259  Numeric->LUsize [block] = KLU_kernel_factor (nk, Ap, Ai, Ax, Q,
260  lsize, &LUbx [block], Udiag + k1, Llen + k1, Ulen + k1,
261  Lip + k1, Uip + k1, Pblock, &lnz_block, &unz_block,
262  X, Iwork, k1, Pinv, Rs, Offp, Offi, Offx, Common) ;
263 
264  if (Common->status < KLU_OK ||
265  (Common->status == KLU_SINGULAR && Common->halt_if_singular))
266  {
267  /* out of memory, invalid inputs, or singular */
268  return ;
269  }
270 
271  PRINTF (("\n----------------------- L %d:\n", block)) ;
272  ASSERT (KLU_valid_LU (nk, TRUE, Lip+k1, Llen+k1, LUbx [block])) ;
273  PRINTF (("\n----------------------- U %d:\n", block)) ;
274  ASSERT (KLU_valid_LU (nk, FALSE, Uip+k1, Ulen+k1, LUbx [block])) ;
275 
276  /* -------------------------------------------------------------- */
277  /* get statistics */
278  /* -------------------------------------------------------------- */
279 
280  lnz += lnz_block ;
281  unz += unz_block ;
282  max_lnz_block = MAX (max_lnz_block, lnz_block) ;
283  max_unz_block = MAX (max_unz_block, unz_block) ;
284 
285  if (Lnz [block] == AMESOS2_KLU2_EMPTY)
286  {
287  /* revise estimate for subsequent factorization */
288  Lnz [block] = MAX (lnz_block, unz_block) ;
289  }
290 
291  /* -------------------------------------------------------------- */
292  /* combine the klu row ordering with the symbolic pre-ordering */
293  /* -------------------------------------------------------------- */
294 
295  PRINTF (("Pnum, 1-based:\n")) ;
296  for (k = 0 ; k < nk ; k++)
297  {
298  ASSERT (k + k1 < n) ;
299  ASSERT (Pblock [k] + k1 < n) ;
300  Pnum [k + k1] = P [Pblock [k] + k1] ;
301  PRINTF (("Pnum (%d + %d + 1 = %d) = %d + 1 = %d\n",
302  k, k1, k+k1+1, Pnum [k+k1], Pnum [k+k1]+1)) ;
303  }
304 
305  /* the local pivot row permutation Pblock is no longer needed */
306  }
307  }
308  ASSERT (nzoff == Offp [n]) ;
309  PRINTF (("\n------------------- Off diagonal entries:\n")) ;
310  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
311 
312  Numeric->lnz = lnz ;
313  Numeric->unz = unz ;
314  Numeric->max_lnz_block = max_lnz_block ;
315  Numeric->max_unz_block = max_unz_block ;
316 
317  /* compute the inverse of Pnum */
318 #ifndef NDEBUGKLU2
319  for (k = 0 ; k < n ; k++)
320  {
321  Pinv [k] = AMESOS2_KLU2_EMPTY ;
322  }
323 #endif
324  for (k = 0 ; k < n ; k++)
325  {
326  ASSERT (Pnum [k] >= 0 && Pnum [k] < n) ;
327  Pinv [Pnum [k]] = k ;
328  }
329 #ifndef NDEBUGKLU2
330  for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != AMESOS2_KLU2_EMPTY) ;
331 #endif
332 
333  /* permute scale factors Rs according to pivotal row order */
334  if (scale > 0)
335  {
336  for (k = 0 ; k < n ; k++)
337  {
338  /* TODO : Check. REAL(X[k]) Can be just X[k] */
339  /* REAL (X [k]) = Rs [Pnum [k]] ; */
340  X [k] = Rs [Pnum [k]] ;
341  }
342  for (k = 0 ; k < n ; k++)
343  {
344  Rs [k] = REAL (X [k]) ;
345  }
346  }
347 
348  PRINTF (("\n------------------- Off diagonal entries, old:\n")) ;
349  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
350 
351  /* apply the pivot row permutations to the off-diagonal entries */
352  for (p = 0 ; p < nzoff ; p++)
353  {
354  ASSERT (Offi [p] >= 0 && Offi [p] < n) ;
355  Offi [p] = Pinv [Offi [p]] ;
356  }
357 
358  PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
359  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
360 
361 #ifndef NDEBUGKLU2
362  {
363  PRINTF (("\n ############# KLU_BTF_FACTOR done, nblocks %d\n",nblocks));
364  Entry ss, *Udiag = Numeric->Udiag ;
365  for (block = 0 ; block < nblocks && Common->status == KLU_OK ; block++)
366  {
367  k1 = R [block] ;
368  k2 = R [block+1] ;
369  nk = k2 - k1 ;
370  PRINTF (("\n======================KLU_factor output: k1 %d k2 %d nk %d\n",k1,k2,nk)) ;
371  if (nk == 1)
372  {
373  PRINTF (("singleton ")) ;
374  /* ENTRY_PRINT (singleton [block]) ; */
375  ss = Udiag [k1] ;
376  PRINT_ENTRY (ss) ;
377  }
378  else
379  {
380  Int *Lip, *Uip, *Llen, *Ulen ;
381  Unit *LU ;
382  Lip = Numeric->Lip + k1 ;
383  Llen = Numeric->Llen + k1 ;
384  LU = (Unit *) Numeric->LUbx [block] ;
385  PRINTF (("\n---- L block %d\n", block));
386  ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
387  Uip = Numeric->Uip + k1 ;
388  Ulen = Numeric->Ulen + k1 ;
389  PRINTF (("\n---- U block %d\n", block)) ;
390  ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
391  }
392  }
393  }
394 #endif
395 }
396 
397 
398 
399 /* ========================================================================== */
400 /* === KLU_factor =========================================================== */
401 /* ========================================================================== */
402 
403 template <typename Entry, typename Int>
404 KLU_numeric<Entry, Int> *KLU_factor /* returns NULL if error, or a valid
405  KLU_numeric object if successful */
406 (
407  /* --- inputs --- */
408  Int Ap [ ], /* size n+1, column pointers */
409  Int Ai [ ], /* size nz, row indices */
410  /* TODO : Checked, switch from double to Entry , still make sure works in all cases */
411  Entry Ax [ ],
412  KLU_symbolic<Entry, Int> *Symbolic,
413  /* -------------- */
414  KLU_common<Entry, Int> *Common
415 )
416 {
417  Int n, nzoff, nblocks, maxblock, k, ok = TRUE ;
418 
419  KLU_numeric<Entry, Int> *Numeric ;
420  size_t n1, nzoff1, s, b6, n3 ;
421 
422  if (Common == NULL)
423  {
424  return (NULL) ;
425  }
426  Common->status = KLU_OK ;
427  Common->numerical_rank = AMESOS2_KLU2_EMPTY ;
428  Common->singular_col = AMESOS2_KLU2_EMPTY ;
429 
430  /* ---------------------------------------------------------------------- */
431  /* get the contents of the Symbolic object */
432  /* ---------------------------------------------------------------------- */
433 
434  /* check for a valid Symbolic object */
435  if (Symbolic == NULL)
436  {
437  Common->status = KLU_INVALID ;
438  return (NULL) ;
439  }
440 
441  n = Symbolic->n ;
442  nzoff = Symbolic->nzoff ;
443  nblocks = Symbolic->nblocks ;
444  maxblock = Symbolic->maxblock ;
445 
446  PRINTF (("KLU_factor: n %d nzoff %d nblocks %d maxblock %d\n",
447  n, nzoff, nblocks, maxblock)) ;
448 
449  /* ---------------------------------------------------------------------- */
450  /* get control parameters and make sure they are in the proper range */
451  /* ---------------------------------------------------------------------- */
452 
453  Common->initmem_amd = MAX (1.0, Common->initmem_amd) ;
454  Common->initmem = MAX (1.0, Common->initmem) ;
455  Common->tol = MIN (Common->tol, 1.0) ;
456  Common->tol = MAX (0.0, Common->tol) ;
457  Common->memgrow = MAX (1.0, Common->memgrow) ;
458 
459  /* ---------------------------------------------------------------------- */
460  /* allocate the Numeric object */
461  /* ---------------------------------------------------------------------- */
462 
463  /* this will not cause size_t overflow (already checked by KLU_symbolic) */
464  n1 = ((size_t) n) + 1 ;
465  nzoff1 = ((size_t) nzoff) + 1 ;
466 
467  Numeric = (KLU_numeric<Entry, Int> *) KLU_malloc (sizeof (KLU_numeric<Entry, Int>), 1, Common) ;
468  if (Common->status < KLU_OK)
469  {
470  /* out of memory */
471  Common->status = KLU_OUT_OF_MEMORY ;
472  return (NULL) ;
473  }
474  Numeric->n = n ;
475  Numeric->nblocks = nblocks ;
476  Numeric->nzoff = nzoff ;
477  Numeric->Pnum = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
478  Numeric->Offp = (Int *) KLU_malloc (n1, sizeof (Int), Common) ;
479  Numeric->Offi = (Int *) KLU_malloc (nzoff1, sizeof (Int), Common) ;
480  Numeric->Offx = KLU_malloc (nzoff1, sizeof (Entry), Common) ;
481 
482  Numeric->Lip = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
483  Numeric->Uip = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
484  Numeric->Llen = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
485  Numeric->Ulen = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
486 
487  Numeric->LUsize = (size_t *) KLU_malloc (nblocks, sizeof (size_t), Common) ;
488 
489  Numeric->LUbx = (void **) KLU_malloc (nblocks, sizeof (Unit *), Common) ;
490  if (Numeric->LUbx != NULL)
491  {
492  for (k = 0 ; k < nblocks ; k++)
493  {
494  Numeric->LUbx [k] = NULL ;
495  }
496  }
497 
498  Numeric->Udiag = KLU_malloc (n, sizeof (Entry), Common) ;
499 
500  if (Common->scale > 0)
501  {
502  Numeric->Rs = (double *) KLU_malloc (n, sizeof (double), Common) ;
503  }
504  else
505  {
506  /* no scaling */
507  Numeric->Rs = NULL ;
508  }
509 
510  Numeric->Pinv = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
511 
512  /* allocate permanent workspace for factorization and solve. Note that the
513  * solver will use an Xwork of size 4n, whereas the factorization codes use
514  * an Xwork of size n and integer space (Iwork) of size 6n. KLU_condest
515  * uses an Xwork of size 2n. Total size is:
516  *
517  * n*sizeof(Entry) + max (6*maxblock*sizeof(Int), 3*n*sizeof(Entry))
518  */
519  s = KLU_mult_size_t (n, sizeof (Entry), &ok) ;
520  n3 = KLU_mult_size_t (n, 3 * sizeof (Entry), &ok) ;
521  b6 = KLU_mult_size_t (maxblock, 6 * sizeof (Int), &ok) ;
522  Numeric->worksize = KLU_add_size_t (s, MAX (n3, b6), &ok) ;
523  Numeric->Work = KLU_malloc (Numeric->worksize, 1, Common) ;
524  Numeric->Xwork = Numeric->Work ;
525  Numeric->Iwork = (Int *) ((Entry *) Numeric->Xwork + n) ;
526  if (!ok || Common->status < KLU_OK)
527  {
528  /* out of memory or problem too large */
529  Common->status = ok ? KLU_OUT_OF_MEMORY : KLU_TOO_LARGE ;
530  KLU_free_numeric (&Numeric, Common) ;
531  return (NULL) ;
532  }
533 
534  /* ---------------------------------------------------------------------- */
535  /* factorize the blocks */
536  /* ---------------------------------------------------------------------- */
537 
538  factor2 (Ap, Ai, (Entry *) Ax, Symbolic, Numeric, Common) ;
539 
540  /* ---------------------------------------------------------------------- */
541  /* return or free the Numeric object */
542  /* ---------------------------------------------------------------------- */
543 
544  if (Common->status < KLU_OK)
545  {
546  /* out of memory or inputs invalid */
547  KLU_free_numeric (&Numeric, Common) ;
548  }
549  else if (Common->status == KLU_SINGULAR)
550  {
551  if (Common->halt_if_singular)
552  {
553  /* Matrix is singular, and the Numeric object is only partially
554  * defined because we halted early. This is the default case for
555  * a singular matrix. */
556  KLU_free_numeric (&Numeric, Common) ;
557  }
558  }
559  else if (Common->status == KLU_OK)
560  {
561  /* successful non-singular factorization */
562  Common->numerical_rank = n ;
563  Common->singular_col = n ;
564  }
565  return (Numeric) ;
566 }
567 
568 #endif
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.
int Ap[]
Column offsets.
Definition: klu2_simple.cpp:52
int Ai[]
Row values.
Definition: klu2_simple.cpp:54