Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2_kernel.hpp
1 /* ========================================================================== */
2 /* === KLU_kernel =========================================================== */
3 /* ========================================================================== */
4 // @HEADER
5 // *****************************************************************************
6 // KLU2: A Direct Linear Solver package
7 //
8 // Copyright 2011 NTESS and the KLU2 contributors.
9 // SPDX-License-Identifier: LGPL-2.1-or-later
10 // *****************************************************************************
11 // @HEADER
12 
13 /* Sparse left-looking LU factorization, with partial pivoting. Based on
14  * Gilbert & Peierl's method, with a non-recursive DFS and with Eisenstat &
15  * Liu's symmetric pruning. No user-callable routines are in this file.
16  */
17 
18 #ifndef KLU2_KERNEL_HPP
19 #define KLU2_KERNEL_HPP
20 
21 #include "klu2_internal.h"
22 #include "klu2_memory.hpp"
23 
24 /* ========================================================================== */
25 /* === dfs ================================================================== */
26 /* ========================================================================== */
27 
28 /* Does a depth-first-search, starting at node j. */
29 
30 template <typename Int>
31 static Int dfs
32 (
33  /* input, not modified on output: */
34  Int j, /* node at which to start the DFS */
35  Int k, /* mark value, for the Flag array */
36  Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or AMESOS2_KLU2_EMPTY if
37  * row i is not yet pivotal. */
38  Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */
39  Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */
40 
41  /* workspace, not defined on input or output */
42  Int Stack [ ], /* size n */
43 
44  /* input/output: */
45  Int Flag [ ], /* Flag [i] == k means i is marked */
46  Int Lpend [ ], /* for symmetric pruning */
47  Int top, /* top of stack on input*/
48  Unit LU [],
49  Int *Lik, /* Li row index array of the kth column */
50  Int *plength,
51 
52  /* other, not defined on input or output */
53  Int Ap_pos [ ] /* keeps track of position in adj list during DFS */
54 )
55 {
56  Int i, pos, jnew, head, l_length ;
57  Int *Li ;
58 
59  l_length = *plength ;
60 
61  head = 0 ;
62  Stack [0] = j ;
63  ASSERT (Flag [j] != k) ;
64 
65  while (head >= 0)
66  {
67  j = Stack [head] ;
68  jnew = Pinv [j] ;
69  ASSERT (jnew >= 0 && jnew < k) ; /* j is pivotal */
70 
71  if (Flag [j] != k) /* a node is not yet visited */
72  {
73  /* first time that j has been visited */
74  Flag [j] = k ;
75  PRINTF (("[ start dfs at %d : new %d\n", j, jnew)) ;
76  /* set Ap_pos [head] to one past the last entry in col j to scan */
77  Ap_pos [head] =
78  (Lpend [jnew] == AMESOS2_KLU2_EMPTY) ? Llen [jnew] : Lpend [jnew] ;
79  }
80 
81  /* add the adjacent nodes to the recursive stack by iterating through
82  * until finding another non-visited pivotal node */
83  Li = (Int *) (LU + Lip [jnew]) ;
84  for (pos = --Ap_pos [head] ; pos >= 0 ; --pos)
85  {
86  i = Li [pos] ;
87  if (Flag [i] != k)
88  {
89  /* node i is not yet visited */
90  if (Pinv [i] >= 0)
91  {
92  /* keep track of where we left off in the scan of the
93  * adjacency list of node j so we can restart j where we
94  * left off. */
95  Ap_pos [head] = pos ;
96 
97  /* node i is pivotal; push it onto the recursive stack
98  * and immediately break so we can recurse on node i. */
99  Stack [++head] = i ;
100  break ;
101  }
102  else
103  {
104  /* node i is not pivotal (no outgoing edges). */
105  /* Flag as visited and store directly into L,
106  * and continue with current node j. */
107  Flag [i] = k ;
108  Lik [l_length] = i ;
109  l_length++ ;
110  }
111  }
112  }
113 
114  if (pos == -1)
115  {
116  /* if all adjacent nodes of j are already visited, pop j from
117  * recursive stack and push j onto output stack */
118  head-- ;
119  Stack[--top] = j ;
120  PRINTF ((" end dfs at %d ] head : %d\n", j, head)) ;
121  }
122  }
123 
124  *plength = l_length ;
125  return (top) ;
126 }
127 
128 
129 /* ========================================================================== */
130 /* === lsolve_symbolic ====================================================== */
131 /* ========================================================================== */
132 
133 /* Finds the pattern of x, for the solution of Lx=b */
134 
135 template <typename Int>
136 static Int lsolve_symbolic
137 (
138  /* input, not modified on output: */
139  Int n, /* L is n-by-n, where n >= 0 */
140  Int k, /* also used as the mark value, for the Flag array */
141  Int Ap [ ],
142  Int Ai [ ],
143  Int Q [ ],
144  Int Pinv [ ], /* Pinv [i] = k if i is kth pivot row, or AMESOS2_KLU2_EMPTY if row i
145  * is not yet pivotal. */
146 
147  /* workspace, not defined on input or output */
148  Int Stack [ ], /* size n */
149 
150  /* workspace, defined on input and output */
151  Int Flag [ ], /* size n. Initially, all of Flag [0..n-1] < k. After
152  * lsolve_symbolic is done, Flag [i] == k if i is in
153  * the pattern of the output, and Flag [0..n-1] <= k. */
154 
155  /* other */
156  Int Lpend [ ], /* for symmetric pruning */
157  Int Ap_pos [ ], /* workspace used in dfs */
158 
159  Unit LU [ ], /* LU factors (pattern and values) */
160  Int lup, /* pointer to free space in LU */
161  Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */
162  Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */
163 
164  /* ---- the following are only used in the BTF case --- */
165 
166  Int k1, /* the block of A is from k1 to k2-1 */
167  Int PSinv [ ] /* inverse of P from symbolic factorization */
168 )
169 {
170  Int *Lik ;
171  Int i, p, pend, oldcol, kglobal, top, l_length ;
172 
173  top = n ;
174  l_length = 0 ;
175  Lik = (Int *) (LU + lup);
176 
177  /* ---------------------------------------------------------------------- */
178  /* BTF factorization of A (k1:k2-1, k1:k2-1) */
179  /* ---------------------------------------------------------------------- */
180 
181  kglobal = k + k1 ; /* column k of the block is col kglobal of A */
182  oldcol = Q [kglobal] ; /* Q must be present for BTF case */
183  pend = Ap [oldcol+1] ;
184  for (p = Ap [oldcol] ; p < pend ; p++)
185  {
186  i = PSinv [Ai [p]] - k1 ;
187  if (i < 0) continue ; /* skip entry outside the block */
188 
189  /* (i,k) is an entry in the block. start a DFS at node i */
190  PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ;
191  if (Flag [i] != k)
192  {
193  if (Pinv [i] >= 0)
194  {
195  top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag,
196  Lpend, top, LU, Lik, &l_length, Ap_pos) ;
197  }
198  else
199  {
200  /* i is not pivotal, and not flagged. Flag and put in L */
201  Flag [i] = k ;
202  Lik [l_length] = i ;
203  l_length++;
204  }
205  }
206  }
207 
208  /* If Llen [k] is zero, the matrix is structurally singular */
209  Llen [k] = l_length ;
210  return (top) ;
211 }
212 
213 
214 /* ========================================================================== */
215 /* === construct_column ===================================================== */
216 /* ========================================================================== */
217 
218 /* Construct the kth column of A, and the off-diagonal part, if requested.
219  * Scatter the numerical values into the workspace X, and construct the
220  * corresponding column of the off-diagonal matrix. */
221 
222 template <typename Entry, typename Int>
223 static void construct_column
224 (
225  /* inputs, not modified on output */
226  Int k, /* the column of A (or the column of the block) to get */
227  Int Ap [ ],
228  Int Ai [ ],
229  Entry Ax [ ],
230  Int Q [ ], /* column pre-ordering */
231 
232  /* zero on input, modified on output */
233  Entry X [ ],
234 
235  /* ---- the following are only used in the BTF case --- */
236 
237  /* inputs, not modified on output */
238  Int k1, /* the block of A is from k1 to k2-1 */
239  Int PSinv [ ], /* inverse of P from symbolic factorization */
240  double Rs [ ], /* scale factors for A */
241  Int scale, /* 0: no scaling, nonzero: scale the rows with Rs */
242 
243  /* inputs, modified on output */
244  Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
245  Int Offi [ ],
246  Entry Offx [ ]
247 )
248 {
249  Entry aik ;
250  Int i, p, pend, oldcol, kglobal, poff, oldrow ;
251 
252  /* ---------------------------------------------------------------------- */
253  /* Scale and scatter the column into X. */
254  /* ---------------------------------------------------------------------- */
255 
256  kglobal = k + k1 ; /* column k of the block is col kglobal of A */
257  poff = Offp [kglobal] ; /* start of off-diagonal column */
258  oldcol = Q [kglobal] ;
259  pend = Ap [oldcol+1] ;
260 
261  if (scale <= 0)
262  {
263  /* no scaling */
264  for (p = Ap [oldcol] ; p < pend ; p++)
265  {
266  oldrow = Ai [p] ;
267  i = PSinv [oldrow] - k1 ;
268  aik = Ax [p] ;
269  if (i < 0)
270  {
271  /* this is an entry in the off-diagonal part */
272  Offi [poff] = oldrow ;
273  Offx [poff] = aik ;
274  poff++ ;
275  }
276  else
277  {
278  /* (i,k) is an entry in the block. scatter into X */
279  X [i] = aik ;
280  }
281  }
282  }
283  else
284  {
285  /* row scaling */
286  for (p = Ap [oldcol] ; p < pend ; p++)
287  {
288  oldrow = Ai [p] ;
289  i = PSinv [oldrow] - k1 ;
290  aik = Ax [p] ;
291  SCALE_DIV (aik, Rs [oldrow]) ;
292  if (i < 0)
293  {
294  /* this is an entry in the off-diagonal part */
295  Offi [poff] = oldrow ;
296  Offx [poff] = aik ;
297  poff++ ;
298  }
299  else
300  {
301  /* (i,k) is an entry in the block. scatter into X */
302  X [i] = aik ;
303  }
304  }
305  }
306 
307  Offp [kglobal+1] = poff ; /* start of the next col of off-diag part */
308 }
309 
310 
311 /* ========================================================================== */
312 /* === lsolve_numeric ======================================================= */
313 /* ========================================================================== */
314 
315 /* Computes the numerical values of x, for the solution of Lx=b. Note that x
316  * may include explicit zeros if numerical cancelation occurs. L is assumed
317  * to be unit-diagonal, with possibly unsorted columns (but the first entry in
318  * the column must always be the diagonal entry). */
319 
320 template <typename Entry, typename Int>
321 static void lsolve_numeric
322 (
323  /* input, not modified on output: */
324  Int Pinv [ ], /* Pinv [i] = k if i is kth pivot row, or AMESOS2_KLU2_EMPTY if row i
325  * is not yet pivotal. */
326  Unit *LU, /* LU factors (pattern and values) */
327  Int Stack [ ], /* stack for dfs */
328  Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */
329  Int top, /* top of stack on input */
330  Int n, /* A is n-by-n */
331  Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */
332 
333  /* output, must be zero on input: */
334  Entry X [ ] /* size n, initially zero. On output,
335  * X [Ui [up1..up-1]] and X [Li [lp1..lp-1]]
336  * contains the solution. */
337 
338 )
339 {
340  Entry xj ;
341  Entry *Lx ;
342  Int *Li ;
343  Int p, s, j, jnew, len ;
344 
345  /* solve Lx=b */
346  for (s = top ; s < n ; s++)
347  {
348  /* forward solve with column j of L */
349  j = Stack [s] ;
350  jnew = Pinv [j] ;
351  ASSERT (jnew >= 0) ;
352  xj = X [j] ;
353  GET_POINTER (LU, Lip, Llen, Li, Lx, jnew, len) ;
354  ASSERT (Lip [jnew] <= Lip [jnew+1]) ;
355  for (p = 0 ; p < len ; p++)
356  {
357  /*X [Li [p]] -= Lx [p] * xj ; */
358  MULT_SUB (X [Li [p]], Lx [p], xj) ;
359  }
360  }
361 }
362 
363 
364 /* ========================================================================== */
365 /* === lpivot =============================================================== */
366 /* ========================================================================== */
367 
368 /* Find a pivot via partial pivoting, and scale the column of L. */
369 
370 template <typename Entry, typename Int>
371 static Int lpivot
372 (
373  Int diagrow,
374  Int *p_pivrow,
375  Entry *p_pivot,
376  double *p_abs_pivot,
377  double tol,
378  Entry X [ ],
379  Unit *LU, /* LU factors (pattern and values) */
380  Int Lip [ ],
381  Int Llen [ ],
382  Int k,
383  Int n,
384 
385  Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or AMESOS2_KLU2_EMPTY if
386  * row i is not yet pivotal. */
387 
388  Int *p_firstrow,
389  KLU_common<Entry, Int> *Common
390 )
391 {
392  Entry x, pivot, *Lx ;
393  double abs_pivot, xabs ;
394  Int p, i, ppivrow, pdiag, pivrow, *Li, last_row_index, firstrow, len ;
395 
396  pivrow = AMESOS2_KLU2_EMPTY ;
397  if (Llen [k] == 0)
398  {
399  /* matrix is structurally singular */
400  if (Common->halt_if_singular)
401  {
402  return (FALSE) ;
403  }
404  for (firstrow = *p_firstrow ; firstrow < n ; firstrow++)
405  {
406  PRINTF (("check %d\n", firstrow)) ;
407  if (Pinv [firstrow] < 0)
408  {
409  /* found the lowest-numbered non-pivotal row. Pick it. */
410  pivrow = firstrow ;
411  PRINTF (("Got pivotal row: %d\n", pivrow)) ;
412  break ;
413  }
414  }
415  ASSERT (pivrow >= 0 && pivrow < n) ;
416  CLEAR (pivot) ;
417  *p_pivrow = pivrow ;
418  *p_pivot = pivot ;
419  *p_abs_pivot = 0 ;
420  *p_firstrow = firstrow ;
421  return (FALSE) ;
422  }
423 
424  pdiag = AMESOS2_KLU2_EMPTY ;
425  ppivrow = AMESOS2_KLU2_EMPTY ;
426  abs_pivot = AMESOS2_KLU2_EMPTY ;
427  i = Llen [k] - 1 ;
428  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
429  last_row_index = Li [i] ;
430 
431  /* decrement the length by 1 */
432  Llen [k] = i ;
433  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
434 
435  /* look in Li [0 ..Llen [k] - 1 ] for a pivot row */
436  for (p = 0 ; p < len ; p++)
437  {
438  /* gather the entry from X and store in L */
439  i = Li [p] ;
440  x = X [i] ;
441  CLEAR (X [i]) ;
442 
443  Lx [p] = x ;
444  /* xabs = ABS (x) ; */
445  KLU2_ABS (xabs, x) ;
446 
447  /* find the diagonal */
448  if (i == diagrow)
449  {
450  pdiag = p ;
451  }
452 
453  /* find the partial-pivoting choice */
454  if (xabs > abs_pivot)
455  {
456  abs_pivot = xabs ;
457  ppivrow = p ;
458  }
459  }
460 
461  /* xabs = ABS (X [last_row_index]) ;*/
462  KLU2_ABS (xabs, X [last_row_index]) ;
463  if (xabs > abs_pivot)
464  {
465  abs_pivot = xabs ;
466  ppivrow = AMESOS2_KLU2_EMPTY ;
467  }
468 
469  /* compare the diagonal with the largest entry */
470  if (last_row_index == diagrow)
471  {
472  if (xabs >= tol * abs_pivot)
473  {
474  abs_pivot = xabs ;
475  ppivrow = AMESOS2_KLU2_EMPTY ;
476  }
477  }
478  else if (pdiag != AMESOS2_KLU2_EMPTY)
479  {
480  /* xabs = ABS (Lx [pdiag]) ;*/
481  KLU2_ABS (xabs, Lx [pdiag]) ;
482  if (xabs >= tol * abs_pivot)
483  {
484  /* the diagonal is large enough */
485  abs_pivot = xabs ;
486  ppivrow = pdiag ;
487  }
488  }
489 
490  if (ppivrow != AMESOS2_KLU2_EMPTY)
491  {
492  pivrow = Li [ppivrow] ;
493  pivot = Lx [ppivrow] ;
494  /* overwrite the ppivrow values with last index values */
495  Li [ppivrow] = last_row_index ;
496  Lx [ppivrow] = X [last_row_index] ;
497  }
498  else
499  {
500  pivrow = last_row_index ;
501  pivot = X [last_row_index] ;
502  }
503  CLEAR (X [last_row_index]) ;
504 
505  *p_pivrow = pivrow ;
506  *p_pivot = pivot ;
507  *p_abs_pivot = abs_pivot ;
508  ASSERT (pivrow >= 0 && pivrow < n) ;
509 
510  if (IS_ZERO (pivot) && Common->halt_if_singular)
511  {
512  /* numerically singular case */
513  return (FALSE) ;
514  }
515 
516  /* divide L by the pivot value */
517  for (p = 0 ; p < Llen [k] ; p++)
518  {
519  /* Lx [p] /= pivot ; */
520  DIV (Lx [p], Lx [p], pivot) ;
521  }
522 
523  return (TRUE) ;
524 }
525 
526 
527 /* ========================================================================== */
528 /* === prune ================================================================ */
529 /* ========================================================================== */
530 
531 /* Prune the columns of L to reduce work in subsequent depth-first searches */
532 template <typename Entry, typename Int>
533 static void prune
534 (
535  /* input/output: */
536  Int Lpend [ ], /* Lpend [j] marks symmetric pruning point for L(:,j) */
537 
538  /* input: */
539  Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or AMESOS2_KLU2_EMPTY if
540  * row i is not yet pivotal. */
541  Int k, /* prune using column k of U */
542  Int pivrow, /* current pivot row */
543 
544  /* input/output: */
545  Unit *LU, /* LU factors (pattern and values) */
546 
547  /* input */
548  Int Uip [ ], /* size n, column pointers for U */
549  Int Lip [ ], /* size n, column pointers for L */
550  Int Ulen [ ], /* size n, column length of U */
551  Int Llen [ ] /* size n, column length of L */
552 )
553 {
554  Entry x ;
555  Entry *Lx, *Ux ;
556  Int *Li, *Ui ;
557  Int p, i, j, p2, phead, ptail, llen, ulen ;
558 
559  /* check to see if any column of L can be pruned */
560  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ;
561 
562  // Try not to warn about Ux never being used
563  (void) Ux;
564  for (p = 0 ; p < ulen ; p++)
565  {
566  j = Ui [p] ;
567  ASSERT (j < k) ;
568  PRINTF (("%d is pruned: %d. Lpend[j] %d Lip[j+1] %d\n",
569  j, Lpend [j] != AMESOS2_KLU2_EMPTY, Lpend [j], Lip [j+1])) ;
570  if (Lpend [j] == AMESOS2_KLU2_EMPTY)
571  {
572  /* scan column j of L for the pivot row */
573  GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ;
574  for (p2 = 0 ; p2 < llen ; p2++)
575  {
576  if (pivrow == Li [p2])
577  {
578  /* found it! This column can be pruned */
579 #ifndef NDEBUGKLU2
580  PRINTF (("==== PRUNE: col j %d of L\n", j)) ;
581  {
582  Int p3 ;
583  for (p3 = 0 ; p3 < Llen [j] ; p3++)
584  {
585  PRINTF (("before: %i pivotal: %d\n", Li [p3],
586  Pinv [Li [p3]] >= 0)) ;
587  }
588  }
589 #endif
590 
591  /* partition column j of L. The unit diagonal of L
592  * is not stored in the column of L. */
593  phead = 0 ;
594  ptail = Llen [j] ;
595  while (phead < ptail)
596  {
597  i = Li [phead] ;
598  if (Pinv [i] >= 0)
599  {
600  /* leave at the head */
601  phead++ ;
602  }
603  else
604  {
605  /* swap with the tail */
606  ptail-- ;
607  Li [phead] = Li [ptail] ;
608  Li [ptail] = i ;
609  x = Lx [phead] ;
610  Lx [phead] = Lx [ptail] ;
611  Lx [ptail] = x ;
612  }
613  }
614 
615  /* set Lpend to one past the last entry in the
616  * first part of the column of L. Entries in
617  * Li [0 ... Lpend [j]-1] are the only part of
618  * column j of L that needs to be scanned in the DFS.
619  * Lpend [j] was AMESOS2_KLU2_EMPTY; setting it >= 0 also flags
620  * column j as pruned. */
621  Lpend [j] = ptail ;
622 
623 #ifndef NDEBUGKLU2
624  {
625  Int p3 ;
626  for (p3 = 0 ; p3 < Llen [j] ; p3++)
627  {
628  if (p3 == Lpend [j]) PRINTF (("----\n")) ;
629  PRINTF (("after: %i pivotal: %d\n", Li [p3],
630  Pinv [Li [p3]] >= 0)) ;
631  }
632  }
633 #endif
634 
635  break ;
636  }
637  }
638  }
639  }
640 }
641 
642 
643 /* ========================================================================== */
644 /* === KLU_kernel =========================================================== */
645 /* ========================================================================== */
646 
647 template <typename Entry, typename Int>
648 size_t KLU_kernel /* final size of LU on output */
649 (
650  /* input, not modified */
651  Int n, /* A is n-by-n */
652  Int Ap [ ], /* size n+1, column pointers for A */
653  Int Ai [ ], /* size nz = Ap [n], row indices for A */
654  Entry Ax [ ], /* size nz, values of A */
655  Int Q [ ], /* size n, optional input permutation */
656  size_t lusize, /* initial size of LU on input */
657 
658  /* output, not defined on input */
659  Int Pinv [ ], /* size n, inverse row permutation, where Pinv [i] = k if
660  * row i is the kth pivot row */
661  Int P [ ], /* size n, row permutation, where P [k] = i if row i is the
662  * kth pivot row. */
663  Unit **p_LU, /* LU array, size lusize on input */
664  Entry Udiag [ ], /* size n, diagonal of U */
665  Int Llen [ ], /* size n, column length of L */
666  Int Ulen [ ], /* size n, column length of U */
667  Int Lip [ ], /* size n, column pointers for L */
668  Int Uip [ ], /* size n, column pointers for U */
669  Int *lnz, /* size of L*/
670  Int *unz, /* size of U*/
671  /* workspace, not defined on input */
672  Entry X [ ], /* size n, undefined on input, zero on output */
673 
674  /* workspace, not defined on input or output */
675  Int Stack [ ], /* size n */
676  Int Flag [ ], /* size n */
677  Int Ap_pos [ ], /* size n */
678 
679  /* other workspace: */
680  Int Lpend [ ], /* size n workspace, for pruning only */
681 
682  /* inputs, not modified on output */
683  Int k1, /* the block of A is from k1 to k2-1 */
684  Int PSinv [ ], /* inverse of P from symbolic factorization */
685  double Rs [ ], /* scale factors for A */
686 
687  /* inputs, modified on output */
688  Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
689  Int Offi [ ],
690  Entry Offx [ ],
691  /* --------------- */
692  KLU_common<Entry, Int> *Common
693 )
694 {
695  Entry pivot ;
696  double abs_pivot, xsize, nunits, tol, memgrow ;
697  Entry *Ux ;
698  Int *Li, *Ui ;
699  Unit *LU ; /* LU factors (pattern and values) */
700  Int k, p, i, j, pivrow = 0, kbar, diagrow, firstrow, lup, top, scale, len ;
701  size_t newlusize ;
702 
703 #ifndef NDEBUGKLU2
704  Entry *Lx ;
705 #endif
706 
707  ASSERT (Common != NULL) ;
708  scale = Common->scale ;
709  tol = Common->tol ;
710  memgrow = Common->memgrow ;
711  *lnz = 0 ;
712  *unz = 0 ;
713  CLEAR (pivot) ;
714 
715  /* ---------------------------------------------------------------------- */
716  /* get initial Li, Lx, Ui, and Ux */
717  /* ---------------------------------------------------------------------- */
718 
719  PRINTF (("input: lusize %d \n", lusize)) ;
720  ASSERT (lusize > 0) ;
721  LU = *p_LU ;
722 
723  /* ---------------------------------------------------------------------- */
724  /* initializations */
725  /* ---------------------------------------------------------------------- */
726 
727  firstrow = 0 ;
728  lup = 0 ;
729 
730  for (k = 0 ; k < n ; k++)
731  {
732  /* X [k] = 0 ; */
733  CLEAR (X [k]) ;
734  Flag [k] = AMESOS2_KLU2_EMPTY ;
735  Lpend [k] = AMESOS2_KLU2_EMPTY ; /* flag k as not pruned */
736  }
737 
738  /* ---------------------------------------------------------------------- */
739  /* mark all rows as non-pivotal and determine initial diagonal mapping */
740  /* ---------------------------------------------------------------------- */
741 
742  /* PSinv does the symmetric permutation, so don't do it here */
743  for (k = 0 ; k < n ; k++)
744  {
745  P [k] = k ;
746  Pinv [k] = FLIP (k) ; /* mark all rows as non-pivotal */
747  }
748  /* initialize the construction of the off-diagonal matrix */
749  Offp [0] = 0 ;
750 
751  /* P [k] = row means that UNFLIP (Pinv [row]) = k, and visa versa.
752  * If row is pivotal, then Pinv [row] >= 0. A row is initially "flipped"
753  * (Pinv [k] < AMESOS2_KLU2_EMPTY), and then marked "unflipped" when it becomes
754  * pivotal. */
755 
756 #ifndef NDEBUGKLU2
757  for (k = 0 ; k < n ; k++)
758  {
759  PRINTF (("Initial P [%d] = %d\n", k, P [k])) ;
760  }
761 #endif
762 
763  /* ---------------------------------------------------------------------- */
764  /* factorize */
765  /* ---------------------------------------------------------------------- */
766 
767  for (k = 0 ; k < n ; k++)
768  {
769 
770  PRINTF (("\n\n==================================== k: %d\n", k)) ;
771 
772  /* ------------------------------------------------------------------ */
773  /* determine if LU factors have grown too big */
774  /* ------------------------------------------------------------------ */
775 
776  /* (n - k) entries for L and k entries for U */
777  nunits = DUNITS (Int, n - k) + DUNITS (Int, k) +
778  DUNITS (Entry, n - k) + DUNITS (Entry, k) ;
779 
780  /* LU can grow by at most 'nunits' entries if the column is dense */
781  PRINTF (("lup %d lusize %g lup+nunits: %g\n", lup, (double) lusize,
782  lup+nunits));
783  xsize = ((double) lup) + nunits ;
784  if (xsize > (double) lusize)
785  {
786  /* check here how much to grow */
787  xsize = (memgrow * ((double) lusize) + 4*n + 1) ;
788  if (INT_OVERFLOW (xsize))
789  {
790  PRINTF (("Matrix is too large (Int overflow)\n")) ;
791  Common->status = KLU_TOO_LARGE ;
792  return (lusize) ;
793  }
794  newlusize = (size_t) (memgrow * lusize + 2*n + 1) ;
795  /* Future work: retry mechanism in case of malloc failure */
796  LU = (Unit *) KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ;
797  Common->nrealloc++ ;
798  *p_LU = LU ;
799  if (Common->status == KLU_OUT_OF_MEMORY)
800  {
801  PRINTF (("Matrix is too large (LU)\n")) ;
802  return (lusize) ;
803  }
804  lusize = newlusize ;
805  PRINTF (("inc LU to %d done\n", lusize)) ;
806  }
807 
808  /* ------------------------------------------------------------------ */
809  /* start the kth column of L and U */
810  /* ------------------------------------------------------------------ */
811 
812  Lip [k] = lup ;
813 
814  /* ------------------------------------------------------------------ */
815  /* compute the nonzero pattern of the kth column of L and U */
816  /* ------------------------------------------------------------------ */
817 
818 #ifndef NDEBUGKLU2
819  for (i = 0 ; i < n ; i++)
820  {
821  ASSERT (Flag [i] < k) ;
822  /* ASSERT (X [i] == 0) ; */
823  ASSERT (IS_ZERO (X [i])) ;
824  }
825 #endif
826 
827  top = lsolve_symbolic (n, k, Ap, Ai, Q, Pinv, Stack, Flag,
828  Lpend, Ap_pos, LU, lup, Llen, Lip, k1, PSinv) ;
829 
830 #ifndef NDEBUGKLU2
831  PRINTF (("--- in U:\n")) ;
832  for (p = top ; p < n ; p++)
833  {
834  PRINTF (("pattern of X for U: %d : %d pivot row: %d\n",
835  p, Stack [p], Pinv [Stack [p]])) ;
836  ASSERT (Flag [Stack [p]] == k) ;
837  }
838  PRINTF (("--- in L:\n")) ;
839  Li = (Int *) (LU + Lip [k]);
840  for (p = 0 ; p < Llen [k] ; p++)
841  {
842  PRINTF (("pattern of X in L: %d : %d pivot row: %d\n",
843  p, Li [p], Pinv [Li [p]])) ;
844  ASSERT (Flag [Li [p]] == k) ;
845  }
846  p = 0 ;
847  for (i = 0 ; i < n ; i++)
848  {
849  ASSERT (Flag [i] <= k) ;
850  if (Flag [i] == k) p++ ;
851  }
852 #endif
853 
854  /* ------------------------------------------------------------------ */
855  /* get the column of the matrix to factorize and scatter into X */
856  /* ------------------------------------------------------------------ */
857 
858  construct_column <Entry> (k, Ap, Ai, Ax, Q, X,
859  k1, PSinv, Rs, scale, Offp, Offi, Offx) ;
860 
861  /* ------------------------------------------------------------------ */
862  /* compute the numerical values of the kth column (s = L \ A (:,k)) */
863  /* ------------------------------------------------------------------ */
864 
865  lsolve_numeric <Entry> (Pinv, LU, Stack, Lip, top, n, Llen, X) ;
866 
867 #ifndef NDEBUGKLU2
868  for (p = top ; p < n ; p++)
869  {
870  PRINTF (("X for U %d : ", Stack [p])) ;
871  PRINT_ENTRY (X [Stack [p]]) ;
872  }
873  Li = (Int *) (LU + Lip [k]) ;
874  for (p = 0 ; p < Llen [k] ; p++)
875  {
876  PRINTF (("X for L %d : ", Li [p])) ;
877  PRINT_ENTRY (X [Li [p]]) ;
878  }
879 #endif
880 
881  /* ------------------------------------------------------------------ */
882  /* partial pivoting with diagonal preference */
883  /* ------------------------------------------------------------------ */
884 
885  /* determine what the "diagonal" is */
886  diagrow = P [k] ; /* might already be pivotal */
887  PRINTF (("k %d, diagrow = %d, UNFLIP (diagrow) = %d\n",
888  k, diagrow, UNFLIP (diagrow))) ;
889 
890  /* find a pivot and scale the pivot column */
891  if (!lpivot <Entry> (diagrow, &pivrow, &pivot, &abs_pivot, tol, X, LU, Lip,
892  Llen, k, n, Pinv, &firstrow, Common))
893  {
894  /* matrix is structurally or numerically singular */
895  Common->status = KLU_SINGULAR ;
896  if (Common->numerical_rank == AMESOS2_KLU2_EMPTY)
897  {
898  Common->numerical_rank = k+k1 ;
899  Common->singular_col = Q [k+k1] ;
900  }
901  if (Common->halt_if_singular)
902  {
903  /* do not continue the factorization */
904  return (lusize) ;
905  }
906  }
907 
908  /* we now have a valid pivot row, even if the column has NaN's or
909  * has no entries on or below the diagonal at all. */
910  PRINTF (("\nk %d : Pivot row %d : ", k, pivrow)) ;
911  PRINT_ENTRY (pivot) ;
912  ASSERT (pivrow >= 0 && pivrow < n) ;
913  ASSERT (Pinv [pivrow] < 0) ;
914 
915  /* set the Uip pointer */
916  Uip [k] = Lip [k] + UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ;
917 
918  /* move the lup pointer to the position where indices of U
919  * should be stored */
920  lup += UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ;
921 
922  Ulen [k] = n - top ;
923 
924  /* extract Stack [top..n-1] to Ui and the values to Ux and clear X */
925  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
926  for (p = top, i = 0 ; p < n ; p++, i++)
927  {
928  j = Stack [p] ;
929  Ui [i] = Pinv [j] ;
930  Ux [i] = X [j] ;
931  CLEAR (X [j]) ;
932  }
933 
934  /* position the lu index at the starting point for next column */
935  lup += UNITS (Int, Ulen [k]) + UNITS (Entry, Ulen [k]) ;
936 
937  /* U(k,k) = pivot */
938  Udiag [k] = pivot ;
939 
940  /* ------------------------------------------------------------------ */
941  /* log the pivot permutation */
942  /* ------------------------------------------------------------------ */
943 
944  ASSERT (UNFLIP (Pinv [diagrow]) < n) ;
945  ASSERT (P [UNFLIP (Pinv [diagrow])] == diagrow) ;
946 
947  if (pivrow != diagrow)
948  {
949  /* an off-diagonal pivot has been chosen */
950  Common->noffdiag++ ;
951  PRINTF ((">>>>>>>>>>>>>>>>> pivrow %d k %d off-diagonal\n",
952  pivrow, k)) ;
953  if (Pinv [diagrow] < 0)
954  {
955  /* the former diagonal row index, diagrow, has not yet been
956  * chosen as a pivot row. Log this diagrow as the "diagonal"
957  * entry in the column kbar for which the chosen pivot row,
958  * pivrow, was originally logged as the "diagonal" */
959  kbar = FLIP (Pinv [pivrow]) ;
960  P [kbar] = diagrow ;
961  Pinv [diagrow] = FLIP (kbar) ;
962  }
963  }
964  P [k] = pivrow ;
965  Pinv [pivrow] = k ;
966 
967 #ifndef NDEBUGKLU2
968  for (i = 0 ; i < n ; i++) { ASSERT (IS_ZERO (X [i])) ;}
969  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
970  for (p = 0 ; p < len ; p++)
971  {
972  PRINTF (("Column %d of U: %d : ", k, Ui [p])) ;
973  PRINT_ENTRY (Ux [p]) ;
974  }
975  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
976  for (p = 0 ; p < len ; p++)
977  {
978  PRINTF (("Column %d of L: %d : ", k, Li [p])) ;
979  PRINT_ENTRY (Lx [p]) ;
980  }
981 #endif
982 
983  /* ------------------------------------------------------------------ */
984  /* symmetric pruning */
985  /* ------------------------------------------------------------------ */
986 
987  prune<Entry> (Lpend, Pinv, k, pivrow, LU, Uip, Lip, Ulen, Llen) ;
988 
989  *lnz += Llen [k] + 1 ; /* 1 added to lnz for diagonal */
990  *unz += Ulen [k] + 1 ; /* 1 added to unz for diagonal */
991  }
992 
993  /* ---------------------------------------------------------------------- */
994  /* finalize column pointers for L and U, and put L in the pivotal order */
995  /* ---------------------------------------------------------------------- */
996 
997  for (p = 0 ; p < n ; p++)
998  {
999  Li = (Int *) (LU + Lip [p]) ;
1000  for (i = 0 ; i < Llen [p] ; i++)
1001  {
1002  Li [i] = Pinv [Li [i]] ;
1003  }
1004  }
1005 
1006 #ifndef NDEBUGKLU2
1007  for (i = 0 ; i < n ; i++)
1008  {
1009  PRINTF (("P [%d] = %d Pinv [%d] = %d\n", i, P [i], i, Pinv [i])) ;
1010  }
1011  for (i = 0 ; i < n ; i++)
1012  {
1013  ASSERT (Pinv [i] >= 0 && Pinv [i] < n) ;
1014  ASSERT (P [i] >= 0 && P [i] < n) ;
1015  ASSERT (P [Pinv [i]] == i) ;
1016  ASSERT (IS_ZERO (X [i])) ;
1017  }
1018 #endif
1019 
1020  /* ---------------------------------------------------------------------- */
1021  /* shrink the LU factors to just the required size */
1022  /* ---------------------------------------------------------------------- */
1023 
1024  newlusize = lup ;
1025  ASSERT ((size_t) newlusize <= lusize) ;
1026 
1027  /* this cannot fail, since the block is descreasing in size */
1028  LU = (Unit *) KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ;
1029  *p_LU = LU ;
1030  return (newlusize) ;
1031 }
1032 
1033 #endif
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.
int Ap[]
Column offsets.
Definition: klu2_simple.cpp:52
int Ai[]
Row values.
Definition: klu2_simple.cpp:54