Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2_kernel.hpp
1 /* ========================================================================== */
2 /* === KLU_kernel =========================================================== */
3 /* ========================================================================== */
4 // @HEADER
5 // ***********************************************************************
6 //
7 // KLU2: A Direct Linear Solver package
8 // Copyright 2011 Sandia Corporation
9 //
10 // Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11 // U.S. Government retains certain rights in this software.
12 //
13 // This library is free software; you can redistribute it and/or modify
14 // it under the terms of the GNU Lesser General Public License as
15 // published by the Free Software Foundation; either version 2.1 of the
16 // License, or (at your option) any later version.
17 //
18 // This library is distributed in the hope that it will be useful, but
19 // WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 // Lesser General Public License for more details.
22 //
23 // You should have received a copy of the GNU Lesser General Public
24 // License along with this library; if not, write to the Free Software
25 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 // USA
27 // Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28 //
29 // KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30 // University of Florida. The Authors of KLU are Timothy A. Davis and
31 // Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32 // information for KLU.
33 //
34 // ***********************************************************************
35 // @HEADER
36 
37 /* Sparse left-looking LU factorization, with partial pivoting. Based on
38  * Gilbert & Peierl's method, with a non-recursive DFS and with Eisenstat &
39  * Liu's symmetric pruning. No user-callable routines are in this file.
40  */
41 
42 #ifndef KLU2_KERNEL_HPP
43 #define KLU2_KERNEL_HPP
44 
45 #include "klu2_internal.h"
46 #include "klu2_memory.hpp"
47 
48 /* ========================================================================== */
49 /* === dfs ================================================================== */
50 /* ========================================================================== */
51 
52 /* Does a depth-first-search, starting at node j. */
53 
54 template <typename Int>
55 static Int dfs
56 (
57  /* input, not modified on output: */
58  Int j, /* node at which to start the DFS */
59  Int k, /* mark value, for the Flag array */
60  Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or EMPTY if
61  * row i is not yet pivotal. */
62  Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */
63  Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */
64 
65  /* workspace, not defined on input or output */
66  Int Stack [ ], /* size n */
67 
68  /* input/output: */
69  Int Flag [ ], /* Flag [i] == k means i is marked */
70  Int Lpend [ ], /* for symmetric pruning */
71  Int top, /* top of stack on input*/
72  Unit LU [],
73  Int *Lik, /* Li row index array of the kth column */
74  Int *plength,
75 
76  /* other, not defined on input or output */
77  Int Ap_pos [ ] /* keeps track of position in adj list during DFS */
78 )
79 {
80  Int i, pos, jnew, head, l_length ;
81  Int *Li ;
82 
83  l_length = *plength ;
84 
85  head = 0 ;
86  Stack [0] = j ;
87  ASSERT (Flag [j] != k) ;
88 
89  while (head >= 0)
90  {
91  j = Stack [head] ;
92  jnew = Pinv [j] ;
93  ASSERT (jnew >= 0 && jnew < k) ; /* j is pivotal */
94 
95  if (Flag [j] != k) /* a node is not yet visited */
96  {
97  /* first time that j has been visited */
98  Flag [j] = k ;
99  PRINTF (("[ start dfs at %d : new %d\n", j, jnew)) ;
100  /* set Ap_pos [head] to one past the last entry in col j to scan */
101  Ap_pos [head] =
102  (Lpend [jnew] == EMPTY) ? Llen [jnew] : Lpend [jnew] ;
103  }
104 
105  /* add the adjacent nodes to the recursive stack by iterating through
106  * until finding another non-visited pivotal node */
107  Li = (Int *) (LU + Lip [jnew]) ;
108  for (pos = --Ap_pos [head] ; pos >= 0 ; --pos)
109  {
110  i = Li [pos] ;
111  if (Flag [i] != k)
112  {
113  /* node i is not yet visited */
114  if (Pinv [i] >= 0)
115  {
116  /* keep track of where we left off in the scan of the
117  * adjacency list of node j so we can restart j where we
118  * left off. */
119  Ap_pos [head] = pos ;
120 
121  /* node i is pivotal; push it onto the recursive stack
122  * and immediately break so we can recurse on node i. */
123  Stack [++head] = i ;
124  break ;
125  }
126  else
127  {
128  /* node i is not pivotal (no outgoing edges). */
129  /* Flag as visited and store directly into L,
130  * and continue with current node j. */
131  Flag [i] = k ;
132  Lik [l_length] = i ;
133  l_length++ ;
134  }
135  }
136  }
137 
138  if (pos == -1)
139  {
140  /* if all adjacent nodes of j are already visited, pop j from
141  * recursive stack and push j onto output stack */
142  head-- ;
143  Stack[--top] = j ;
144  PRINTF ((" end dfs at %d ] head : %d\n", j, head)) ;
145  }
146  }
147 
148  *plength = l_length ;
149  return (top) ;
150 }
151 
152 
153 /* ========================================================================== */
154 /* === lsolve_symbolic ====================================================== */
155 /* ========================================================================== */
156 
157 /* Finds the pattern of x, for the solution of Lx=b */
158 
159 template <typename Int>
160 static Int lsolve_symbolic
161 (
162  /* input, not modified on output: */
163  Int n, /* L is n-by-n, where n >= 0 */
164  Int k, /* also used as the mark value, for the Flag array */
165  Int Ap [ ],
166  Int Ai [ ],
167  Int Q [ ],
168  Int Pinv [ ], /* Pinv [i] = k if i is kth pivot row, or EMPTY if row i
169  * is not yet pivotal. */
170 
171  /* workspace, not defined on input or output */
172  Int Stack [ ], /* size n */
173 
174  /* workspace, defined on input and output */
175  Int Flag [ ], /* size n. Initially, all of Flag [0..n-1] < k. After
176  * lsolve_symbolic is done, Flag [i] == k if i is in
177  * the pattern of the output, and Flag [0..n-1] <= k. */
178 
179  /* other */
180  Int Lpend [ ], /* for symmetric pruning */
181  Int Ap_pos [ ], /* workspace used in dfs */
182 
183  Unit LU [ ], /* LU factors (pattern and values) */
184  Int lup, /* pointer to free space in LU */
185  Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */
186  Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */
187 
188  /* ---- the following are only used in the BTF case --- */
189 
190  Int k1, /* the block of A is from k1 to k2-1 */
191  Int PSinv [ ] /* inverse of P from symbolic factorization */
192 )
193 {
194  Int *Lik ;
195  Int i, p, pend, oldcol, kglobal, top, l_length ;
196 
197  top = n ;
198  l_length = 0 ;
199  Lik = (Int *) (LU + lup);
200 
201  /* ---------------------------------------------------------------------- */
202  /* BTF factorization of A (k1:k2-1, k1:k2-1) */
203  /* ---------------------------------------------------------------------- */
204 
205  kglobal = k + k1 ; /* column k of the block is col kglobal of A */
206  oldcol = Q [kglobal] ; /* Q must be present for BTF case */
207  pend = Ap [oldcol+1] ;
208  for (p = Ap [oldcol] ; p < pend ; p++)
209  {
210  i = PSinv [Ai [p]] - k1 ;
211  if (i < 0) continue ; /* skip entry outside the block */
212 
213  /* (i,k) is an entry in the block. start a DFS at node i */
214  PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ;
215  if (Flag [i] != k)
216  {
217  if (Pinv [i] >= 0)
218  {
219  top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag,
220  Lpend, top, LU, Lik, &l_length, Ap_pos) ;
221  }
222  else
223  {
224  /* i is not pivotal, and not flagged. Flag and put in L */
225  Flag [i] = k ;
226  Lik [l_length] = i ;
227  l_length++;
228  }
229  }
230  }
231 
232  /* If Llen [k] is zero, the matrix is structurally singular */
233  Llen [k] = l_length ;
234  return (top) ;
235 }
236 
237 
238 /* ========================================================================== */
239 /* === construct_column ===================================================== */
240 /* ========================================================================== */
241 
242 /* Construct the kth column of A, and the off-diagonal part, if requested.
243  * Scatter the numerical values into the workspace X, and construct the
244  * corresponding column of the off-diagonal matrix. */
245 
246 template <typename Entry, typename Int>
247 static void construct_column
248 (
249  /* inputs, not modified on output */
250  Int k, /* the column of A (or the column of the block) to get */
251  Int Ap [ ],
252  Int Ai [ ],
253  Entry Ax [ ],
254  Int Q [ ], /* column pre-ordering */
255 
256  /* zero on input, modified on output */
257  Entry X [ ],
258 
259  /* ---- the following are only used in the BTF case --- */
260 
261  /* inputs, not modified on output */
262  Int k1, /* the block of A is from k1 to k2-1 */
263  Int PSinv [ ], /* inverse of P from symbolic factorization */
264  double Rs [ ], /* scale factors for A */
265  Int scale, /* 0: no scaling, nonzero: scale the rows with Rs */
266 
267  /* inputs, modified on output */
268  Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
269  Int Offi [ ],
270  Entry Offx [ ]
271 )
272 {
273  Entry aik ;
274  Int i, p, pend, oldcol, kglobal, poff, oldrow ;
275 
276  /* ---------------------------------------------------------------------- */
277  /* Scale and scatter the column into X. */
278  /* ---------------------------------------------------------------------- */
279 
280  kglobal = k + k1 ; /* column k of the block is col kglobal of A */
281  poff = Offp [kglobal] ; /* start of off-diagonal column */
282  oldcol = Q [kglobal] ;
283  pend = Ap [oldcol+1] ;
284 
285  if (scale <= 0)
286  {
287  /* no scaling */
288  for (p = Ap [oldcol] ; p < pend ; p++)
289  {
290  oldrow = Ai [p] ;
291  i = PSinv [oldrow] - k1 ;
292  aik = Ax [p] ;
293  if (i < 0)
294  {
295  /* this is an entry in the off-diagonal part */
296  Offi [poff] = oldrow ;
297  Offx [poff] = aik ;
298  poff++ ;
299  }
300  else
301  {
302  /* (i,k) is an entry in the block. scatter into X */
303  X [i] = aik ;
304  }
305  }
306  }
307  else
308  {
309  /* row scaling */
310  for (p = Ap [oldcol] ; p < pend ; p++)
311  {
312  oldrow = Ai [p] ;
313  i = PSinv [oldrow] - k1 ;
314  aik = Ax [p] ;
315  SCALE_DIV (aik, Rs [oldrow]) ;
316  if (i < 0)
317  {
318  /* this is an entry in the off-diagonal part */
319  Offi [poff] = oldrow ;
320  Offx [poff] = aik ;
321  poff++ ;
322  }
323  else
324  {
325  /* (i,k) is an entry in the block. scatter into X */
326  X [i] = aik ;
327  }
328  }
329  }
330 
331  Offp [kglobal+1] = poff ; /* start of the next col of off-diag part */
332 }
333 
334 
335 /* ========================================================================== */
336 /* === lsolve_numeric ======================================================= */
337 /* ========================================================================== */
338 
339 /* Computes the numerical values of x, for the solution of Lx=b. Note that x
340  * may include explicit zeros if numerical cancelation occurs. L is assumed
341  * to be unit-diagonal, with possibly unsorted columns (but the first entry in
342  * the column must always be the diagonal entry). */
343 
344 template <typename Entry, typename Int>
345 static void lsolve_numeric
346 (
347  /* input, not modified on output: */
348  Int Pinv [ ], /* Pinv [i] = k if i is kth pivot row, or EMPTY if row i
349  * is not yet pivotal. */
350  Unit *LU, /* LU factors (pattern and values) */
351  Int Stack [ ], /* stack for dfs */
352  Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */
353  Int top, /* top of stack on input */
354  Int n, /* A is n-by-n */
355  Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */
356 
357  /* output, must be zero on input: */
358  Entry X [ ] /* size n, initially zero. On output,
359  * X [Ui [up1..up-1]] and X [Li [lp1..lp-1]]
360  * contains the solution. */
361 
362 )
363 {
364  Entry xj ;
365  Entry *Lx ;
366  Int *Li ;
367  Int p, s, j, jnew, len ;
368 
369  /* solve Lx=b */
370  for (s = top ; s < n ; s++)
371  {
372  /* forward solve with column j of L */
373  j = Stack [s] ;
374  jnew = Pinv [j] ;
375  ASSERT (jnew >= 0) ;
376  xj = X [j] ;
377  GET_POINTER (LU, Lip, Llen, Li, Lx, jnew, len) ;
378  ASSERT (Lip [jnew] <= Lip [jnew+1]) ;
379  for (p = 0 ; p < len ; p++)
380  {
381  /*X [Li [p]] -= Lx [p] * xj ; */
382  MULT_SUB (X [Li [p]], Lx [p], xj) ;
383  }
384  }
385 }
386 
387 
388 /* ========================================================================== */
389 /* === lpivot =============================================================== */
390 /* ========================================================================== */
391 
392 /* Find a pivot via partial pivoting, and scale the column of L. */
393 
394 template <typename Entry, typename Int>
395 static Int lpivot
396 (
397  Int diagrow,
398  Int *p_pivrow,
399  Entry *p_pivot,
400  double *p_abs_pivot,
401  double tol,
402  Entry X [ ],
403  Unit *LU, /* LU factors (pattern and values) */
404  Int Lip [ ],
405  Int Llen [ ],
406  Int k,
407  Int n,
408 
409  Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or EMPTY if
410  * row i is not yet pivotal. */
411 
412  Int *p_firstrow,
413  KLU_common<Entry, Int> *Common
414 )
415 {
416  Entry x, pivot, *Lx ;
417  double abs_pivot, xabs ;
418  Int p, i, ppivrow, pdiag, pivrow, *Li, last_row_index, firstrow, len ;
419 
420  pivrow = EMPTY ;
421  if (Llen [k] == 0)
422  {
423  /* matrix is structurally singular */
424  if (Common->halt_if_singular)
425  {
426  return (FALSE) ;
427  }
428  for (firstrow = *p_firstrow ; firstrow < n ; firstrow++)
429  {
430  PRINTF (("check %d\n", firstrow)) ;
431  if (Pinv [firstrow] < 0)
432  {
433  /* found the lowest-numbered non-pivotal row. Pick it. */
434  pivrow = firstrow ;
435  PRINTF (("Got pivotal row: %d\n", pivrow)) ;
436  break ;
437  }
438  }
439  ASSERT (pivrow >= 0 && pivrow < n) ;
440  CLEAR (pivot) ;
441  *p_pivrow = pivrow ;
442  *p_pivot = pivot ;
443  *p_abs_pivot = 0 ;
444  *p_firstrow = firstrow ;
445  return (FALSE) ;
446  }
447 
448  pdiag = EMPTY ;
449  ppivrow = EMPTY ;
450  abs_pivot = EMPTY ;
451  i = Llen [k] - 1 ;
452  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
453  last_row_index = Li [i] ;
454 
455  /* decrement the length by 1 */
456  Llen [k] = i ;
457  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
458 
459  /* look in Li [0 ..Llen [k] - 1 ] for a pivot row */
460  for (p = 0 ; p < len ; p++)
461  {
462  /* gather the entry from X and store in L */
463  i = Li [p] ;
464  x = X [i] ;
465  CLEAR (X [i]) ;
466 
467  Lx [p] = x ;
468  /* xabs = ABS (x) ; */
469  KLU2_ABS (xabs, x) ;
470 
471  /* find the diagonal */
472  if (i == diagrow)
473  {
474  pdiag = p ;
475  }
476 
477  /* find the partial-pivoting choice */
478  if (xabs > abs_pivot)
479  {
480  abs_pivot = xabs ;
481  ppivrow = p ;
482  }
483  }
484 
485  /* xabs = ABS (X [last_row_index]) ;*/
486  KLU2_ABS (xabs, X [last_row_index]) ;
487  if (xabs > abs_pivot)
488  {
489  abs_pivot = xabs ;
490  ppivrow = EMPTY ;
491  }
492 
493  /* compare the diagonal with the largest entry */
494  if (last_row_index == diagrow)
495  {
496  if (xabs >= tol * abs_pivot)
497  {
498  abs_pivot = xabs ;
499  ppivrow = EMPTY ;
500  }
501  }
502  else if (pdiag != EMPTY)
503  {
504  /* xabs = ABS (Lx [pdiag]) ;*/
505  KLU2_ABS (xabs, Lx [pdiag]) ;
506  if (xabs >= tol * abs_pivot)
507  {
508  /* the diagonal is large enough */
509  abs_pivot = xabs ;
510  ppivrow = pdiag ;
511  }
512  }
513 
514  if (ppivrow != EMPTY)
515  {
516  pivrow = Li [ppivrow] ;
517  pivot = Lx [ppivrow] ;
518  /* overwrite the ppivrow values with last index values */
519  Li [ppivrow] = last_row_index ;
520  Lx [ppivrow] = X [last_row_index] ;
521  }
522  else
523  {
524  pivrow = last_row_index ;
525  pivot = X [last_row_index] ;
526  }
527  CLEAR (X [last_row_index]) ;
528 
529  *p_pivrow = pivrow ;
530  *p_pivot = pivot ;
531  *p_abs_pivot = abs_pivot ;
532  ASSERT (pivrow >= 0 && pivrow < n) ;
533 
534  if (IS_ZERO (pivot) && Common->halt_if_singular)
535  {
536  /* numerically singular case */
537  return (FALSE) ;
538  }
539 
540  /* divide L by the pivot value */
541  for (p = 0 ; p < Llen [k] ; p++)
542  {
543  /* Lx [p] /= pivot ; */
544  DIV (Lx [p], Lx [p], pivot) ;
545  }
546 
547  return (TRUE) ;
548 }
549 
550 
551 /* ========================================================================== */
552 /* === prune ================================================================ */
553 /* ========================================================================== */
554 
555 /* Prune the columns of L to reduce work in subsequent depth-first searches */
556 template <typename Entry, typename Int>
557 static void prune
558 (
559  /* input/output: */
560  Int Lpend [ ], /* Lpend [j] marks symmetric pruning point for L(:,j) */
561 
562  /* input: */
563  Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or EMPTY if
564  * row i is not yet pivotal. */
565  Int k, /* prune using column k of U */
566  Int pivrow, /* current pivot row */
567 
568  /* input/output: */
569  Unit *LU, /* LU factors (pattern and values) */
570 
571  /* input */
572  Int Uip [ ], /* size n, column pointers for U */
573  Int Lip [ ], /* size n, column pointers for L */
574  Int Ulen [ ], /* size n, column length of U */
575  Int Llen [ ] /* size n, column length of L */
576 )
577 {
578  Entry x ;
579  Entry *Lx, *Ux ;
580  Int *Li, *Ui ;
581  Int p, i, j, p2, phead, ptail, llen, ulen ;
582 
583  /* check to see if any column of L can be pruned */
584  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ;
585 
586  // Try not to warn about Ux never being used
587  (void) Ux;
588  for (p = 0 ; p < ulen ; p++)
589  {
590  j = Ui [p] ;
591  ASSERT (j < k) ;
592  PRINTF (("%d is pruned: %d. Lpend[j] %d Lip[j+1] %d\n",
593  j, Lpend [j] != EMPTY, Lpend [j], Lip [j+1])) ;
594  if (Lpend [j] == EMPTY)
595  {
596  /* scan column j of L for the pivot row */
597  GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ;
598  for (p2 = 0 ; p2 < llen ; p2++)
599  {
600  if (pivrow == Li [p2])
601  {
602  /* found it! This column can be pruned */
603 #ifndef NDEBUGKLU2
604  PRINTF (("==== PRUNE: col j %d of L\n", j)) ;
605  {
606  Int p3 ;
607  for (p3 = 0 ; p3 < Llen [j] ; p3++)
608  {
609  PRINTF (("before: %i pivotal: %d\n", Li [p3],
610  Pinv [Li [p3]] >= 0)) ;
611  }
612  }
613 #endif
614 
615  /* partition column j of L. The unit diagonal of L
616  * is not stored in the column of L. */
617  phead = 0 ;
618  ptail = Llen [j] ;
619  while (phead < ptail)
620  {
621  i = Li [phead] ;
622  if (Pinv [i] >= 0)
623  {
624  /* leave at the head */
625  phead++ ;
626  }
627  else
628  {
629  /* swap with the tail */
630  ptail-- ;
631  Li [phead] = Li [ptail] ;
632  Li [ptail] = i ;
633  x = Lx [phead] ;
634  Lx [phead] = Lx [ptail] ;
635  Lx [ptail] = x ;
636  }
637  }
638 
639  /* set Lpend to one past the last entry in the
640  * first part of the column of L. Entries in
641  * Li [0 ... Lpend [j]-1] are the only part of
642  * column j of L that needs to be scanned in the DFS.
643  * Lpend [j] was EMPTY; setting it >= 0 also flags
644  * column j as pruned. */
645  Lpend [j] = ptail ;
646 
647 #ifndef NDEBUGKLU2
648  {
649  Int p3 ;
650  for (p3 = 0 ; p3 < Llen [j] ; p3++)
651  {
652  if (p3 == Lpend [j]) PRINTF (("----\n")) ;
653  PRINTF (("after: %i pivotal: %d\n", Li [p3],
654  Pinv [Li [p3]] >= 0)) ;
655  }
656  }
657 #endif
658 
659  break ;
660  }
661  }
662  }
663  }
664 }
665 
666 
667 /* ========================================================================== */
668 /* === KLU_kernel =========================================================== */
669 /* ========================================================================== */
670 
671 template <typename Entry, typename Int>
672 size_t KLU_kernel /* final size of LU on output */
673 (
674  /* input, not modified */
675  Int n, /* A is n-by-n */
676  Int Ap [ ], /* size n+1, column pointers for A */
677  Int Ai [ ], /* size nz = Ap [n], row indices for A */
678  Entry Ax [ ], /* size nz, values of A */
679  Int Q [ ], /* size n, optional input permutation */
680  size_t lusize, /* initial size of LU on input */
681 
682  /* output, not defined on input */
683  Int Pinv [ ], /* size n, inverse row permutation, where Pinv [i] = k if
684  * row i is the kth pivot row */
685  Int P [ ], /* size n, row permutation, where P [k] = i if row i is the
686  * kth pivot row. */
687  Unit **p_LU, /* LU array, size lusize on input */
688  Entry Udiag [ ], /* size n, diagonal of U */
689  Int Llen [ ], /* size n, column length of L */
690  Int Ulen [ ], /* size n, column length of U */
691  Int Lip [ ], /* size n, column pointers for L */
692  Int Uip [ ], /* size n, column pointers for U */
693  Int *lnz, /* size of L*/
694  Int *unz, /* size of U*/
695  /* workspace, not defined on input */
696  Entry X [ ], /* size n, undefined on input, zero on output */
697 
698  /* workspace, not defined on input or output */
699  Int Stack [ ], /* size n */
700  Int Flag [ ], /* size n */
701  Int Ap_pos [ ], /* size n */
702 
703  /* other workspace: */
704  Int Lpend [ ], /* size n workspace, for pruning only */
705 
706  /* inputs, not modified on output */
707  Int k1, /* the block of A is from k1 to k2-1 */
708  Int PSinv [ ], /* inverse of P from symbolic factorization */
709  double Rs [ ], /* scale factors for A */
710 
711  /* inputs, modified on output */
712  Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
713  Int Offi [ ],
714  Entry Offx [ ],
715  /* --------------- */
716  KLU_common<Entry, Int> *Common
717 )
718 {
719  Entry pivot ;
720  double abs_pivot, xsize, nunits, tol, memgrow ;
721  Entry *Ux ;
722  Int *Li, *Ui ;
723  Unit *LU ; /* LU factors (pattern and values) */
724  Int k, p, i, j, pivrow = 0, kbar, diagrow, firstrow, lup, top, scale, len ;
725  size_t newlusize ;
726 
727 #ifndef NDEBUGKLU2
728  Entry *Lx ;
729 #endif
730 
731  ASSERT (Common != NULL) ;
732  scale = Common->scale ;
733  tol = Common->tol ;
734  memgrow = Common->memgrow ;
735  *lnz = 0 ;
736  *unz = 0 ;
737  CLEAR (pivot) ;
738 
739  /* ---------------------------------------------------------------------- */
740  /* get initial Li, Lx, Ui, and Ux */
741  /* ---------------------------------------------------------------------- */
742 
743  PRINTF (("input: lusize %d \n", lusize)) ;
744  ASSERT (lusize > 0) ;
745  LU = *p_LU ;
746 
747  /* ---------------------------------------------------------------------- */
748  /* initializations */
749  /* ---------------------------------------------------------------------- */
750 
751  firstrow = 0 ;
752  lup = 0 ;
753 
754  for (k = 0 ; k < n ; k++)
755  {
756  /* X [k] = 0 ; */
757  CLEAR (X [k]) ;
758  Flag [k] = EMPTY ;
759  Lpend [k] = EMPTY ; /* flag k as not pruned */
760  }
761 
762  /* ---------------------------------------------------------------------- */
763  /* mark all rows as non-pivotal and determine initial diagonal mapping */
764  /* ---------------------------------------------------------------------- */
765 
766  /* PSinv does the symmetric permutation, so don't do it here */
767  for (k = 0 ; k < n ; k++)
768  {
769  P [k] = k ;
770  Pinv [k] = FLIP (k) ; /* mark all rows as non-pivotal */
771  }
772  /* initialize the construction of the off-diagonal matrix */
773  Offp [0] = 0 ;
774 
775  /* P [k] = row means that UNFLIP (Pinv [row]) = k, and visa versa.
776  * If row is pivotal, then Pinv [row] >= 0. A row is initially "flipped"
777  * (Pinv [k] < EMPTY), and then marked "unflipped" when it becomes
778  * pivotal. */
779 
780 #ifndef NDEBUGKLU2
781  for (k = 0 ; k < n ; k++)
782  {
783  PRINTF (("Initial P [%d] = %d\n", k, P [k])) ;
784  }
785 #endif
786 
787  /* ---------------------------------------------------------------------- */
788  /* factorize */
789  /* ---------------------------------------------------------------------- */
790 
791  for (k = 0 ; k < n ; k++)
792  {
793 
794  PRINTF (("\n\n==================================== k: %d\n", k)) ;
795 
796  /* ------------------------------------------------------------------ */
797  /* determine if LU factors have grown too big */
798  /* ------------------------------------------------------------------ */
799 
800  /* (n - k) entries for L and k entries for U */
801  nunits = DUNITS (Int, n - k) + DUNITS (Int, k) +
802  DUNITS (Entry, n - k) + DUNITS (Entry, k) ;
803 
804  /* LU can grow by at most 'nunits' entries if the column is dense */
805  PRINTF (("lup %d lusize %g lup+nunits: %g\n", lup, (double) lusize,
806  lup+nunits));
807  xsize = ((double) lup) + nunits ;
808  if (xsize > (double) lusize)
809  {
810  /* check here how much to grow */
811  xsize = (memgrow * ((double) lusize) + 4*n + 1) ;
812  if (INT_OVERFLOW (xsize))
813  {
814  PRINTF (("Matrix is too large (Int overflow)\n")) ;
815  Common->status = KLU_TOO_LARGE ;
816  return (lusize) ;
817  }
818  newlusize = (size_t) (memgrow * lusize + 2*n + 1) ;
819  /* Future work: retry mechanism in case of malloc failure */
820  LU = (Unit *) KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ;
821  Common->nrealloc++ ;
822  *p_LU = LU ;
823  if (Common->status == KLU_OUT_OF_MEMORY)
824  {
825  PRINTF (("Matrix is too large (LU)\n")) ;
826  return (lusize) ;
827  }
828  lusize = newlusize ;
829  PRINTF (("inc LU to %d done\n", lusize)) ;
830  }
831 
832  /* ------------------------------------------------------------------ */
833  /* start the kth column of L and U */
834  /* ------------------------------------------------------------------ */
835 
836  Lip [k] = lup ;
837 
838  /* ------------------------------------------------------------------ */
839  /* compute the nonzero pattern of the kth column of L and U */
840  /* ------------------------------------------------------------------ */
841 
842 #ifndef NDEBUGKLU2
843  for (i = 0 ; i < n ; i++)
844  {
845  ASSERT (Flag [i] < k) ;
846  /* ASSERT (X [i] == 0) ; */
847  ASSERT (IS_ZERO (X [i])) ;
848  }
849 #endif
850 
851  top = lsolve_symbolic (n, k, Ap, Ai, Q, Pinv, Stack, Flag,
852  Lpend, Ap_pos, LU, lup, Llen, Lip, k1, PSinv) ;
853 
854 #ifndef NDEBUGKLU2
855  PRINTF (("--- in U:\n")) ;
856  for (p = top ; p < n ; p++)
857  {
858  PRINTF (("pattern of X for U: %d : %d pivot row: %d\n",
859  p, Stack [p], Pinv [Stack [p]])) ;
860  ASSERT (Flag [Stack [p]] == k) ;
861  }
862  PRINTF (("--- in L:\n")) ;
863  Li = (Int *) (LU + Lip [k]);
864  for (p = 0 ; p < Llen [k] ; p++)
865  {
866  PRINTF (("pattern of X in L: %d : %d pivot row: %d\n",
867  p, Li [p], Pinv [Li [p]])) ;
868  ASSERT (Flag [Li [p]] == k) ;
869  }
870  p = 0 ;
871  for (i = 0 ; i < n ; i++)
872  {
873  ASSERT (Flag [i] <= k) ;
874  if (Flag [i] == k) p++ ;
875  }
876 #endif
877 
878  /* ------------------------------------------------------------------ */
879  /* get the column of the matrix to factorize and scatter into X */
880  /* ------------------------------------------------------------------ */
881 
882  construct_column <Entry> (k, Ap, Ai, Ax, Q, X,
883  k1, PSinv, Rs, scale, Offp, Offi, Offx) ;
884 
885  /* ------------------------------------------------------------------ */
886  /* compute the numerical values of the kth column (s = L \ A (:,k)) */
887  /* ------------------------------------------------------------------ */
888 
889  lsolve_numeric <Entry> (Pinv, LU, Stack, Lip, top, n, Llen, X) ;
890 
891 #ifndef NDEBUGKLU2
892  for (p = top ; p < n ; p++)
893  {
894  PRINTF (("X for U %d : ", Stack [p])) ;
895  PRINT_ENTRY (X [Stack [p]]) ;
896  }
897  Li = (Int *) (LU + Lip [k]) ;
898  for (p = 0 ; p < Llen [k] ; p++)
899  {
900  PRINTF (("X for L %d : ", Li [p])) ;
901  PRINT_ENTRY (X [Li [p]]) ;
902  }
903 #endif
904 
905  /* ------------------------------------------------------------------ */
906  /* partial pivoting with diagonal preference */
907  /* ------------------------------------------------------------------ */
908 
909  /* determine what the "diagonal" is */
910  diagrow = P [k] ; /* might already be pivotal */
911  PRINTF (("k %d, diagrow = %d, UNFLIP (diagrow) = %d\n",
912  k, diagrow, UNFLIP (diagrow))) ;
913 
914  /* find a pivot and scale the pivot column */
915  if (!lpivot <Entry> (diagrow, &pivrow, &pivot, &abs_pivot, tol, X, LU, Lip,
916  Llen, k, n, Pinv, &firstrow, Common))
917  {
918  /* matrix is structurally or numerically singular */
919  Common->status = KLU_SINGULAR ;
920  if (Common->numerical_rank == EMPTY)
921  {
922  Common->numerical_rank = k+k1 ;
923  Common->singular_col = Q [k+k1] ;
924  }
925  if (Common->halt_if_singular)
926  {
927  /* do not continue the factorization */
928  return (lusize) ;
929  }
930  }
931 
932  /* we now have a valid pivot row, even if the column has NaN's or
933  * has no entries on or below the diagonal at all. */
934  PRINTF (("\nk %d : Pivot row %d : ", k, pivrow)) ;
935  PRINT_ENTRY (pivot) ;
936  ASSERT (pivrow >= 0 && pivrow < n) ;
937  ASSERT (Pinv [pivrow] < 0) ;
938 
939  /* set the Uip pointer */
940  Uip [k] = Lip [k] + UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ;
941 
942  /* move the lup pointer to the position where indices of U
943  * should be stored */
944  lup += UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ;
945 
946  Ulen [k] = n - top ;
947 
948  /* extract Stack [top..n-1] to Ui and the values to Ux and clear X */
949  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
950  for (p = top, i = 0 ; p < n ; p++, i++)
951  {
952  j = Stack [p] ;
953  Ui [i] = Pinv [j] ;
954  Ux [i] = X [j] ;
955  CLEAR (X [j]) ;
956  }
957 
958  /* position the lu index at the starting point for next column */
959  lup += UNITS (Int, Ulen [k]) + UNITS (Entry, Ulen [k]) ;
960 
961  /* U(k,k) = pivot */
962  Udiag [k] = pivot ;
963 
964  /* ------------------------------------------------------------------ */
965  /* log the pivot permutation */
966  /* ------------------------------------------------------------------ */
967 
968  ASSERT (UNFLIP (Pinv [diagrow]) < n) ;
969  ASSERT (P [UNFLIP (Pinv [diagrow])] == diagrow) ;
970 
971  if (pivrow != diagrow)
972  {
973  /* an off-diagonal pivot has been chosen */
974  Common->noffdiag++ ;
975  PRINTF ((">>>>>>>>>>>>>>>>> pivrow %d k %d off-diagonal\n",
976  pivrow, k)) ;
977  if (Pinv [diagrow] < 0)
978  {
979  /* the former diagonal row index, diagrow, has not yet been
980  * chosen as a pivot row. Log this diagrow as the "diagonal"
981  * entry in the column kbar for which the chosen pivot row,
982  * pivrow, was originally logged as the "diagonal" */
983  kbar = FLIP (Pinv [pivrow]) ;
984  P [kbar] = diagrow ;
985  Pinv [diagrow] = FLIP (kbar) ;
986  }
987  }
988  P [k] = pivrow ;
989  Pinv [pivrow] = k ;
990 
991 #ifndef NDEBUGKLU2
992  for (i = 0 ; i < n ; i++) { ASSERT (IS_ZERO (X [i])) ;}
993  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
994  for (p = 0 ; p < len ; p++)
995  {
996  PRINTF (("Column %d of U: %d : ", k, Ui [p])) ;
997  PRINT_ENTRY (Ux [p]) ;
998  }
999  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
1000  for (p = 0 ; p < len ; p++)
1001  {
1002  PRINTF (("Column %d of L: %d : ", k, Li [p])) ;
1003  PRINT_ENTRY (Lx [p]) ;
1004  }
1005 #endif
1006 
1007  /* ------------------------------------------------------------------ */
1008  /* symmetric pruning */
1009  /* ------------------------------------------------------------------ */
1010 
1011  prune<Entry> (Lpend, Pinv, k, pivrow, LU, Uip, Lip, Ulen, Llen) ;
1012 
1013  *lnz += Llen [k] + 1 ; /* 1 added to lnz for diagonal */
1014  *unz += Ulen [k] + 1 ; /* 1 added to unz for diagonal */
1015  }
1016 
1017  /* ---------------------------------------------------------------------- */
1018  /* finalize column pointers for L and U, and put L in the pivotal order */
1019  /* ---------------------------------------------------------------------- */
1020 
1021  for (p = 0 ; p < n ; p++)
1022  {
1023  Li = (Int *) (LU + Lip [p]) ;
1024  for (i = 0 ; i < Llen [p] ; i++)
1025  {
1026  Li [i] = Pinv [Li [i]] ;
1027  }
1028  }
1029 
1030 #ifndef NDEBUGKLU2
1031  for (i = 0 ; i < n ; i++)
1032  {
1033  PRINTF (("P [%d] = %d Pinv [%d] = %d\n", i, P [i], i, Pinv [i])) ;
1034  }
1035  for (i = 0 ; i < n ; i++)
1036  {
1037  ASSERT (Pinv [i] >= 0 && Pinv [i] < n) ;
1038  ASSERT (P [i] >= 0 && P [i] < n) ;
1039  ASSERT (P [Pinv [i]] == i) ;
1040  ASSERT (IS_ZERO (X [i])) ;
1041  }
1042 #endif
1043 
1044  /* ---------------------------------------------------------------------- */
1045  /* shrink the LU factors to just the required size */
1046  /* ---------------------------------------------------------------------- */
1047 
1048  newlusize = lup ;
1049  ASSERT ((size_t) newlusize <= lusize) ;
1050 
1051  /* this cannot fail, since the block is descreasing in size */
1052  LU = (Unit *) KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ;
1053  *p_LU = LU ;
1054  return (newlusize) ;
1055 }
1056 
1057 #endif
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.