Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2.hpp
1 /* ========================================================================== */
2 /* === klu ================================================================== */
3 /* ========================================================================== */
4 // @HEADER
5 // *****************************************************************************
6 // KLU2: A Direct Linear Solver package
7 //
8 // Copyright 2011 NTESS and the KLU2 contributors.
9 // SPDX-License-Identifier: LGPL-2.1-or-later
10 // *****************************************************************************
11 // @HEADER
12 
13 /* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with
14  * optional symmetric pruning by Eisenstat and Liu [2]. The code is by Tim
15  * Davis. This algorithm is what appears as the default sparse LU routine in
16  * MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A).
17  * Note that no column ordering is provided (see COLAMD or AMD for suitable
18  * orderings). SuperLU is based on this algorithm, except that it adds the
19  * use of dense matrix operations on "supernodes" (adjacent columns with
20  * identical). This code doesn't use supernodes, thus its name ("Kent" LU,
21  * as in "Clark Kent", in contrast with Super-LU...). This algorithm is slower
22  * than SuperLU and UMFPACK for large matrices with lots of nonzeros in their
23  * factors (such as for most finite-element problems). However, for matrices
24  * with very sparse LU factors, this algorithm is typically faster than both
25  * SuperLU and UMFPACK, since in this case there is little chance to exploit
26  * dense matrix kernels (the BLAS).
27  *
28  * Only one block of A is factorized, in the BTF form. The input n is the
29  * size of the block; k1 is the first row and column in the block.
30  *
31  * NOTE: no error checking is done on the inputs. This version is not meant to
32  * be called directly by the user. Use klu_factor instead.
33  *
34  * No fill-reducing ordering is provided. The ordering quality of
35  * klu_kernel_factor is the responsibility of the caller. The input A must
36  * pre-permuted to reduce fill-in, or fill-reducing input permutation Q must
37  * be provided.
38  *
39  * The input matrix A must be in compressed-column form, with either sorted
40  * or unsorted row indices. Row indices for column j of A is in
41  * Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the
42  * numerical values. No duplicate entries are allowed.
43  *
44  * Copyright 2004-2009, Tim Davis. All rights reserved. See the README
45  * file for details on permitted use. Note that no code from The MathWorks,
46  * Inc, or from SuperLU, or from any other source appears here. The code is
47  * written from scratch, from the algorithmic description in Gilbert & Peierls'
48  * and Eisenstat & Liu's journal papers [1,2].
49  *
50  * If an input permutation Q is provided, the factorization L*U = A (P,Q)
51  * is computed, where P is determined by partial pivoting, and Q is the input
52  * ordering. If the pivot tolerance is less than 1, the "diagonal" entry that
53  * KLU attempts to choose is the diagonal of A (Q,Q). In other words, the
54  * input permutation is applied symmetrically to the input matrix. The output
55  * permutation P includes both the partial pivoting ordering and the input
56  * permutation. If Q is NULL, then it is assumed to be the identity
57  * permutation. Q is not modified.
58  *
59  * [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time
60  * Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp.,
61  * vol 9, pp. 862-874, 1988.
62  * [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in
63  * Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis &
64  * Applic., vol 13, pp. 202-211, 1992.
65  */
66 
67 /* ========================================================================== */
68 
69 #ifndef KLU2_HPP
70 #define KLU2_HPP
71 
72 #include "klu2_internal.h"
73 #include "klu2_kernel.hpp"
74 #include "klu2_memory.hpp"
75 
76 template <typename Entry, typename Int>
77 size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */
78 (
79  /* inputs, not modified */
80  Int n, /* A is n-by-n. n must be > 0. */
81  Int Ap [ ], /* size n+1, column pointers for A */
82  Int Ai [ ], /* size nz = Ap [n], row indices for A */
83  Entry Ax [ ], /* size nz, values of A */
84  Int Q [ ], /* size n, optional column permutation */
85  double Lsize, /* estimate of number of nonzeros in L */
86 
87  /* outputs, not defined on input */
88  Unit **p_LU, /* row indices and values of L and U */
89  Entry Udiag [ ], /* size n, diagonal of U */
90  Int Llen [ ], /* size n, column length of L */
91  Int Ulen [ ], /* size n, column length of U */
92  Int Lip [ ], /* size n, column pointers for L */
93  Int Uip [ ], /* size n, column pointers for U */
94  Int P [ ], /* row permutation, size n */
95  Int *lnz, /* size of L */
96  Int *unz, /* size of U */
97 
98  /* workspace, undefined on input */
99  Entry *X, /* size n double's, zero on output */
100  Int *Work, /* size 5n Int's */
101 
102  /* inputs, not modified on output */
103  Int k1, /* the block of A is from k1 to k2-1 */
104  Int PSinv [ ], /* inverse of P from symbolic factorization */
105  double Rs [ ], /* scale factors for A */
106 
107  /* inputs, modified on output */
108  Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
109  Int Offi [ ],
110  Entry Offx [ ],
111  /* --------------- */
112  KLU_common<Entry, Int> *Common
113 )
114 {
115  double maxlnz, dunits ;
116  Unit *LU ;
117  Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
118  Int lsize, usize, anz, ok ;
119  size_t lusize ;
120  ASSERT (Common != NULL) ;
121 
122  /* ---------------------------------------------------------------------- */
123  /* get control parameters, or use defaults */
124  /* ---------------------------------------------------------------------- */
125 
126  n = MAX (1, n) ;
127  anz = Ap [n+k1] - Ap [k1] ;
128 
129  if (Lsize <= 0)
130  {
131  Lsize = -Lsize ;
132  Lsize = MAX (Lsize, 1.0) ;
133  lsize = (Int) Lsize * anz + n ;
134  }
135  else
136  {
137  lsize = (Int) Lsize ;
138  }
139 
140  usize = lsize ;
141 
142  lsize = MAX (n+1, lsize) ;
143  usize = MAX (n+1, usize) ;
144 
145  maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
146  maxlnz = MIN (maxlnz, ((double) INT_MAX)) ;
147  lsize = (Int) MIN (maxlnz, lsize) ;
148  usize = (Int) MIN (maxlnz, usize) ;
149 
150  PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
151  n, anz, k1, lsize, usize, maxlnz)) ;
152 
153  /* ---------------------------------------------------------------------- */
154  /* allocate workspace and outputs */
155  /* ---------------------------------------------------------------------- */
156 
157  /* return arguments are not yet assigned */
158  *p_LU = (Unit *) NULL ;
159 
160  /* these computations are safe from size_t overflow */
161  W = Work ;
162  Pinv = (Int *) W ; W += n ;
163  Stack = (Int *) W ; W += n ;
164  Flag = (Int *) W ; W += n ;
165  Lpend = (Int *) W ; W += n ;
166  Ap_pos = (Int *) W ; W += n ;
167 
168  dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
169  DUNITS (Int, usize) + DUNITS (Entry, usize) ;
170  lusize = (size_t) dunits ;
171  ok = !INT_OVERFLOW (dunits) ;
172  LU = (Unit *) (ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL) ;
173  if (LU == NULL)
174  {
175  /* out of memory, or problem too large */
176  Common->status = KLU_OUT_OF_MEMORY ;
177  lusize = 0 ;
178  return (lusize) ;
179  }
180 
181  /* ---------------------------------------------------------------------- */
182  /* factorize */
183  /* ---------------------------------------------------------------------- */
184 
185  /* with pruning, and non-recursive depth-first-search */
186  lusize = KLU_kernel<Entry> (n, Ap, Ai, Ax, Q, lusize,
187  Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
188  X, Stack, Flag, Ap_pos, Lpend,
189  k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
190 
191  /* ---------------------------------------------------------------------- */
192  /* return LU factors, or return nothing if an error occurred */
193  /* ---------------------------------------------------------------------- */
194 
195  if (Common->status < KLU_OK)
196  {
197  LU = (Unit *) KLU_free (LU, lusize, sizeof (Unit), Common) ;
198  lusize = 0 ;
199  }
200  *p_LU = LU ;
201  PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ;
202  return (lusize) ;
203 }
204 
205 /* ========================================================================== */
206 /* === KLU_lsolve =========================================================== */
207 /* ========================================================================== */
208 
209 /* Solve Lx=b. Assumes L is unit lower triangular and where the unit diagonal
210  * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
211  * and is stored in ROW form with row dimension nrhs. nrhs must be in the
212  * range 1 to 4. */
213 
214 template <typename Entry, typename Int>
215 void KLU_lsolve
216 (
217  /* inputs, not modified: */
218  const Int n,
219  const Int *Lip,
220  const Int *Llen,
221  Unit *LU,
222  const Int nrhs,
223  /* right-hand-side on input, solution to Lx=b on output */
224  Entry *X
225 )
226 {
227  Entry x [4], lik ;
228  Int *Li ;
229  Entry *Lx ;
230  Int k, p, len, i ;
231 
232  switch (nrhs)
233  {
234 
235  case 1:
236  for (k = 0 ; k < n ; k++)
237  {
238  x [0] = X [k] ;
239  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
240  /* unit diagonal of L is not stored*/
241  for (p = 0 ; p < len ; p++)
242  {
243  /* X [Li [p]] -= Lx [p] * x [0] ; */
244  MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
245  }
246  }
247  break ;
248 
249  case 2:
250 
251  for (k = 0 ; k < n ; k++)
252  {
253  x [0] = X [2*k ] ;
254  x [1] = X [2*k + 1] ;
255  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
256  for (p = 0 ; p < len ; p++)
257  {
258  i = Li [p] ;
259  lik = Lx [p] ;
260  MULT_SUB (X [2*i], lik, x [0]) ;
261  MULT_SUB (X [2*i + 1], lik, x [1]) ;
262  }
263  }
264  break ;
265 
266  case 3:
267 
268  for (k = 0 ; k < n ; k++)
269  {
270  x [0] = X [3*k ] ;
271  x [1] = X [3*k + 1] ;
272  x [2] = X [3*k + 2] ;
273  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
274  for (p = 0 ; p < len ; p++)
275  {
276  i = Li [p] ;
277  lik = Lx [p] ;
278  MULT_SUB (X [3*i], lik, x [0]) ;
279  MULT_SUB (X [3*i + 1], lik, x [1]) ;
280  MULT_SUB (X [3*i + 2], lik, x [2]) ;
281  }
282  }
283  break ;
284 
285  case 4:
286 
287  for (k = 0 ; k < n ; k++)
288  {
289  x [0] = X [4*k ] ;
290  x [1] = X [4*k + 1] ;
291  x [2] = X [4*k + 2] ;
292  x [3] = X [4*k + 3] ;
293  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
294  for (p = 0 ; p < len ; p++)
295  {
296  i = Li [p] ;
297  lik = Lx [p] ;
298  MULT_SUB (X [4*i], lik, x [0]) ;
299  MULT_SUB (X [4*i + 1], lik, x [1]) ;
300  MULT_SUB (X [4*i + 2], lik, x [2]) ;
301  MULT_SUB (X [4*i + 3], lik, x [3]) ;
302  }
303  }
304  break ;
305 
306  default:
307  throw std::domain_error("More than 4 right-hand sides is not supported.");
308 
309  }
310 }
311 
312 /* ========================================================================== */
313 /* === KLU_usolve =========================================================== */
314 /* ========================================================================== */
315 
316 /* Solve Ux=b. Assumes U is non-unit upper triangular and where the diagonal
317  * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
318  * and is stored in ROW form with row dimension nrhs. nrhs must be in the
319  * range 1 to 4. */
320 
321 template <typename Entry, typename Int>
322 void KLU_usolve
323 (
324  /* inputs, not modified: */
325  Int n,
326  const Int Uip [ ],
327  const Int Ulen [ ],
328  Unit LU [ ],
329  Entry Udiag [ ],
330  Int nrhs,
331  /* right-hand-side on input, solution to Ux=b on output */
332  Entry X [ ]
333 )
334 {
335  Entry x [4], uik, ukk ;
336  Int *Ui ;
337  Entry *Ux ;
338  Int k, p, len, i ;
339 
340  switch (nrhs)
341  {
342 
343  case 1:
344 
345  for (k = n-1 ; k >= 0 ; k--)
346  {
347  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
348  /* x [0] = X [k] / Udiag [k] ; */
349  DIV (x [0], X [k], Udiag [k]) ;
350  X [k] = x [0] ;
351  for (p = 0 ; p < len ; p++)
352  {
353  /* X [Ui [p]] -= Ux [p] * x [0] ; */
354  MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
355 
356  }
357  }
358 
359  break ;
360 
361  case 2:
362 
363  for (k = n-1 ; k >= 0 ; k--)
364  {
365  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
366  ukk = Udiag [k] ;
367  /* x [0] = X [2*k ] / ukk ;
368  x [1] = X [2*k + 1] / ukk ; */
369  DIV (x [0], X [2*k], ukk) ;
370  DIV (x [1], X [2*k + 1], ukk) ;
371 
372  X [2*k ] = x [0] ;
373  X [2*k + 1] = x [1] ;
374  for (p = 0 ; p < len ; p++)
375  {
376  i = Ui [p] ;
377  uik = Ux [p] ;
378  /* X [2*i ] -= uik * x [0] ;
379  X [2*i + 1] -= uik * x [1] ; */
380  MULT_SUB (X [2*i], uik, x [0]) ;
381  MULT_SUB (X [2*i + 1], uik, x [1]) ;
382  }
383  }
384 
385  break ;
386 
387  case 3:
388 
389  for (k = n-1 ; k >= 0 ; k--)
390  {
391  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
392  ukk = Udiag [k] ;
393 
394  DIV (x [0], X [3*k], ukk) ;
395  DIV (x [1], X [3*k + 1], ukk) ;
396  DIV (x [2], X [3*k + 2], ukk) ;
397 
398  X [3*k ] = x [0] ;
399  X [3*k + 1] = x [1] ;
400  X [3*k + 2] = x [2] ;
401  for (p = 0 ; p < len ; p++)
402  {
403  i = Ui [p] ;
404  uik = Ux [p] ;
405  MULT_SUB (X [3*i], uik, x [0]) ;
406  MULT_SUB (X [3*i + 1], uik, x [1]) ;
407  MULT_SUB (X [3*i + 2], uik, x [2]) ;
408  }
409  }
410 
411  break ;
412 
413  case 4:
414 
415  for (k = n-1 ; k >= 0 ; k--)
416  {
417  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
418  ukk = Udiag [k] ;
419 
420  DIV (x [0], X [4*k], ukk) ;
421  DIV (x [1], X [4*k + 1], ukk) ;
422  DIV (x [2], X [4*k + 2], ukk) ;
423  DIV (x [3], X [4*k + 3], ukk) ;
424 
425  X [4*k ] = x [0] ;
426  X [4*k + 1] = x [1] ;
427  X [4*k + 2] = x [2] ;
428  X [4*k + 3] = x [3] ;
429  for (p = 0 ; p < len ; p++)
430  {
431  i = Ui [p] ;
432  uik = Ux [p] ;
433 
434  MULT_SUB (X [4*i], uik, x [0]) ;
435  MULT_SUB (X [4*i + 1], uik, x [1]) ;
436  MULT_SUB (X [4*i + 2], uik, x [2]) ;
437  MULT_SUB (X [4*i + 3], uik, x [3]) ;
438  }
439  }
440 
441  break ;
442 
443  }
444 }
445 
446 /* ========================================================================== */
447 /* === KLU_ltsolve ========================================================== */
448 /* ========================================================================== */
449 
450 /* Solve L'x=b. Assumes L is unit lower triangular and where the unit diagonal
451  * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
452  * and is stored in ROW form with row dimension nrhs. nrhs must in the
453  * range 1 to 4. */
454 
455 template <typename Entry, typename Int>
456 void KLU_ltsolve
457 (
458  /* inputs, not modified: */
459  Int n,
460  const Int Lip [ ],
461  const Int Llen [ ],
462  Unit LU [ ],
463  Int nrhs,
464 #ifdef COMPLEX
465  Int conj_solve,
466 #endif
467  /* right-hand-side on input, solution to L'x=b on output */
468  Entry X [ ]
469 )
470 {
471  Entry x [4], lik ;
472  Int *Li ;
473  Entry *Lx ;
474  Int k, p, len, i ;
475 
476  switch (nrhs)
477  {
478 
479  case 1:
480 
481  for (k = n-1 ; k >= 0 ; k--)
482  {
483  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
484  x [0] = X [k] ;
485  for (p = 0 ; p < len ; p++)
486  {
487 #ifdef COMPLEX
488  if (conj_solve)
489  {
490  /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
491  MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
492  }
493  else
494 #endif
495  {
496  /*x [0] -= Lx [p] * X [Li [p]] ;*/
497  MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
498  }
499  }
500  X [k] = x [0] ;
501  }
502  break ;
503 
504  case 2:
505 
506  for (k = n-1 ; k >= 0 ; k--)
507  {
508  x [0] = X [2*k ] ;
509  x [1] = X [2*k + 1] ;
510  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
511  for (p = 0 ; p < len ; p++)
512  {
513  i = Li [p] ;
514 #ifdef COMPLEX
515  if (conj_solve)
516  {
517  CONJ (lik, Lx [p]) ;
518  }
519  else
520 #endif
521  {
522  lik = Lx [p] ;
523  }
524  MULT_SUB (x [0], lik, X [2*i]) ;
525  MULT_SUB (x [1], lik, X [2*i + 1]) ;
526  }
527  X [2*k ] = x [0] ;
528  X [2*k + 1] = x [1] ;
529  }
530  break ;
531 
532  case 3:
533 
534  for (k = n-1 ; k >= 0 ; k--)
535  {
536  x [0] = X [3*k ] ;
537  x [1] = X [3*k + 1] ;
538  x [2] = X [3*k + 2] ;
539  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
540  for (p = 0 ; p < len ; p++)
541  {
542  i = Li [p] ;
543 #ifdef COMPLEX
544  if (conj_solve)
545  {
546  CONJ (lik, Lx [p]) ;
547  }
548  else
549 #endif
550  {
551  lik = Lx [p] ;
552  }
553  MULT_SUB (x [0], lik, X [3*i]) ;
554  MULT_SUB (x [1], lik, X [3*i + 1]) ;
555  MULT_SUB (x [2], lik, X [3*i + 2]) ;
556  }
557  X [3*k ] = x [0] ;
558  X [3*k + 1] = x [1] ;
559  X [3*k + 2] = x [2] ;
560  }
561  break ;
562 
563  case 4:
564 
565  for (k = n-1 ; k >= 0 ; k--)
566  {
567  x [0] = X [4*k ] ;
568  x [1] = X [4*k + 1] ;
569  x [2] = X [4*k + 2] ;
570  x [3] = X [4*k + 3] ;
571  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
572  for (p = 0 ; p < len ; p++)
573  {
574  i = Li [p] ;
575 #ifdef COMPLEX
576  if (conj_solve)
577  {
578  CONJ (lik, Lx [p]) ;
579  }
580  else
581 #endif
582  {
583  lik = Lx [p] ;
584  }
585  MULT_SUB (x [0], lik, X [4*i]) ;
586  MULT_SUB (x [1], lik, X [4*i + 1]) ;
587  MULT_SUB (x [2], lik, X [4*i + 2]) ;
588  MULT_SUB (x [3], lik, X [4*i + 3]) ;
589  }
590  X [4*k ] = x [0] ;
591  X [4*k + 1] = x [1] ;
592  X [4*k + 2] = x [2] ;
593  X [4*k + 3] = x [3] ;
594  }
595  break ;
596  }
597 }
598 
599 
600 /* ========================================================================== */
601 /* === KLU_utsolve ========================================================== */
602 /* ========================================================================== */
603 
604 /* Solve U'x=b. Assumes U is non-unit upper triangular and where the diagonal
605  * entry is stored (and appears last in each column of U). Overwrites B
606  * with the solution X. B is n-by-nrhs and is stored in ROW form with row
607  * dimension nrhs. nrhs must be in the range 1 to 4. */
608 
609 template <typename Entry, typename Int>
610 void KLU_utsolve
611 (
612  /* inputs, not modified: */
613  const Int n,
614  const Int Uip [ ],
615  const Int Ulen [ ],
616  Unit LU [ ],
617  const Entry Udiag [ ],
618  Int nrhs,
619 #ifdef COMPLEX
620  Int conj_solve,
621 #endif
622  /* right-hand-side on input, solution to Ux=b on output */
623  Entry X [ ]
624 )
625 {
626  Entry x [4], uik, ukk ;
627  Int k, p, len, i ;
628  Int *Ui ;
629  Entry *Ux ;
630 
631  switch (nrhs)
632  {
633 
634  case 1:
635 
636  for (k = 0 ; k < n ; k++)
637  {
638  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
639  x [0] = X [k] ;
640  for (p = 0 ; p < len ; p++)
641  {
642 #ifdef COMPLEX
643  if (conj_solve)
644  {
645  /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
646  MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
647  }
648  else
649 #endif
650  {
651  /* x [0] -= Ux [p] * X [Ui [p]] ; */
652  MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
653  }
654  }
655 #ifdef COMPLEX
656  if (conj_solve)
657  {
658  CONJ (ukk, Udiag [k]) ;
659  }
660  else
661 #endif
662  {
663  ukk = Udiag [k] ;
664  }
665  DIV (X [k], x [0], ukk) ;
666  }
667  break ;
668 
669  case 2:
670 
671  for (k = 0 ; k < n ; k++)
672  {
673  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
674  x [0] = X [2*k ] ;
675  x [1] = X [2*k + 1] ;
676  for (p = 0 ; p < len ; p++)
677  {
678  i = Ui [p] ;
679 #ifdef COMPLEX
680  if (conj_solve)
681  {
682  CONJ (uik, Ux [p]) ;
683  }
684  else
685 #endif
686  {
687  uik = Ux [p] ;
688  }
689  MULT_SUB (x [0], uik, X [2*i]) ;
690  MULT_SUB (x [1], uik, X [2*i + 1]) ;
691  }
692 #ifdef COMPLEX
693  if (conj_solve)
694  {
695  CONJ (ukk, Udiag [k]) ;
696  }
697  else
698 #endif
699  {
700  ukk = Udiag [k] ;
701  }
702  DIV (X [2*k], x [0], ukk) ;
703  DIV (X [2*k + 1], x [1], ukk) ;
704  }
705  break ;
706 
707  case 3:
708 
709  for (k = 0 ; k < n ; k++)
710  {
711  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
712  x [0] = X [3*k ] ;
713  x [1] = X [3*k + 1] ;
714  x [2] = X [3*k + 2] ;
715  for (p = 0 ; p < len ; p++)
716  {
717  i = Ui [p] ;
718 #ifdef COMPLEX
719  if (conj_solve)
720  {
721  CONJ (uik, Ux [p]) ;
722  }
723  else
724 #endif
725  {
726  uik = Ux [p] ;
727  }
728  MULT_SUB (x [0], uik, X [3*i]) ;
729  MULT_SUB (x [1], uik, X [3*i + 1]) ;
730  MULT_SUB (x [2], uik, X [3*i + 2]) ;
731  }
732 #ifdef COMPLEX
733  if (conj_solve)
734  {
735  CONJ (ukk, Udiag [k]) ;
736  }
737  else
738 #endif
739  {
740  ukk = Udiag [k] ;
741  }
742  DIV (X [3*k], x [0], ukk) ;
743  DIV (X [3*k + 1], x [1], ukk) ;
744  DIV (X [3*k + 2], x [2], ukk) ;
745  }
746  break ;
747 
748  case 4:
749 
750  for (k = 0 ; k < n ; k++)
751  {
752  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
753  x [0] = X [4*k ] ;
754  x [1] = X [4*k + 1] ;
755  x [2] = X [4*k + 2] ;
756  x [3] = X [4*k + 3] ;
757  for (p = 0 ; p < len ; p++)
758  {
759  i = Ui [p] ;
760 #ifdef COMPLEX
761  if (conj_solve)
762  {
763  CONJ (uik, Ux [p]) ;
764  }
765  else
766 #endif
767  {
768  uik = Ux [p] ;
769  }
770  MULT_SUB (x [0], uik, X [4*i]) ;
771  MULT_SUB (x [1], uik, X [4*i + 1]) ;
772  MULT_SUB (x [2], uik, X [4*i + 2]) ;
773  MULT_SUB (x [3], uik, X [4*i + 3]) ;
774  }
775 #ifdef COMPLEX
776  if (conj_solve)
777  {
778  CONJ (ukk, Udiag [k]) ;
779  }
780  else
781 #endif
782  {
783  ukk = Udiag [k] ;
784  }
785  DIV (X [4*k], x [0], ukk) ;
786  DIV (X [4*k + 1], x [1], ukk) ;
787  DIV (X [4*k + 2], x [2], ukk) ;
788  DIV (X [4*k + 3], x [3], ukk) ;
789  }
790  break ;
791  }
792 }
793 
794 #endif /* KLU2_HPP */
int Ap[]
Column offsets.
Definition: klu2_simple.cpp:52
int Ai[]
Row values.
Definition: klu2_simple.cpp:54