Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2.hpp
1 /* ========================================================================== */
2 /* === klu ================================================================== */
3 /* ========================================================================== */
4 // @HEADER
5 // ***********************************************************************
6 //
7 // KLU2: A Direct Linear Solver package
8 // Copyright 2011 Sandia Corporation
9 //
10 // Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11 // U.S. Government retains certain rights in this software.
12 //
13 // This library is free software; you can redistribute it and/or modify
14 // it under the terms of the GNU Lesser General Public License as
15 // published by the Free Software Foundation; either version 2.1 of the
16 // License, or (at your option) any later version.
17 //
18 // This library is distributed in the hope that it will be useful, but
19 // WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 // Lesser General Public License for more details.
22 //
23 // You should have received a copy of the GNU Lesser General Public
24 // License along with this library; if not, write to the Free Software
25 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 // USA
27 // Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28 //
29 // KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30 // University of Florida. The Authors of KLU are Timothy A. Davis and
31 // Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32 // information for KLU.
33 //
34 // ***********************************************************************
35 // @HEADER
36 
37 /* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with
38  * optional symmetric pruning by Eisenstat and Liu [2]. The code is by Tim
39  * Davis. This algorithm is what appears as the default sparse LU routine in
40  * MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A).
41  * Note that no column ordering is provided (see COLAMD or AMD for suitable
42  * orderings). SuperLU is based on this algorithm, except that it adds the
43  * use of dense matrix operations on "supernodes" (adjacent columns with
44  * identical). This code doesn't use supernodes, thus its name ("Kent" LU,
45  * as in "Clark Kent", in contrast with Super-LU...). This algorithm is slower
46  * than SuperLU and UMFPACK for large matrices with lots of nonzeros in their
47  * factors (such as for most finite-element problems). However, for matrices
48  * with very sparse LU factors, this algorithm is typically faster than both
49  * SuperLU and UMFPACK, since in this case there is little chance to exploit
50  * dense matrix kernels (the BLAS).
51  *
52  * Only one block of A is factorized, in the BTF form. The input n is the
53  * size of the block; k1 is the first row and column in the block.
54  *
55  * NOTE: no error checking is done on the inputs. This version is not meant to
56  * be called directly by the user. Use klu_factor instead.
57  *
58  * No fill-reducing ordering is provided. The ordering quality of
59  * klu_kernel_factor is the responsibility of the caller. The input A must
60  * pre-permuted to reduce fill-in, or fill-reducing input permutation Q must
61  * be provided.
62  *
63  * The input matrix A must be in compressed-column form, with either sorted
64  * or unsorted row indices. Row indices for column j of A is in
65  * Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the
66  * numerical values. No duplicate entries are allowed.
67  *
68  * Copyright 2004-2009, Tim Davis. All rights reserved. See the README
69  * file for details on permitted use. Note that no code from The MathWorks,
70  * Inc, or from SuperLU, or from any other source appears here. The code is
71  * written from scratch, from the algorithmic description in Gilbert & Peierls'
72  * and Eisenstat & Liu's journal papers [1,2].
73  *
74  * If an input permutation Q is provided, the factorization L*U = A (P,Q)
75  * is computed, where P is determined by partial pivoting, and Q is the input
76  * ordering. If the pivot tolerance is less than 1, the "diagonal" entry that
77  * KLU attempts to choose is the diagonal of A (Q,Q). In other words, the
78  * input permutation is applied symmetrically to the input matrix. The output
79  * permutation P includes both the partial pivoting ordering and the input
80  * permutation. If Q is NULL, then it is assumed to be the identity
81  * permutation. Q is not modified.
82  *
83  * [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time
84  * Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp.,
85  * vol 9, pp. 862-874, 1988.
86  * [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in
87  * Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis &
88  * Applic., vol 13, pp. 202-211, 1992.
89  */
90 
91 /* ========================================================================== */
92 
93 #ifndef KLU2_HPP
94 #define KLU2_HPP
95 
96 #include "klu2_internal.h"
97 #include "klu2_kernel.hpp"
98 #include "klu2_memory.hpp"
99 
100 template <typename Entry, typename Int>
101 size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */
102 (
103  /* inputs, not modified */
104  Int n, /* A is n-by-n. n must be > 0. */
105  Int Ap [ ], /* size n+1, column pointers for A */
106  Int Ai [ ], /* size nz = Ap [n], row indices for A */
107  Entry Ax [ ], /* size nz, values of A */
108  Int Q [ ], /* size n, optional column permutation */
109  double Lsize, /* estimate of number of nonzeros in L */
110 
111  /* outputs, not defined on input */
112  Unit **p_LU, /* row indices and values of L and U */
113  Entry Udiag [ ], /* size n, diagonal of U */
114  Int Llen [ ], /* size n, column length of L */
115  Int Ulen [ ], /* size n, column length of U */
116  Int Lip [ ], /* size n, column pointers for L */
117  Int Uip [ ], /* size n, column pointers for U */
118  Int P [ ], /* row permutation, size n */
119  Int *lnz, /* size of L */
120  Int *unz, /* size of U */
121 
122  /* workspace, undefined on input */
123  Entry *X, /* size n double's, zero on output */
124  Int *Work, /* size 5n Int's */
125 
126  /* inputs, not modified on output */
127  Int k1, /* the block of A is from k1 to k2-1 */
128  Int PSinv [ ], /* inverse of P from symbolic factorization */
129  double Rs [ ], /* scale factors for A */
130 
131  /* inputs, modified on output */
132  Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
133  Int Offi [ ],
134  Entry Offx [ ],
135  /* --------------- */
136  KLU_common<Entry, Int> *Common
137 )
138 {
139  double maxlnz, dunits ;
140  Unit *LU ;
141  Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
142  Int lsize, usize, anz, ok ;
143  size_t lusize ;
144  ASSERT (Common != NULL) ;
145 
146  /* ---------------------------------------------------------------------- */
147  /* get control parameters, or use defaults */
148  /* ---------------------------------------------------------------------- */
149 
150  n = MAX (1, n) ;
151  anz = Ap [n+k1] - Ap [k1] ;
152 
153  if (Lsize <= 0)
154  {
155  Lsize = -Lsize ;
156  Lsize = MAX (Lsize, 1.0) ;
157  lsize = (Int) Lsize * anz + n ;
158  }
159  else
160  {
161  lsize = (Int) Lsize ;
162  }
163 
164  usize = lsize ;
165 
166  lsize = MAX (n+1, lsize) ;
167  usize = MAX (n+1, usize) ;
168 
169  maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
170  maxlnz = MIN (maxlnz, ((double) INT_MAX)) ;
171  lsize = (Int) MIN (maxlnz, lsize) ;
172  usize = (Int) MIN (maxlnz, usize) ;
173 
174  PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
175  n, anz, k1, lsize, usize, maxlnz)) ;
176 
177  /* ---------------------------------------------------------------------- */
178  /* allocate workspace and outputs */
179  /* ---------------------------------------------------------------------- */
180 
181  /* return arguments are not yet assigned */
182  *p_LU = (Unit *) NULL ;
183 
184  /* these computations are safe from size_t overflow */
185  W = Work ;
186  Pinv = (Int *) W ; W += n ;
187  Stack = (Int *) W ; W += n ;
188  Flag = (Int *) W ; W += n ;
189  Lpend = (Int *) W ; W += n ;
190  Ap_pos = (Int *) W ; W += n ;
191 
192  dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
193  DUNITS (Int, usize) + DUNITS (Entry, usize) ;
194  lusize = (size_t) dunits ;
195  ok = !INT_OVERFLOW (dunits) ;
196  LU = (Unit *) (ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL) ;
197  if (LU == NULL)
198  {
199  /* out of memory, or problem too large */
200  Common->status = KLU_OUT_OF_MEMORY ;
201  lusize = 0 ;
202  return (lusize) ;
203  }
204 
205  /* ---------------------------------------------------------------------- */
206  /* factorize */
207  /* ---------------------------------------------------------------------- */
208 
209  /* with pruning, and non-recursive depth-first-search */
210  lusize = KLU_kernel<Entry> (n, Ap, Ai, Ax, Q, lusize,
211  Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
212  X, Stack, Flag, Ap_pos, Lpend,
213  k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
214 
215  /* ---------------------------------------------------------------------- */
216  /* return LU factors, or return nothing if an error occurred */
217  /* ---------------------------------------------------------------------- */
218 
219  if (Common->status < KLU_OK)
220  {
221  LU = (Unit *) KLU_free (LU, lusize, sizeof (Unit), Common) ;
222  lusize = 0 ;
223  }
224  *p_LU = LU ;
225  PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ;
226  return (lusize) ;
227 }
228 
229 /* ========================================================================== */
230 /* === KLU_lsolve =========================================================== */
231 /* ========================================================================== */
232 
233 /* Solve Lx=b. Assumes L is unit lower triangular and where the unit diagonal
234  * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
235  * and is stored in ROW form with row dimension nrhs. nrhs must be in the
236  * range 1 to 4. */
237 
238 template <typename Entry, typename Int>
239 void KLU_lsolve
240 (
241  /* inputs, not modified: */
242  Int n,
243  Int Lip [ ],
244  Int Llen [ ],
245  Unit LU [ ],
246  Int nrhs,
247  /* right-hand-side on input, solution to Lx=b on output */
248  Entry X [ ]
249 )
250 {
251  Entry x [4], lik ;
252  Int *Li ;
253  Entry *Lx ;
254  Int k, p, len, i ;
255 
256  switch (nrhs)
257  {
258 
259  case 1:
260  for (k = 0 ; k < n ; k++)
261  {
262  x [0] = X [k] ;
263  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
264  /* unit diagonal of L is not stored*/
265  for (p = 0 ; p < len ; p++)
266  {
267  /* X [Li [p]] -= Lx [p] * x [0] ; */
268  MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
269  }
270  }
271  break ;
272 
273  case 2:
274 
275  for (k = 0 ; k < n ; k++)
276  {
277  x [0] = X [2*k ] ;
278  x [1] = X [2*k + 1] ;
279  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
280  for (p = 0 ; p < len ; p++)
281  {
282  i = Li [p] ;
283  lik = Lx [p] ;
284  MULT_SUB (X [2*i], lik, x [0]) ;
285  MULT_SUB (X [2*i + 1], lik, x [1]) ;
286  }
287  }
288  break ;
289 
290  case 3:
291 
292  for (k = 0 ; k < n ; k++)
293  {
294  x [0] = X [3*k ] ;
295  x [1] = X [3*k + 1] ;
296  x [2] = X [3*k + 2] ;
297  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
298  for (p = 0 ; p < len ; p++)
299  {
300  i = Li [p] ;
301  lik = Lx [p] ;
302  MULT_SUB (X [3*i], lik, x [0]) ;
303  MULT_SUB (X [3*i + 1], lik, x [1]) ;
304  MULT_SUB (X [3*i + 2], lik, x [2]) ;
305  }
306  }
307  break ;
308 
309  case 4:
310 
311  for (k = 0 ; k < n ; k++)
312  {
313  x [0] = X [4*k ] ;
314  x [1] = X [4*k + 1] ;
315  x [2] = X [4*k + 2] ;
316  x [3] = X [4*k + 3] ;
317  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
318  for (p = 0 ; p < len ; p++)
319  {
320  i = Li [p] ;
321  lik = Lx [p] ;
322  MULT_SUB (X [4*i], lik, x [0]) ;
323  MULT_SUB (X [4*i + 1], lik, x [1]) ;
324  MULT_SUB (X [4*i + 2], lik, x [2]) ;
325  MULT_SUB (X [4*i + 3], lik, x [3]) ;
326  }
327  }
328  break ;
329 
330  }
331 }
332 
333 /* ========================================================================== */
334 /* === KLU_usolve =========================================================== */
335 /* ========================================================================== */
336 
337 /* Solve Ux=b. Assumes U is non-unit upper triangular and where the diagonal
338  * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
339  * and is stored in ROW form with row dimension nrhs. nrhs must be in the
340  * range 1 to 4. */
341 
342 template <typename Entry, typename Int>
343 void KLU_usolve
344 (
345  /* inputs, not modified: */
346  Int n,
347  Int Uip [ ],
348  Int Ulen [ ],
349  Unit LU [ ],
350  Entry Udiag [ ],
351  Int nrhs,
352  /* right-hand-side on input, solution to Ux=b on output */
353  Entry X [ ]
354 )
355 {
356  Entry x [4], uik, ukk ;
357  Int *Ui ;
358  Entry *Ux ;
359  Int k, p, len, i ;
360 
361  switch (nrhs)
362  {
363 
364  case 1:
365 
366  for (k = n-1 ; k >= 0 ; k--)
367  {
368  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
369  /* x [0] = X [k] / Udiag [k] ; */
370  DIV (x [0], X [k], Udiag [k]) ;
371  X [k] = x [0] ;
372  for (p = 0 ; p < len ; p++)
373  {
374  /* X [Ui [p]] -= Ux [p] * x [0] ; */
375  MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
376 
377  }
378  }
379 
380  break ;
381 
382  case 2:
383 
384  for (k = n-1 ; k >= 0 ; k--)
385  {
386  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
387  ukk = Udiag [k] ;
388  /* x [0] = X [2*k ] / ukk ;
389  x [1] = X [2*k + 1] / ukk ; */
390  DIV (x [0], X [2*k], ukk) ;
391  DIV (x [1], X [2*k + 1], ukk) ;
392 
393  X [2*k ] = x [0] ;
394  X [2*k + 1] = x [1] ;
395  for (p = 0 ; p < len ; p++)
396  {
397  i = Ui [p] ;
398  uik = Ux [p] ;
399  /* X [2*i ] -= uik * x [0] ;
400  X [2*i + 1] -= uik * x [1] ; */
401  MULT_SUB (X [2*i], uik, x [0]) ;
402  MULT_SUB (X [2*i + 1], uik, x [1]) ;
403  }
404  }
405 
406  break ;
407 
408  case 3:
409 
410  for (k = n-1 ; k >= 0 ; k--)
411  {
412  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
413  ukk = Udiag [k] ;
414 
415  DIV (x [0], X [3*k], ukk) ;
416  DIV (x [1], X [3*k + 1], ukk) ;
417  DIV (x [2], X [3*k + 2], ukk) ;
418 
419  X [3*k ] = x [0] ;
420  X [3*k + 1] = x [1] ;
421  X [3*k + 2] = x [2] ;
422  for (p = 0 ; p < len ; p++)
423  {
424  i = Ui [p] ;
425  uik = Ux [p] ;
426  MULT_SUB (X [3*i], uik, x [0]) ;
427  MULT_SUB (X [3*i + 1], uik, x [1]) ;
428  MULT_SUB (X [3*i + 2], uik, x [2]) ;
429  }
430  }
431 
432  break ;
433 
434  case 4:
435 
436  for (k = n-1 ; k >= 0 ; k--)
437  {
438  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
439  ukk = Udiag [k] ;
440 
441  DIV (x [0], X [4*k], ukk) ;
442  DIV (x [1], X [4*k + 1], ukk) ;
443  DIV (x [2], X [4*k + 2], ukk) ;
444  DIV (x [3], X [4*k + 3], ukk) ;
445 
446  X [4*k ] = x [0] ;
447  X [4*k + 1] = x [1] ;
448  X [4*k + 2] = x [2] ;
449  X [4*k + 3] = x [3] ;
450  for (p = 0 ; p < len ; p++)
451  {
452  i = Ui [p] ;
453  uik = Ux [p] ;
454 
455  MULT_SUB (X [4*i], uik, x [0]) ;
456  MULT_SUB (X [4*i + 1], uik, x [1]) ;
457  MULT_SUB (X [4*i + 2], uik, x [2]) ;
458  MULT_SUB (X [4*i + 3], uik, x [3]) ;
459  }
460  }
461 
462  break ;
463 
464  }
465 }
466 
467 /* ========================================================================== */
468 /* === KLU_ltsolve ========================================================== */
469 /* ========================================================================== */
470 
471 /* Solve L'x=b. Assumes L is unit lower triangular and where the unit diagonal
472  * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
473  * and is stored in ROW form with row dimension nrhs. nrhs must in the
474  * range 1 to 4. */
475 
476 template <typename Entry, typename Int>
477 void KLU_ltsolve
478 (
479  /* inputs, not modified: */
480  Int n,
481  Int Lip [ ],
482  Int Llen [ ],
483  Unit LU [ ],
484  Int nrhs,
485 #ifdef COMPLEX
486  Int conj_solve,
487 #endif
488  /* right-hand-side on input, solution to L'x=b on output */
489  Entry X [ ]
490 )
491 {
492  Entry x [4], lik ;
493  Int *Li ;
494  Entry *Lx ;
495  Int k, p, len, i ;
496 
497  switch (nrhs)
498  {
499 
500  case 1:
501 
502  for (k = n-1 ; k >= 0 ; k--)
503  {
504  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
505  x [0] = X [k] ;
506  for (p = 0 ; p < len ; p++)
507  {
508 #ifdef COMPLEX
509  if (conj_solve)
510  {
511  /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
512  MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
513  }
514  else
515 #endif
516  {
517  /*x [0] -= Lx [p] * X [Li [p]] ;*/
518  MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
519  }
520  }
521  X [k] = x [0] ;
522  }
523  break ;
524 
525  case 2:
526 
527  for (k = n-1 ; k >= 0 ; k--)
528  {
529  x [0] = X [2*k ] ;
530  x [1] = X [2*k + 1] ;
531  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
532  for (p = 0 ; p < len ; p++)
533  {
534  i = Li [p] ;
535 #ifdef COMPLEX
536  if (conj_solve)
537  {
538  CONJ (lik, Lx [p]) ;
539  }
540  else
541 #endif
542  {
543  lik = Lx [p] ;
544  }
545  MULT_SUB (x [0], lik, X [2*i]) ;
546  MULT_SUB (x [1], lik, X [2*i + 1]) ;
547  }
548  X [2*k ] = x [0] ;
549  X [2*k + 1] = x [1] ;
550  }
551  break ;
552 
553  case 3:
554 
555  for (k = n-1 ; k >= 0 ; k--)
556  {
557  x [0] = X [3*k ] ;
558  x [1] = X [3*k + 1] ;
559  x [2] = X [3*k + 2] ;
560  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
561  for (p = 0 ; p < len ; p++)
562  {
563  i = Li [p] ;
564 #ifdef COMPLEX
565  if (conj_solve)
566  {
567  CONJ (lik, Lx [p]) ;
568  }
569  else
570 #endif
571  {
572  lik = Lx [p] ;
573  }
574  MULT_SUB (x [0], lik, X [3*i]) ;
575  MULT_SUB (x [1], lik, X [3*i + 1]) ;
576  MULT_SUB (x [2], lik, X [3*i + 2]) ;
577  }
578  X [3*k ] = x [0] ;
579  X [3*k + 1] = x [1] ;
580  X [3*k + 2] = x [2] ;
581  }
582  break ;
583 
584  case 4:
585 
586  for (k = n-1 ; k >= 0 ; k--)
587  {
588  x [0] = X [4*k ] ;
589  x [1] = X [4*k + 1] ;
590  x [2] = X [4*k + 2] ;
591  x [3] = X [4*k + 3] ;
592  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
593  for (p = 0 ; p < len ; p++)
594  {
595  i = Li [p] ;
596 #ifdef COMPLEX
597  if (conj_solve)
598  {
599  CONJ (lik, Lx [p]) ;
600  }
601  else
602 #endif
603  {
604  lik = Lx [p] ;
605  }
606  MULT_SUB (x [0], lik, X [4*i]) ;
607  MULT_SUB (x [1], lik, X [4*i + 1]) ;
608  MULT_SUB (x [2], lik, X [4*i + 2]) ;
609  MULT_SUB (x [3], lik, X [4*i + 3]) ;
610  }
611  X [4*k ] = x [0] ;
612  X [4*k + 1] = x [1] ;
613  X [4*k + 2] = x [2] ;
614  X [4*k + 3] = x [3] ;
615  }
616  break ;
617  }
618 }
619 
620 
621 /* ========================================================================== */
622 /* === KLU_utsolve ========================================================== */
623 /* ========================================================================== */
624 
625 /* Solve U'x=b. Assumes U is non-unit upper triangular and where the diagonal
626  * entry is stored (and appears last in each column of U). Overwrites B
627  * with the solution X. B is n-by-nrhs and is stored in ROW form with row
628  * dimension nrhs. nrhs must be in the range 1 to 4. */
629 
630 template <typename Entry, typename Int>
631 void KLU_utsolve
632 (
633  /* inputs, not modified: */
634  Int n,
635  Int Uip [ ],
636  Int Ulen [ ],
637  Unit LU [ ],
638  Entry Udiag [ ],
639  Int nrhs,
640 #ifdef COMPLEX
641  Int conj_solve,
642 #endif
643  /* right-hand-side on input, solution to Ux=b on output */
644  Entry X [ ]
645 )
646 {
647  Entry x [4], uik, ukk ;
648  Int k, p, len, i ;
649  Int *Ui ;
650  Entry *Ux ;
651 
652  switch (nrhs)
653  {
654 
655  case 1:
656 
657  for (k = 0 ; k < n ; k++)
658  {
659  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
660  x [0] = X [k] ;
661  for (p = 0 ; p < len ; p++)
662  {
663 #ifdef COMPLEX
664  if (conj_solve)
665  {
666  /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
667  MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
668  }
669  else
670 #endif
671  {
672  /* x [0] -= Ux [p] * X [Ui [p]] ; */
673  MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
674  }
675  }
676 #ifdef COMPLEX
677  if (conj_solve)
678  {
679  CONJ (ukk, Udiag [k]) ;
680  }
681  else
682 #endif
683  {
684  ukk = Udiag [k] ;
685  }
686  DIV (X [k], x [0], ukk) ;
687  }
688  break ;
689 
690  case 2:
691 
692  for (k = 0 ; k < n ; k++)
693  {
694  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
695  x [0] = X [2*k ] ;
696  x [1] = X [2*k + 1] ;
697  for (p = 0 ; p < len ; p++)
698  {
699  i = Ui [p] ;
700 #ifdef COMPLEX
701  if (conj_solve)
702  {
703  CONJ (uik, Ux [p]) ;
704  }
705  else
706 #endif
707  {
708  uik = Ux [p] ;
709  }
710  MULT_SUB (x [0], uik, X [2*i]) ;
711  MULT_SUB (x [1], uik, X [2*i + 1]) ;
712  }
713 #ifdef COMPLEX
714  if (conj_solve)
715  {
716  CONJ (ukk, Udiag [k]) ;
717  }
718  else
719 #endif
720  {
721  ukk = Udiag [k] ;
722  }
723  DIV (X [2*k], x [0], ukk) ;
724  DIV (X [2*k + 1], x [1], ukk) ;
725  }
726  break ;
727 
728  case 3:
729 
730  for (k = 0 ; k < n ; k++)
731  {
732  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
733  x [0] = X [3*k ] ;
734  x [1] = X [3*k + 1] ;
735  x [2] = X [3*k + 2] ;
736  for (p = 0 ; p < len ; p++)
737  {
738  i = Ui [p] ;
739 #ifdef COMPLEX
740  if (conj_solve)
741  {
742  CONJ (uik, Ux [p]) ;
743  }
744  else
745 #endif
746  {
747  uik = Ux [p] ;
748  }
749  MULT_SUB (x [0], uik, X [3*i]) ;
750  MULT_SUB (x [1], uik, X [3*i + 1]) ;
751  MULT_SUB (x [2], uik, X [3*i + 2]) ;
752  }
753 #ifdef COMPLEX
754  if (conj_solve)
755  {
756  CONJ (ukk, Udiag [k]) ;
757  }
758  else
759 #endif
760  {
761  ukk = Udiag [k] ;
762  }
763  DIV (X [3*k], x [0], ukk) ;
764  DIV (X [3*k + 1], x [1], ukk) ;
765  DIV (X [3*k + 2], x [2], ukk) ;
766  }
767  break ;
768 
769  case 4:
770 
771  for (k = 0 ; k < n ; k++)
772  {
773  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
774  x [0] = X [4*k ] ;
775  x [1] = X [4*k + 1] ;
776  x [2] = X [4*k + 2] ;
777  x [3] = X [4*k + 3] ;
778  for (p = 0 ; p < len ; p++)
779  {
780  i = Ui [p] ;
781 #ifdef COMPLEX
782  if (conj_solve)
783  {
784  CONJ (uik, Ux [p]) ;
785  }
786  else
787 #endif
788  {
789  uik = Ux [p] ;
790  }
791  MULT_SUB (x [0], uik, X [4*i]) ;
792  MULT_SUB (x [1], uik, X [4*i + 1]) ;
793  MULT_SUB (x [2], uik, X [4*i + 2]) ;
794  MULT_SUB (x [3], uik, X [4*i + 3]) ;
795  }
796 #ifdef COMPLEX
797  if (conj_solve)
798  {
799  CONJ (ukk, Udiag [k]) ;
800  }
801  else
802 #endif
803  {
804  ukk = Udiag [k] ;
805  }
806  DIV (X [4*k], x [0], ukk) ;
807  DIV (X [4*k + 1], x [1], ukk) ;
808  DIV (X [4*k + 2], x [2], ukk) ;
809  DIV (X [4*k + 3], x [3], ukk) ;
810  }
811  break ;
812  }
813 }
814 
815 #endif /* KLU2_HPP */