Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2.hpp
1 /* ========================================================================== */
2 /* === klu ================================================================== */
3 /* ========================================================================== */
4 // @HEADER
5 // ***********************************************************************
6 //
7 // KLU2: A Direct Linear Solver package
8 // Copyright 2011 Sandia Corporation
9 //
10 // Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11 // U.S. Government retains certain rights in this software.
12 //
13 // This library is free software; you can redistribute it and/or modify
14 // it under the terms of the GNU Lesser General Public License as
15 // published by the Free Software Foundation; either version 2.1 of the
16 // License, or (at your option) any later version.
17 //
18 // This library is distributed in the hope that it will be useful, but
19 // WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 // Lesser General Public License for more details.
22 //
23 // You should have received a copy of the GNU Lesser General Public
24 // License along with this library; if not, write to the Free Software
25 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 // USA
27 // Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28 //
29 // KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30 // University of Florida. The Authors of KLU are Timothy A. Davis and
31 // Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32 // information for KLU.
33 //
34 // ***********************************************************************
35 // @HEADER
36 
37 /* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with
38  * optional symmetric pruning by Eisenstat and Liu [2]. The code is by Tim
39  * Davis. This algorithm is what appears as the default sparse LU routine in
40  * MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A).
41  * Note that no column ordering is provided (see COLAMD or AMD for suitable
42  * orderings). SuperLU is based on this algorithm, except that it adds the
43  * use of dense matrix operations on "supernodes" (adjacent columns with
44  * identical). This code doesn't use supernodes, thus its name ("Kent" LU,
45  * as in "Clark Kent", in contrast with Super-LU...). This algorithm is slower
46  * than SuperLU and UMFPACK for large matrices with lots of nonzeros in their
47  * factors (such as for most finite-element problems). However, for matrices
48  * with very sparse LU factors, this algorithm is typically faster than both
49  * SuperLU and UMFPACK, since in this case there is little chance to exploit
50  * dense matrix kernels (the BLAS).
51  *
52  * Only one block of A is factorized, in the BTF form. The input n is the
53  * size of the block; k1 is the first row and column in the block.
54  *
55  * NOTE: no error checking is done on the inputs. This version is not meant to
56  * be called directly by the user. Use klu_factor instead.
57  *
58  * No fill-reducing ordering is provided. The ordering quality of
59  * klu_kernel_factor is the responsibility of the caller. The input A must
60  * pre-permuted to reduce fill-in, or fill-reducing input permutation Q must
61  * be provided.
62  *
63  * The input matrix A must be in compressed-column form, with either sorted
64  * or unsorted row indices. Row indices for column j of A is in
65  * Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the
66  * numerical values. No duplicate entries are allowed.
67  *
68  * Copyright 2004-2009, Tim Davis. All rights reserved. See the README
69  * file for details on permitted use. Note that no code from The MathWorks,
70  * Inc, or from SuperLU, or from any other source appears here. The code is
71  * written from scratch, from the algorithmic description in Gilbert & Peierls'
72  * and Eisenstat & Liu's journal papers [1,2].
73  *
74  * If an input permutation Q is provided, the factorization L*U = A (P,Q)
75  * is computed, where P is determined by partial pivoting, and Q is the input
76  * ordering. If the pivot tolerance is less than 1, the "diagonal" entry that
77  * KLU attempts to choose is the diagonal of A (Q,Q). In other words, the
78  * input permutation is applied symmetrically to the input matrix. The output
79  * permutation P includes both the partial pivoting ordering and the input
80  * permutation. If Q is NULL, then it is assumed to be the identity
81  * permutation. Q is not modified.
82  *
83  * [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time
84  * Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp.,
85  * vol 9, pp. 862-874, 1988.
86  * [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in
87  * Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis &
88  * Applic., vol 13, pp. 202-211, 1992.
89  */
90 
91 /* ========================================================================== */
92 
93 #ifndef KLU2_HPP
94 #define KLU2_HPP
95 
96 #include "klu2_internal.h"
97 #include "klu2_kernel.hpp"
98 #include "klu2_memory.hpp"
99 
100 template <typename Entry, typename Int>
101 size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */
102 (
103  /* inputs, not modified */
104  Int n, /* A is n-by-n. n must be > 0. */
105  Int Ap [ ], /* size n+1, column pointers for A */
106  Int Ai [ ], /* size nz = Ap [n], row indices for A */
107  Entry Ax [ ], /* size nz, values of A */
108  Int Q [ ], /* size n, optional column permutation */
109  double Lsize, /* estimate of number of nonzeros in L */
110 
111  /* outputs, not defined on input */
112  Unit **p_LU, /* row indices and values of L and U */
113  Entry Udiag [ ], /* size n, diagonal of U */
114  Int Llen [ ], /* size n, column length of L */
115  Int Ulen [ ], /* size n, column length of U */
116  Int Lip [ ], /* size n, column pointers for L */
117  Int Uip [ ], /* size n, column pointers for U */
118  Int P [ ], /* row permutation, size n */
119  Int *lnz, /* size of L */
120  Int *unz, /* size of U */
121 
122  /* workspace, undefined on input */
123  Entry *X, /* size n double's, zero on output */
124  Int *Work, /* size 5n Int's */
125 
126  /* inputs, not modified on output */
127  Int k1, /* the block of A is from k1 to k2-1 */
128  Int PSinv [ ], /* inverse of P from symbolic factorization */
129  double Rs [ ], /* scale factors for A */
130 
131  /* inputs, modified on output */
132  Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
133  Int Offi [ ],
134  Entry Offx [ ],
135  /* --------------- */
136  KLU_common<Entry, Int> *Common
137 )
138 {
139  double maxlnz, dunits ;
140  Unit *LU ;
141  Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
142  Int lsize, usize, anz, ok ;
143  size_t lusize ;
144  ASSERT (Common != NULL) ;
145 
146  /* ---------------------------------------------------------------------- */
147  /* get control parameters, or use defaults */
148  /* ---------------------------------------------------------------------- */
149 
150  n = MAX (1, n) ;
151  anz = Ap [n+k1] - Ap [k1] ;
152 
153  if (Lsize <= 0)
154  {
155  Lsize = -Lsize ;
156  Lsize = MAX (Lsize, 1.0) ;
157  lsize = (Int) Lsize * anz + n ;
158  }
159  else
160  {
161  lsize = (Int) Lsize ;
162  }
163 
164  usize = lsize ;
165 
166  lsize = MAX (n+1, lsize) ;
167  usize = MAX (n+1, usize) ;
168 
169  maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
170  maxlnz = MIN (maxlnz, ((double) INT_MAX)) ;
171  lsize = (Int) MIN (maxlnz, lsize) ;
172  usize = (Int) MIN (maxlnz, usize) ;
173 
174  PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
175  n, anz, k1, lsize, usize, maxlnz)) ;
176 
177  /* ---------------------------------------------------------------------- */
178  /* allocate workspace and outputs */
179  /* ---------------------------------------------------------------------- */
180 
181  /* return arguments are not yet assigned */
182  *p_LU = (Unit *) NULL ;
183 
184  /* these computations are safe from size_t overflow */
185  W = Work ;
186  Pinv = (Int *) W ; W += n ;
187  Stack = (Int *) W ; W += n ;
188  Flag = (Int *) W ; W += n ;
189  Lpend = (Int *) W ; W += n ;
190  Ap_pos = (Int *) W ; W += n ;
191 
192  dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
193  DUNITS (Int, usize) + DUNITS (Entry, usize) ;
194  lusize = (size_t) dunits ;
195  ok = !INT_OVERFLOW (dunits) ;
196  LU = (Unit *) (ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL) ;
197  if (LU == NULL)
198  {
199  /* out of memory, or problem too large */
200  Common->status = KLU_OUT_OF_MEMORY ;
201  lusize = 0 ;
202  return (lusize) ;
203  }
204 
205  /* ---------------------------------------------------------------------- */
206  /* factorize */
207  /* ---------------------------------------------------------------------- */
208 
209  /* with pruning, and non-recursive depth-first-search */
210  lusize = KLU_kernel<Entry> (n, Ap, Ai, Ax, Q, lusize,
211  Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
212  X, Stack, Flag, Ap_pos, Lpend,
213  k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
214 
215  /* ---------------------------------------------------------------------- */
216  /* return LU factors, or return nothing if an error occurred */
217  /* ---------------------------------------------------------------------- */
218 
219  if (Common->status < KLU_OK)
220  {
221  LU = (Unit *) KLU_free (LU, lusize, sizeof (Unit), Common) ;
222  lusize = 0 ;
223  }
224  *p_LU = LU ;
225  PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ;
226  return (lusize) ;
227 }
228 
229 /* ========================================================================== */
230 /* === KLU_lsolve =========================================================== */
231 /* ========================================================================== */
232 
233 /* Solve Lx=b. Assumes L is unit lower triangular and where the unit diagonal
234  * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
235  * and is stored in ROW form with row dimension nrhs. nrhs must be in the
236  * range 1 to 4. */
237 
238 template <typename Entry, typename Int>
239 void KLU_lsolve
240 (
241  /* inputs, not modified: */
242  const Int n,
243  const Int *Lip,
244  const Int *Llen,
245  Unit *LU,
246  const Int nrhs,
247  /* right-hand-side on input, solution to Lx=b on output */
248  Entry *X
249 )
250 {
251  Entry x [4], lik ;
252  Int *Li ;
253  Entry *Lx ;
254  Int k, p, len, i ;
255 
256  switch (nrhs)
257  {
258 
259  case 1:
260  for (k = 0 ; k < n ; k++)
261  {
262  x [0] = X [k] ;
263  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
264  /* unit diagonal of L is not stored*/
265  for (p = 0 ; p < len ; p++)
266  {
267  /* X [Li [p]] -= Lx [p] * x [0] ; */
268  MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
269  }
270  }
271  break ;
272 
273  case 2:
274 
275  for (k = 0 ; k < n ; k++)
276  {
277  x [0] = X [2*k ] ;
278  x [1] = X [2*k + 1] ;
279  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
280  for (p = 0 ; p < len ; p++)
281  {
282  i = Li [p] ;
283  lik = Lx [p] ;
284  MULT_SUB (X [2*i], lik, x [0]) ;
285  MULT_SUB (X [2*i + 1], lik, x [1]) ;
286  }
287  }
288  break ;
289 
290  case 3:
291 
292  for (k = 0 ; k < n ; k++)
293  {
294  x [0] = X [3*k ] ;
295  x [1] = X [3*k + 1] ;
296  x [2] = X [3*k + 2] ;
297  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
298  for (p = 0 ; p < len ; p++)
299  {
300  i = Li [p] ;
301  lik = Lx [p] ;
302  MULT_SUB (X [3*i], lik, x [0]) ;
303  MULT_SUB (X [3*i + 1], lik, x [1]) ;
304  MULT_SUB (X [3*i + 2], lik, x [2]) ;
305  }
306  }
307  break ;
308 
309  case 4:
310 
311  for (k = 0 ; k < n ; k++)
312  {
313  x [0] = X [4*k ] ;
314  x [1] = X [4*k + 1] ;
315  x [2] = X [4*k + 2] ;
316  x [3] = X [4*k + 3] ;
317  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
318  for (p = 0 ; p < len ; p++)
319  {
320  i = Li [p] ;
321  lik = Lx [p] ;
322  MULT_SUB (X [4*i], lik, x [0]) ;
323  MULT_SUB (X [4*i + 1], lik, x [1]) ;
324  MULT_SUB (X [4*i + 2], lik, x [2]) ;
325  MULT_SUB (X [4*i + 3], lik, x [3]) ;
326  }
327  }
328  break ;
329 
330  default:
331  throw std::domain_error("More than 4 right-hand sides is not supported.");
332 
333  }
334 }
335 
336 /* ========================================================================== */
337 /* === KLU_usolve =========================================================== */
338 /* ========================================================================== */
339 
340 /* Solve Ux=b. Assumes U is non-unit upper triangular and where the diagonal
341  * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
342  * and is stored in ROW form with row dimension nrhs. nrhs must be in the
343  * range 1 to 4. */
344 
345 template <typename Entry, typename Int>
346 void KLU_usolve
347 (
348  /* inputs, not modified: */
349  Int n,
350  const Int Uip [ ],
351  const Int Ulen [ ],
352  Unit LU [ ],
353  Entry Udiag [ ],
354  Int nrhs,
355  /* right-hand-side on input, solution to Ux=b on output */
356  Entry X [ ]
357 )
358 {
359  Entry x [4], uik, ukk ;
360  Int *Ui ;
361  Entry *Ux ;
362  Int k, p, len, i ;
363 
364  switch (nrhs)
365  {
366 
367  case 1:
368 
369  for (k = n-1 ; k >= 0 ; k--)
370  {
371  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
372  /* x [0] = X [k] / Udiag [k] ; */
373  DIV (x [0], X [k], Udiag [k]) ;
374  X [k] = x [0] ;
375  for (p = 0 ; p < len ; p++)
376  {
377  /* X [Ui [p]] -= Ux [p] * x [0] ; */
378  MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
379 
380  }
381  }
382 
383  break ;
384 
385  case 2:
386 
387  for (k = n-1 ; k >= 0 ; k--)
388  {
389  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
390  ukk = Udiag [k] ;
391  /* x [0] = X [2*k ] / ukk ;
392  x [1] = X [2*k + 1] / ukk ; */
393  DIV (x [0], X [2*k], ukk) ;
394  DIV (x [1], X [2*k + 1], ukk) ;
395 
396  X [2*k ] = x [0] ;
397  X [2*k + 1] = x [1] ;
398  for (p = 0 ; p < len ; p++)
399  {
400  i = Ui [p] ;
401  uik = Ux [p] ;
402  /* X [2*i ] -= uik * x [0] ;
403  X [2*i + 1] -= uik * x [1] ; */
404  MULT_SUB (X [2*i], uik, x [0]) ;
405  MULT_SUB (X [2*i + 1], uik, x [1]) ;
406  }
407  }
408 
409  break ;
410 
411  case 3:
412 
413  for (k = n-1 ; k >= 0 ; k--)
414  {
415  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
416  ukk = Udiag [k] ;
417 
418  DIV (x [0], X [3*k], ukk) ;
419  DIV (x [1], X [3*k + 1], ukk) ;
420  DIV (x [2], X [3*k + 2], ukk) ;
421 
422  X [3*k ] = x [0] ;
423  X [3*k + 1] = x [1] ;
424  X [3*k + 2] = x [2] ;
425  for (p = 0 ; p < len ; p++)
426  {
427  i = Ui [p] ;
428  uik = Ux [p] ;
429  MULT_SUB (X [3*i], uik, x [0]) ;
430  MULT_SUB (X [3*i + 1], uik, x [1]) ;
431  MULT_SUB (X [3*i + 2], uik, x [2]) ;
432  }
433  }
434 
435  break ;
436 
437  case 4:
438 
439  for (k = n-1 ; k >= 0 ; k--)
440  {
441  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
442  ukk = Udiag [k] ;
443 
444  DIV (x [0], X [4*k], ukk) ;
445  DIV (x [1], X [4*k + 1], ukk) ;
446  DIV (x [2], X [4*k + 2], ukk) ;
447  DIV (x [3], X [4*k + 3], ukk) ;
448 
449  X [4*k ] = x [0] ;
450  X [4*k + 1] = x [1] ;
451  X [4*k + 2] = x [2] ;
452  X [4*k + 3] = x [3] ;
453  for (p = 0 ; p < len ; p++)
454  {
455  i = Ui [p] ;
456  uik = Ux [p] ;
457 
458  MULT_SUB (X [4*i], uik, x [0]) ;
459  MULT_SUB (X [4*i + 1], uik, x [1]) ;
460  MULT_SUB (X [4*i + 2], uik, x [2]) ;
461  MULT_SUB (X [4*i + 3], uik, x [3]) ;
462  }
463  }
464 
465  break ;
466 
467  }
468 }
469 
470 /* ========================================================================== */
471 /* === KLU_ltsolve ========================================================== */
472 /* ========================================================================== */
473 
474 /* Solve L'x=b. Assumes L is unit lower triangular and where the unit diagonal
475  * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
476  * and is stored in ROW form with row dimension nrhs. nrhs must in the
477  * range 1 to 4. */
478 
479 template <typename Entry, typename Int>
480 void KLU_ltsolve
481 (
482  /* inputs, not modified: */
483  Int n,
484  const Int Lip [ ],
485  const Int Llen [ ],
486  Unit LU [ ],
487  Int nrhs,
488 #ifdef COMPLEX
489  Int conj_solve,
490 #endif
491  /* right-hand-side on input, solution to L'x=b on output */
492  Entry X [ ]
493 )
494 {
495  Entry x [4], lik ;
496  Int *Li ;
497  Entry *Lx ;
498  Int k, p, len, i ;
499 
500  switch (nrhs)
501  {
502 
503  case 1:
504 
505  for (k = n-1 ; k >= 0 ; k--)
506  {
507  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
508  x [0] = X [k] ;
509  for (p = 0 ; p < len ; p++)
510  {
511 #ifdef COMPLEX
512  if (conj_solve)
513  {
514  /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
515  MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
516  }
517  else
518 #endif
519  {
520  /*x [0] -= Lx [p] * X [Li [p]] ;*/
521  MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
522  }
523  }
524  X [k] = x [0] ;
525  }
526  break ;
527 
528  case 2:
529 
530  for (k = n-1 ; k >= 0 ; k--)
531  {
532  x [0] = X [2*k ] ;
533  x [1] = X [2*k + 1] ;
534  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
535  for (p = 0 ; p < len ; p++)
536  {
537  i = Li [p] ;
538 #ifdef COMPLEX
539  if (conj_solve)
540  {
541  CONJ (lik, Lx [p]) ;
542  }
543  else
544 #endif
545  {
546  lik = Lx [p] ;
547  }
548  MULT_SUB (x [0], lik, X [2*i]) ;
549  MULT_SUB (x [1], lik, X [2*i + 1]) ;
550  }
551  X [2*k ] = x [0] ;
552  X [2*k + 1] = x [1] ;
553  }
554  break ;
555 
556  case 3:
557 
558  for (k = n-1 ; k >= 0 ; k--)
559  {
560  x [0] = X [3*k ] ;
561  x [1] = X [3*k + 1] ;
562  x [2] = X [3*k + 2] ;
563  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
564  for (p = 0 ; p < len ; p++)
565  {
566  i = Li [p] ;
567 #ifdef COMPLEX
568  if (conj_solve)
569  {
570  CONJ (lik, Lx [p]) ;
571  }
572  else
573 #endif
574  {
575  lik = Lx [p] ;
576  }
577  MULT_SUB (x [0], lik, X [3*i]) ;
578  MULT_SUB (x [1], lik, X [3*i + 1]) ;
579  MULT_SUB (x [2], lik, X [3*i + 2]) ;
580  }
581  X [3*k ] = x [0] ;
582  X [3*k + 1] = x [1] ;
583  X [3*k + 2] = x [2] ;
584  }
585  break ;
586 
587  case 4:
588 
589  for (k = n-1 ; k >= 0 ; k--)
590  {
591  x [0] = X [4*k ] ;
592  x [1] = X [4*k + 1] ;
593  x [2] = X [4*k + 2] ;
594  x [3] = X [4*k + 3] ;
595  GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
596  for (p = 0 ; p < len ; p++)
597  {
598  i = Li [p] ;
599 #ifdef COMPLEX
600  if (conj_solve)
601  {
602  CONJ (lik, Lx [p]) ;
603  }
604  else
605 #endif
606  {
607  lik = Lx [p] ;
608  }
609  MULT_SUB (x [0], lik, X [4*i]) ;
610  MULT_SUB (x [1], lik, X [4*i + 1]) ;
611  MULT_SUB (x [2], lik, X [4*i + 2]) ;
612  MULT_SUB (x [3], lik, X [4*i + 3]) ;
613  }
614  X [4*k ] = x [0] ;
615  X [4*k + 1] = x [1] ;
616  X [4*k + 2] = x [2] ;
617  X [4*k + 3] = x [3] ;
618  }
619  break ;
620  }
621 }
622 
623 
624 /* ========================================================================== */
625 /* === KLU_utsolve ========================================================== */
626 /* ========================================================================== */
627 
628 /* Solve U'x=b. Assumes U is non-unit upper triangular and where the diagonal
629  * entry is stored (and appears last in each column of U). Overwrites B
630  * with the solution X. B is n-by-nrhs and is stored in ROW form with row
631  * dimension nrhs. nrhs must be in the range 1 to 4. */
632 
633 template <typename Entry, typename Int>
634 void KLU_utsolve
635 (
636  /* inputs, not modified: */
637  const Int n,
638  const Int Uip [ ],
639  const Int Ulen [ ],
640  Unit LU [ ],
641  const Entry Udiag [ ],
642  Int nrhs,
643 #ifdef COMPLEX
644  Int conj_solve,
645 #endif
646  /* right-hand-side on input, solution to Ux=b on output */
647  Entry X [ ]
648 )
649 {
650  Entry x [4], uik, ukk ;
651  Int k, p, len, i ;
652  Int *Ui ;
653  Entry *Ux ;
654 
655  switch (nrhs)
656  {
657 
658  case 1:
659 
660  for (k = 0 ; k < n ; k++)
661  {
662  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
663  x [0] = X [k] ;
664  for (p = 0 ; p < len ; p++)
665  {
666 #ifdef COMPLEX
667  if (conj_solve)
668  {
669  /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
670  MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
671  }
672  else
673 #endif
674  {
675  /* x [0] -= Ux [p] * X [Ui [p]] ; */
676  MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
677  }
678  }
679 #ifdef COMPLEX
680  if (conj_solve)
681  {
682  CONJ (ukk, Udiag [k]) ;
683  }
684  else
685 #endif
686  {
687  ukk = Udiag [k] ;
688  }
689  DIV (X [k], x [0], ukk) ;
690  }
691  break ;
692 
693  case 2:
694 
695  for (k = 0 ; k < n ; k++)
696  {
697  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
698  x [0] = X [2*k ] ;
699  x [1] = X [2*k + 1] ;
700  for (p = 0 ; p < len ; p++)
701  {
702  i = Ui [p] ;
703 #ifdef COMPLEX
704  if (conj_solve)
705  {
706  CONJ (uik, Ux [p]) ;
707  }
708  else
709 #endif
710  {
711  uik = Ux [p] ;
712  }
713  MULT_SUB (x [0], uik, X [2*i]) ;
714  MULT_SUB (x [1], uik, X [2*i + 1]) ;
715  }
716 #ifdef COMPLEX
717  if (conj_solve)
718  {
719  CONJ (ukk, Udiag [k]) ;
720  }
721  else
722 #endif
723  {
724  ukk = Udiag [k] ;
725  }
726  DIV (X [2*k], x [0], ukk) ;
727  DIV (X [2*k + 1], x [1], ukk) ;
728  }
729  break ;
730 
731  case 3:
732 
733  for (k = 0 ; k < n ; k++)
734  {
735  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
736  x [0] = X [3*k ] ;
737  x [1] = X [3*k + 1] ;
738  x [2] = X [3*k + 2] ;
739  for (p = 0 ; p < len ; p++)
740  {
741  i = Ui [p] ;
742 #ifdef COMPLEX
743  if (conj_solve)
744  {
745  CONJ (uik, Ux [p]) ;
746  }
747  else
748 #endif
749  {
750  uik = Ux [p] ;
751  }
752  MULT_SUB (x [0], uik, X [3*i]) ;
753  MULT_SUB (x [1], uik, X [3*i + 1]) ;
754  MULT_SUB (x [2], uik, X [3*i + 2]) ;
755  }
756 #ifdef COMPLEX
757  if (conj_solve)
758  {
759  CONJ (ukk, Udiag [k]) ;
760  }
761  else
762 #endif
763  {
764  ukk = Udiag [k] ;
765  }
766  DIV (X [3*k], x [0], ukk) ;
767  DIV (X [3*k + 1], x [1], ukk) ;
768  DIV (X [3*k + 2], x [2], ukk) ;
769  }
770  break ;
771 
772  case 4:
773 
774  for (k = 0 ; k < n ; k++)
775  {
776  GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
777  x [0] = X [4*k ] ;
778  x [1] = X [4*k + 1] ;
779  x [2] = X [4*k + 2] ;
780  x [3] = X [4*k + 3] ;
781  for (p = 0 ; p < len ; p++)
782  {
783  i = Ui [p] ;
784 #ifdef COMPLEX
785  if (conj_solve)
786  {
787  CONJ (uik, Ux [p]) ;
788  }
789  else
790 #endif
791  {
792  uik = Ux [p] ;
793  }
794  MULT_SUB (x [0], uik, X [4*i]) ;
795  MULT_SUB (x [1], uik, X [4*i + 1]) ;
796  MULT_SUB (x [2], uik, X [4*i + 2]) ;
797  MULT_SUB (x [3], uik, X [4*i + 3]) ;
798  }
799 #ifdef COMPLEX
800  if (conj_solve)
801  {
802  CONJ (ukk, Udiag [k]) ;
803  }
804  else
805 #endif
806  {
807  ukk = Udiag [k] ;
808  }
809  DIV (X [4*k], x [0], ukk) ;
810  DIV (X [4*k + 1], x [1], ukk) ;
811  DIV (X [4*k + 2], x [2], ukk) ;
812  DIV (X [4*k + 3], x [3], ukk) ;
813  }
814  break ;
815  }
816 }
817 
818 #endif /* KLU2_HPP */
int Ap[]
Column offsets.
Definition: klu2_simple.cpp:76
int Ai[]
Row values.
Definition: klu2_simple.cpp:78