72 #include "klu2_internal.h"
73 #include "klu2_kernel.hpp"
74 #include "klu2_memory.hpp"
76 template <
typename Entry,
typename Int>
77 size_t KLU_kernel_factor
112 KLU_common<Entry, Int> *Common
115 double maxlnz, dunits ;
117 Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
118 Int lsize, usize, anz, ok ;
120 ASSERT (Common != NULL) ;
127 anz =
Ap [n+k1] -
Ap [k1] ;
132 Lsize = MAX (Lsize, 1.0) ;
133 lsize = (Int) Lsize * anz + n ;
137 lsize = (Int) Lsize ;
142 lsize = MAX (n+1, lsize) ;
143 usize = MAX (n+1, usize) ;
145 maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
146 maxlnz = MIN (maxlnz, ((
double) INT_MAX)) ;
147 lsize = (Int) MIN (maxlnz, lsize) ;
148 usize = (Int) MIN (maxlnz, usize) ;
150 PRINTF ((
"Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
151 n, anz, k1, lsize, usize, maxlnz)) ;
158 *p_LU = (Unit *) NULL ;
162 Pinv = (Int *) W ; W += n ;
163 Stack = (Int *) W ; W += n ;
164 Flag = (Int *) W ; W += n ;
165 Lpend = (Int *) W ; W += n ;
166 Ap_pos = (Int *) W ; W += n ;
168 dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
169 DUNITS (Int, usize) + DUNITS (Entry, usize) ;
170 lusize = (size_t) dunits ;
171 ok = !INT_OVERFLOW (dunits) ;
172 LU = (Unit *) (ok ? KLU_malloc (lusize,
sizeof (Unit), Common) : NULL) ;
176 Common->status = KLU_OUT_OF_MEMORY ;
186 lusize = KLU_kernel<Entry> (n,
Ap,
Ai, Ax, Q, lusize,
187 Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
188 X, Stack, Flag, Ap_pos, Lpend,
189 k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
195 if (Common->status < KLU_OK)
197 LU = (Unit *) KLU_free (LU, lusize,
sizeof (Unit), Common) ;
201 PRINTF ((
" in klu noffdiag %d\n", Common->noffdiag)) ;
214 template <
typename Entry,
typename Int>
236 for (k = 0 ; k < n ; k++)
239 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
241 for (p = 0 ; p < len ; p++)
244 MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
251 for (k = 0 ; k < n ; k++)
254 x [1] = X [2*k + 1] ;
255 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
256 for (p = 0 ; p < len ; p++)
260 MULT_SUB (X [2*i], lik, x [0]) ;
261 MULT_SUB (X [2*i + 1], lik, x [1]) ;
268 for (k = 0 ; k < n ; k++)
271 x [1] = X [3*k + 1] ;
272 x [2] = X [3*k + 2] ;
273 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
274 for (p = 0 ; p < len ; p++)
278 MULT_SUB (X [3*i], lik, x [0]) ;
279 MULT_SUB (X [3*i + 1], lik, x [1]) ;
280 MULT_SUB (X [3*i + 2], lik, x [2]) ;
287 for (k = 0 ; k < n ; k++)
290 x [1] = X [4*k + 1] ;
291 x [2] = X [4*k + 2] ;
292 x [3] = X [4*k + 3] ;
293 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
294 for (p = 0 ; p < len ; p++)
298 MULT_SUB (X [4*i], lik, x [0]) ;
299 MULT_SUB (X [4*i + 1], lik, x [1]) ;
300 MULT_SUB (X [4*i + 2], lik, x [2]) ;
301 MULT_SUB (X [4*i + 3], lik, x [3]) ;
307 throw std::domain_error(
"More than 4 right-hand sides is not supported.");
321 template <
typename Entry,
typename Int>
335 Entry x [4], uik, ukk ;
345 for (k = n-1 ; k >= 0 ; k--)
347 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
349 DIV (x [0], X [k], Udiag [k]) ;
351 for (p = 0 ; p < len ; p++)
354 MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
363 for (k = n-1 ; k >= 0 ; k--)
365 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
369 DIV (x [0], X [2*k], ukk) ;
370 DIV (x [1], X [2*k + 1], ukk) ;
373 X [2*k + 1] = x [1] ;
374 for (p = 0 ; p < len ; p++)
380 MULT_SUB (X [2*i], uik, x [0]) ;
381 MULT_SUB (X [2*i + 1], uik, x [1]) ;
389 for (k = n-1 ; k >= 0 ; k--)
391 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
394 DIV (x [0], X [3*k], ukk) ;
395 DIV (x [1], X [3*k + 1], ukk) ;
396 DIV (x [2], X [3*k + 2], ukk) ;
399 X [3*k + 1] = x [1] ;
400 X [3*k + 2] = x [2] ;
401 for (p = 0 ; p < len ; p++)
405 MULT_SUB (X [3*i], uik, x [0]) ;
406 MULT_SUB (X [3*i + 1], uik, x [1]) ;
407 MULT_SUB (X [3*i + 2], uik, x [2]) ;
415 for (k = n-1 ; k >= 0 ; k--)
417 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
420 DIV (x [0], X [4*k], ukk) ;
421 DIV (x [1], X [4*k + 1], ukk) ;
422 DIV (x [2], X [4*k + 2], ukk) ;
423 DIV (x [3], X [4*k + 3], ukk) ;
426 X [4*k + 1] = x [1] ;
427 X [4*k + 2] = x [2] ;
428 X [4*k + 3] = x [3] ;
429 for (p = 0 ; p < len ; p++)
434 MULT_SUB (X [4*i], uik, x [0]) ;
435 MULT_SUB (X [4*i + 1], uik, x [1]) ;
436 MULT_SUB (X [4*i + 2], uik, x [2]) ;
437 MULT_SUB (X [4*i + 3], uik, x [3]) ;
455 template <
typename Entry,
typename Int>
481 for (k = n-1 ; k >= 0 ; k--)
483 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
485 for (p = 0 ; p < len ; p++)
491 MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
497 MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
506 for (k = n-1 ; k >= 0 ; k--)
509 x [1] = X [2*k + 1] ;
510 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
511 for (p = 0 ; p < len ; p++)
524 MULT_SUB (x [0], lik, X [2*i]) ;
525 MULT_SUB (x [1], lik, X [2*i + 1]) ;
528 X [2*k + 1] = x [1] ;
534 for (k = n-1 ; k >= 0 ; k--)
537 x [1] = X [3*k + 1] ;
538 x [2] = X [3*k + 2] ;
539 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
540 for (p = 0 ; p < len ; p++)
553 MULT_SUB (x [0], lik, X [3*i]) ;
554 MULT_SUB (x [1], lik, X [3*i + 1]) ;
555 MULT_SUB (x [2], lik, X [3*i + 2]) ;
558 X [3*k + 1] = x [1] ;
559 X [3*k + 2] = x [2] ;
565 for (k = n-1 ; k >= 0 ; k--)
568 x [1] = X [4*k + 1] ;
569 x [2] = X [4*k + 2] ;
570 x [3] = X [4*k + 3] ;
571 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
572 for (p = 0 ; p < len ; p++)
585 MULT_SUB (x [0], lik, X [4*i]) ;
586 MULT_SUB (x [1], lik, X [4*i + 1]) ;
587 MULT_SUB (x [2], lik, X [4*i + 2]) ;
588 MULT_SUB (x [3], lik, X [4*i + 3]) ;
591 X [4*k + 1] = x [1] ;
592 X [4*k + 2] = x [2] ;
593 X [4*k + 3] = x [3] ;
609 template <
typename Entry,
typename Int>
617 const Entry Udiag [ ],
626 Entry x [4], uik, ukk ;
636 for (k = 0 ; k < n ; k++)
638 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
640 for (p = 0 ; p < len ; p++)
646 MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
652 MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
658 CONJ (ukk, Udiag [k]) ;
665 DIV (X [k], x [0], ukk) ;
671 for (k = 0 ; k < n ; k++)
673 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
675 x [1] = X [2*k + 1] ;
676 for (p = 0 ; p < len ; p++)
689 MULT_SUB (x [0], uik, X [2*i]) ;
690 MULT_SUB (x [1], uik, X [2*i + 1]) ;
695 CONJ (ukk, Udiag [k]) ;
702 DIV (X [2*k], x [0], ukk) ;
703 DIV (X [2*k + 1], x [1], ukk) ;
709 for (k = 0 ; k < n ; k++)
711 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
713 x [1] = X [3*k + 1] ;
714 x [2] = X [3*k + 2] ;
715 for (p = 0 ; p < len ; p++)
728 MULT_SUB (x [0], uik, X [3*i]) ;
729 MULT_SUB (x [1], uik, X [3*i + 1]) ;
730 MULT_SUB (x [2], uik, X [3*i + 2]) ;
735 CONJ (ukk, Udiag [k]) ;
742 DIV (X [3*k], x [0], ukk) ;
743 DIV (X [3*k + 1], x [1], ukk) ;
744 DIV (X [3*k + 2], x [2], ukk) ;
750 for (k = 0 ; k < n ; k++)
752 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
754 x [1] = X [4*k + 1] ;
755 x [2] = X [4*k + 2] ;
756 x [3] = X [4*k + 3] ;
757 for (p = 0 ; p < len ; p++)
770 MULT_SUB (x [0], uik, X [4*i]) ;
771 MULT_SUB (x [1], uik, X [4*i + 1]) ;
772 MULT_SUB (x [2], uik, X [4*i + 2]) ;
773 MULT_SUB (x [3], uik, X [4*i + 3]) ;
778 CONJ (ukk, Udiag [k]) ;
785 DIV (X [4*k], x [0], ukk) ;
786 DIV (X [4*k + 1], x [1], ukk) ;
787 DIV (X [4*k + 2], x [2], ukk) ;
788 DIV (X [4*k + 3], x [3], ukk) ;
int Ap[]
Column offsets.
Definition: klu2_simple.cpp:52
int Ai[]
Row values.
Definition: klu2_simple.cpp:54