96 #include "klu2_internal.h"
97 #include "klu2_kernel.hpp"
98 #include "klu2_memory.hpp"
100 template <
typename Entry,
typename Int>
101 size_t KLU_kernel_factor
136 KLU_common<Entry, Int> *Common
139 double maxlnz, dunits ;
141 Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
142 Int lsize, usize, anz, ok ;
144 ASSERT (Common != NULL) ;
151 anz = Ap [n+k1] - Ap [k1] ;
156 Lsize = MAX (Lsize, 1.0) ;
157 lsize = (Int) Lsize * anz + n ;
161 lsize = (Int) Lsize ;
166 lsize = MAX (n+1, lsize) ;
167 usize = MAX (n+1, usize) ;
169 maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
170 maxlnz = MIN (maxlnz, ((
double) INT_MAX)) ;
171 lsize = (Int) MIN (maxlnz, lsize) ;
172 usize = (Int) MIN (maxlnz, usize) ;
174 PRINTF ((
"Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
175 n, anz, k1, lsize, usize, maxlnz)) ;
182 *p_LU = (Unit *) NULL ;
186 Pinv = (Int *) W ; W += n ;
187 Stack = (Int *) W ; W += n ;
188 Flag = (Int *) W ; W += n ;
189 Lpend = (Int *) W ; W += n ;
190 Ap_pos = (Int *) W ; W += n ;
192 dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
193 DUNITS (Int, usize) + DUNITS (Entry, usize) ;
194 lusize = (size_t) dunits ;
195 ok = !INT_OVERFLOW (dunits) ;
196 LU = (Unit *) (ok ? KLU_malloc (lusize,
sizeof (Unit), Common) : NULL) ;
200 Common->status = KLU_OUT_OF_MEMORY ;
210 lusize = KLU_kernel<Entry> (n, Ap, Ai, Ax, Q, lusize,
211 Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
212 X, Stack, Flag, Ap_pos, Lpend,
213 k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
219 if (Common->status < KLU_OK)
221 LU = (Unit *) KLU_free (LU, lusize,
sizeof (Unit), Common) ;
225 PRINTF ((
" in klu noffdiag %d\n", Common->noffdiag)) ;
238 template <
typename Entry,
typename Int>
260 for (k = 0 ; k < n ; k++)
263 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
265 for (p = 0 ; p < len ; p++)
268 MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
275 for (k = 0 ; k < n ; k++)
278 x [1] = X [2*k + 1] ;
279 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
280 for (p = 0 ; p < len ; p++)
284 MULT_SUB (X [2*i], lik, x [0]) ;
285 MULT_SUB (X [2*i + 1], lik, x [1]) ;
292 for (k = 0 ; k < n ; k++)
295 x [1] = X [3*k + 1] ;
296 x [2] = X [3*k + 2] ;
297 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
298 for (p = 0 ; p < len ; p++)
302 MULT_SUB (X [3*i], lik, x [0]) ;
303 MULT_SUB (X [3*i + 1], lik, x [1]) ;
304 MULT_SUB (X [3*i + 2], lik, x [2]) ;
311 for (k = 0 ; k < n ; k++)
314 x [1] = X [4*k + 1] ;
315 x [2] = X [4*k + 2] ;
316 x [3] = X [4*k + 3] ;
317 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
318 for (p = 0 ; p < len ; p++)
322 MULT_SUB (X [4*i], lik, x [0]) ;
323 MULT_SUB (X [4*i + 1], lik, x [1]) ;
324 MULT_SUB (X [4*i + 2], lik, x [2]) ;
325 MULT_SUB (X [4*i + 3], lik, x [3]) ;
342 template <
typename Entry,
typename Int>
356 Entry x [4], uik, ukk ;
366 for (k = n-1 ; k >= 0 ; k--)
368 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
370 DIV (x [0], X [k], Udiag [k]) ;
372 for (p = 0 ; p < len ; p++)
375 MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
384 for (k = n-1 ; k >= 0 ; k--)
386 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
390 DIV (x [0], X [2*k], ukk) ;
391 DIV (x [1], X [2*k + 1], ukk) ;
394 X [2*k + 1] = x [1] ;
395 for (p = 0 ; p < len ; p++)
401 MULT_SUB (X [2*i], uik, x [0]) ;
402 MULT_SUB (X [2*i + 1], uik, x [1]) ;
410 for (k = n-1 ; k >= 0 ; k--)
412 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
415 DIV (x [0], X [3*k], ukk) ;
416 DIV (x [1], X [3*k + 1], ukk) ;
417 DIV (x [2], X [3*k + 2], ukk) ;
420 X [3*k + 1] = x [1] ;
421 X [3*k + 2] = x [2] ;
422 for (p = 0 ; p < len ; p++)
426 MULT_SUB (X [3*i], uik, x [0]) ;
427 MULT_SUB (X [3*i + 1], uik, x [1]) ;
428 MULT_SUB (X [3*i + 2], uik, x [2]) ;
436 for (k = n-1 ; k >= 0 ; k--)
438 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
441 DIV (x [0], X [4*k], ukk) ;
442 DIV (x [1], X [4*k + 1], ukk) ;
443 DIV (x [2], X [4*k + 2], ukk) ;
444 DIV (x [3], X [4*k + 3], ukk) ;
447 X [4*k + 1] = x [1] ;
448 X [4*k + 2] = x [2] ;
449 X [4*k + 3] = x [3] ;
450 for (p = 0 ; p < len ; p++)
455 MULT_SUB (X [4*i], uik, x [0]) ;
456 MULT_SUB (X [4*i + 1], uik, x [1]) ;
457 MULT_SUB (X [4*i + 2], uik, x [2]) ;
458 MULT_SUB (X [4*i + 3], uik, x [3]) ;
476 template <
typename Entry,
typename Int>
502 for (k = n-1 ; k >= 0 ; k--)
504 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
506 for (p = 0 ; p < len ; p++)
512 MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
518 MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
527 for (k = n-1 ; k >= 0 ; k--)
530 x [1] = X [2*k + 1] ;
531 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
532 for (p = 0 ; p < len ; p++)
545 MULT_SUB (x [0], lik, X [2*i]) ;
546 MULT_SUB (x [1], lik, X [2*i + 1]) ;
549 X [2*k + 1] = x [1] ;
555 for (k = n-1 ; k >= 0 ; k--)
558 x [1] = X [3*k + 1] ;
559 x [2] = X [3*k + 2] ;
560 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
561 for (p = 0 ; p < len ; p++)
574 MULT_SUB (x [0], lik, X [3*i]) ;
575 MULT_SUB (x [1], lik, X [3*i + 1]) ;
576 MULT_SUB (x [2], lik, X [3*i + 2]) ;
579 X [3*k + 1] = x [1] ;
580 X [3*k + 2] = x [2] ;
586 for (k = n-1 ; k >= 0 ; k--)
589 x [1] = X [4*k + 1] ;
590 x [2] = X [4*k + 2] ;
591 x [3] = X [4*k + 3] ;
592 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
593 for (p = 0 ; p < len ; p++)
606 MULT_SUB (x [0], lik, X [4*i]) ;
607 MULT_SUB (x [1], lik, X [4*i + 1]) ;
608 MULT_SUB (x [2], lik, X [4*i + 2]) ;
609 MULT_SUB (x [3], lik, X [4*i + 3]) ;
612 X [4*k + 1] = x [1] ;
613 X [4*k + 2] = x [2] ;
614 X [4*k + 3] = x [3] ;
630 template <
typename Entry,
typename Int>
647 Entry x [4], uik, ukk ;
657 for (k = 0 ; k < n ; k++)
659 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
661 for (p = 0 ; p < len ; p++)
667 MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
673 MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
679 CONJ (ukk, Udiag [k]) ;
686 DIV (X [k], x [0], ukk) ;
692 for (k = 0 ; k < n ; k++)
694 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
696 x [1] = X [2*k + 1] ;
697 for (p = 0 ; p < len ; p++)
710 MULT_SUB (x [0], uik, X [2*i]) ;
711 MULT_SUB (x [1], uik, X [2*i + 1]) ;
716 CONJ (ukk, Udiag [k]) ;
723 DIV (X [2*k], x [0], ukk) ;
724 DIV (X [2*k + 1], x [1], ukk) ;
730 for (k = 0 ; k < n ; k++)
732 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
734 x [1] = X [3*k + 1] ;
735 x [2] = X [3*k + 2] ;
736 for (p = 0 ; p < len ; p++)
749 MULT_SUB (x [0], uik, X [3*i]) ;
750 MULT_SUB (x [1], uik, X [3*i + 1]) ;
751 MULT_SUB (x [2], uik, X [3*i + 2]) ;
756 CONJ (ukk, Udiag [k]) ;
763 DIV (X [3*k], x [0], ukk) ;
764 DIV (X [3*k + 1], x [1], ukk) ;
765 DIV (X [3*k + 2], x [2], ukk) ;
771 for (k = 0 ; k < n ; k++)
773 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
775 x [1] = X [4*k + 1] ;
776 x [2] = X [4*k + 2] ;
777 x [3] = X [4*k + 3] ;
778 for (p = 0 ; p < len ; p++)
791 MULT_SUB (x [0], uik, X [4*i]) ;
792 MULT_SUB (x [1], uik, X [4*i + 1]) ;
793 MULT_SUB (x [2], uik, X [4*i + 2]) ;
794 MULT_SUB (x [3], uik, X [4*i + 3]) ;
799 CONJ (ukk, Udiag [k]) ;
806 DIV (X [4*k], x [0], ukk) ;
807 DIV (X [4*k + 1], x [1], ukk) ;
808 DIV (X [4*k + 2], x [2], ukk) ;
809 DIV (X [4*k + 3], x [3], ukk) ;