20 #ifndef KLU2_SOLVE_HPP
21 #define KLU2_SOLVE_HPP
23 #include "klu2_internal.h"
26 template <
typename Entry,
typename Int>
30 KLU_symbolic<Entry, Int> *Symbolic,
31 KLU_numeric<Entry, Int> *Numeric,
40 KLU_common<Entry, Int> *Common
43 Entry x [4], offik, s ;
45 Entry *Offx, *X, *Bz, *Udiag ;
46 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
48 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
58 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
61 Common->status = KLU_INVALID ;
64 Common->status = KLU_OK ;
72 nblocks = Symbolic->nblocks ;
80 ASSERT (nblocks == Numeric->nblocks) ;
81 Pnum = Numeric->Pnum ;
82 Offp = Numeric->Offp ;
83 Offi = Numeric->Offi ;
84 Offx = (Entry *) Numeric->Offx ;
87 Llen = Numeric->Llen ;
89 Ulen = Numeric->Ulen ;
90 LUbx = (Unit **) Numeric->LUbx ;
91 Udiag = (Entry *) Numeric->Udiag ;
94 X = (Entry *) Numeric->Xwork ;
96 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
102 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
109 nr = MIN (nrhs - chunk, 4) ;
124 for (k = 0 ; k < n ; k++)
126 X [k] = Bz [Pnum [k]] ;
132 for (k = 0 ; k < n ; k++)
136 X [2*k + 1] = Bz [i + d ] ;
142 for (k = 0 ; k < n ; k++)
146 X [3*k + 1] = Bz [i + d ] ;
147 X [3*k + 2] = Bz [i + d*2] ;
153 for (k = 0 ; k < n ; k++)
157 X [4*k + 1] = Bz [i + d ] ;
158 X [4*k + 2] = Bz [i + d*2] ;
159 X [4*k + 3] = Bz [i + d*3] ;
173 for (k = 0 ; k < n ; k++)
175 SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
181 for (k = 0 ; k < n ; k++)
185 SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
186 SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
192 for (k = 0 ; k < n ; k++)
196 SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
197 SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
198 SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
204 for (k = 0 ; k < n ; k++)
208 SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
209 SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
210 SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
211 SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
221 for (block = nblocks-1 ; block >= 0 ; block--)
231 PRINTF ((
"solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
241 DIV (X [k1], X [k1], s) ;
245 DIV (X [2*k1], X [2*k1], s) ;
246 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
250 DIV (X [3*k1], X [3*k1], s) ;
251 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
252 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
256 DIV (X [4*k1], X [4*k1], s) ;
257 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
258 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
259 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
266 KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
268 KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
269 Udiag + k1, nr, X + nr*k1) ;
283 for (k = k1 ; k < k2 ; k++)
287 for (p = Offp [k] ; p < pend ; p++)
289 MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
296 for (k = k1 ; k < k2 ; k++)
300 x [1] = X [2*k + 1] ;
301 for (p = Offp [k] ; p < pend ; p++)
305 MULT_SUB (X [2*i], offik, x [0]) ;
306 MULT_SUB (X [2*i + 1], offik, x [1]) ;
313 for (k = k1 ; k < k2 ; k++)
317 x [1] = X [3*k + 1] ;
318 x [2] = X [3*k + 2] ;
319 for (p = Offp [k] ; p < pend ; p++)
323 MULT_SUB (X [3*i], offik, x [0]) ;
324 MULT_SUB (X [3*i + 1], offik, x [1]) ;
325 MULT_SUB (X [3*i + 2], offik, x [2]) ;
332 for (k = k1 ; k < k2 ; k++)
336 x [1] = X [4*k + 1] ;
337 x [2] = X [4*k + 2] ;
338 x [3] = X [4*k + 3] ;
339 for (p = Offp [k] ; p < pend ; p++)
343 MULT_SUB (X [4*i], offik, x [0]) ;
344 MULT_SUB (X [4*i + 1], offik, x [1]) ;
345 MULT_SUB (X [4*i + 2], offik, x [2]) ;
346 MULT_SUB (X [4*i + 3], offik, x [3]) ;
363 for (k = 0 ; k < n ; k++)
371 for (k = 0 ; k < n ; k++)
375 Bz [i + d ] = X [2*k + 1] ;
381 for (k = 0 ; k < n ; k++)
385 Bz [i + d ] = X [3*k + 1] ;
386 Bz [i + d*2] = X [3*k + 2] ;
392 for (k = 0 ; k < n ; k++)
396 Bz [i + d ] = X [4*k + 1] ;
397 Bz [i + d*2] = X [4*k + 2] ;
398 Bz [i + d*3] = X [4*k + 3] ;
414 template <
typename Entry,
typename Int>
418 KLU_symbolic<Entry, Int> *Symbolic,
419 KLU_numeric<Entry, Int> *Numeric,
430 KLU_common<Entry, Int> *Common
433 Entry x [4], offik, s ;
435 Entry *Offx, *X, *Bz, *Udiag ;
436 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
438 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
448 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
449 B == NULL || Xout == NULL)
451 Common->status = KLU_INVALID ;
454 Common->status = KLU_OK ;
462 nblocks = Symbolic->nblocks ;
470 ASSERT (nblocks == Numeric->nblocks) ;
471 Pnum = Numeric->Pnum ;
472 Offp = Numeric->Offp ;
473 Offi = Numeric->Offi ;
474 Offx = (Entry *) Numeric->Offx ;
477 Llen = Numeric->Llen ;
479 Ulen = Numeric->Ulen ;
480 LUbx = (Unit **) Numeric->LUbx ;
481 Udiag = (Entry *) Numeric->Udiag ;
484 X = (Entry *) Numeric->Xwork ;
486 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
492 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
499 nr = MIN (nrhs - chunk, 4) ;
514 for (k = 0 ; k < n ; k++)
516 X [k] = Bz [Pnum [k]] ;
522 for (k = 0 ; k < n ; k++)
526 X [2*k + 1] = Bz [i + d ] ;
532 for (k = 0 ; k < n ; k++)
536 X [3*k + 1] = Bz [i + d ] ;
537 X [3*k + 2] = Bz [i + d*2] ;
543 for (k = 0 ; k < n ; k++)
547 X [4*k + 1] = Bz [i + d ] ;
548 X [4*k + 2] = Bz [i + d*2] ;
549 X [4*k + 3] = Bz [i + d*3] ;
563 for (k = 0 ; k < n ; k++)
565 SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
571 for (k = 0 ; k < n ; k++)
575 SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
576 SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
582 for (k = 0 ; k < n ; k++)
586 SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
587 SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
588 SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
594 for (k = 0 ; k < n ; k++)
598 SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
599 SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
600 SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
601 SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
611 for (block = nblocks-1 ; block >= 0 ; block--)
621 PRINTF ((
"solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
631 DIV (X [k1], X [k1], s) ;
635 DIV (X [2*k1], X [2*k1], s) ;
636 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
640 DIV (X [3*k1], X [3*k1], s) ;
641 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
642 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
646 DIV (X [4*k1], X [4*k1], s) ;
647 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
648 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
649 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
656 KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
658 KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
659 Udiag + k1, nr, X + nr*k1) ;
673 for (k = k1 ; k < k2 ; k++)
677 for (p = Offp [k] ; p < pend ; p++)
679 MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
686 for (k = k1 ; k < k2 ; k++)
690 x [1] = X [2*k + 1] ;
691 for (p = Offp [k] ; p < pend ; p++)
695 MULT_SUB (X [2*i], offik, x [0]) ;
696 MULT_SUB (X [2*i + 1], offik, x [1]) ;
703 for (k = k1 ; k < k2 ; k++)
707 x [1] = X [3*k + 1] ;
708 x [2] = X [3*k + 2] ;
709 for (p = Offp [k] ; p < pend ; p++)
713 MULT_SUB (X [3*i], offik, x [0]) ;
714 MULT_SUB (X [3*i + 1], offik, x [1]) ;
715 MULT_SUB (X [3*i + 2], offik, x [2]) ;
722 for (k = k1 ; k < k2 ; k++)
726 x [1] = X [4*k + 1] ;
727 x [2] = X [4*k + 2] ;
728 x [3] = X [4*k + 3] ;
729 for (p = Offp [k] ; p < pend ; p++)
733 MULT_SUB (X [4*i], offik, x [0]) ;
734 MULT_SUB (X [4*i + 1], offik, x [1]) ;
735 MULT_SUB (X [4*i + 2], offik, x [2]) ;
736 MULT_SUB (X [4*i + 3], offik, x [3]) ;
754 for (k = 0 ; k < n ; k++)
756 Xout [Q [k]] = X [k] ;
762 for (k = 0 ; k < n ; k++)
765 Xout [i ] = X [2*k ] ;
766 Xout [i + d ] = X [2*k + 1] ;
772 for (k = 0 ; k < n ; k++)
775 Xout [i ] = X [3*k ] ;
776 Xout [i + d ] = X [3*k + 1] ;
777 Xout [i + d*2] = X [3*k + 2] ;
783 for (k = 0 ; k < n ; k++)
786 Xout [i ] = X [4*k ] ;
787 Xout [i + d ] = X [4*k + 1] ;
788 Xout [i + d*2] = X [4*k + 2] ;
789 Xout [i + d*3] = X [4*k + 3] ;