44 #ifndef KLU2_SOLVE_HPP
45 #define KLU2_SOLVE_HPP
47 #include "klu2_internal.h"
50 template <
typename Entry,
typename Int>
54 KLU_symbolic<Entry, Int> *Symbolic,
55 KLU_numeric<Entry, Int> *Numeric,
64 KLU_common<Entry, Int> *Common
67 Entry x [4], offik, s ;
69 Entry *Offx, *X, *Bz, *Udiag ;
70 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
72 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
82 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
85 Common->status = KLU_INVALID ;
88 Common->status = KLU_OK ;
96 nblocks = Symbolic->nblocks ;
104 ASSERT (nblocks == Numeric->nblocks) ;
105 Pnum = Numeric->Pnum ;
106 Offp = Numeric->Offp ;
107 Offi = Numeric->Offi ;
108 Offx = (Entry *) Numeric->Offx ;
111 Llen = Numeric->Llen ;
113 Ulen = Numeric->Ulen ;
114 LUbx = (Unit **) Numeric->LUbx ;
115 Udiag = (Entry *) Numeric->Udiag ;
118 X = (Entry *) Numeric->Xwork ;
120 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
126 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
133 nr = MIN (nrhs - chunk, 4) ;
148 for (k = 0 ; k < n ; k++)
150 X [k] = Bz [Pnum [k]] ;
156 for (k = 0 ; k < n ; k++)
160 X [2*k + 1] = Bz [i + d ] ;
166 for (k = 0 ; k < n ; k++)
170 X [3*k + 1] = Bz [i + d ] ;
171 X [3*k + 2] = Bz [i + d*2] ;
177 for (k = 0 ; k < n ; k++)
181 X [4*k + 1] = Bz [i + d ] ;
182 X [4*k + 2] = Bz [i + d*2] ;
183 X [4*k + 3] = Bz [i + d*3] ;
197 for (k = 0 ; k < n ; k++)
199 SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
205 for (k = 0 ; k < n ; k++)
209 SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
210 SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
216 for (k = 0 ; k < n ; k++)
220 SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
221 SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
222 SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
228 for (k = 0 ; k < n ; k++)
232 SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
233 SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
234 SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
235 SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
245 for (block = nblocks-1 ; block >= 0 ; block--)
255 PRINTF ((
"solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
265 DIV (X [k1], X [k1], s) ;
269 DIV (X [2*k1], X [2*k1], s) ;
270 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
274 DIV (X [3*k1], X [3*k1], s) ;
275 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
276 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
280 DIV (X [4*k1], X [4*k1], s) ;
281 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
282 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
283 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
290 KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
292 KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
293 Udiag + k1, nr, X + nr*k1) ;
307 for (k = k1 ; k < k2 ; k++)
311 for (p = Offp [k] ; p < pend ; p++)
313 MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
320 for (k = k1 ; k < k2 ; k++)
324 x [1] = X [2*k + 1] ;
325 for (p = Offp [k] ; p < pend ; p++)
329 MULT_SUB (X [2*i], offik, x [0]) ;
330 MULT_SUB (X [2*i + 1], offik, x [1]) ;
337 for (k = k1 ; k < k2 ; k++)
341 x [1] = X [3*k + 1] ;
342 x [2] = X [3*k + 2] ;
343 for (p = Offp [k] ; p < pend ; p++)
347 MULT_SUB (X [3*i], offik, x [0]) ;
348 MULT_SUB (X [3*i + 1], offik, x [1]) ;
349 MULT_SUB (X [3*i + 2], offik, x [2]) ;
356 for (k = k1 ; k < k2 ; k++)
360 x [1] = X [4*k + 1] ;
361 x [2] = X [4*k + 2] ;
362 x [3] = X [4*k + 3] ;
363 for (p = Offp [k] ; p < pend ; p++)
367 MULT_SUB (X [4*i], offik, x [0]) ;
368 MULT_SUB (X [4*i + 1], offik, x [1]) ;
369 MULT_SUB (X [4*i + 2], offik, x [2]) ;
370 MULT_SUB (X [4*i + 3], offik, x [3]) ;
387 for (k = 0 ; k < n ; k++)
395 for (k = 0 ; k < n ; k++)
399 Bz [i + d ] = X [2*k + 1] ;
405 for (k = 0 ; k < n ; k++)
409 Bz [i + d ] = X [3*k + 1] ;
410 Bz [i + d*2] = X [3*k + 2] ;
416 for (k = 0 ; k < n ; k++)
420 Bz [i + d ] = X [4*k + 1] ;
421 Bz [i + d*2] = X [4*k + 2] ;
422 Bz [i + d*3] = X [4*k + 3] ;
438 template <
typename Entry,
typename Int>
442 KLU_symbolic<Entry, Int> *Symbolic,
443 KLU_numeric<Entry, Int> *Numeric,
454 KLU_common<Entry, Int> *Common
457 Entry x [4], offik, s ;
459 Entry *Offx, *X, *Bz, *Udiag ;
460 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
462 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
472 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
473 B == NULL || Xout == NULL)
475 Common->status = KLU_INVALID ;
478 Common->status = KLU_OK ;
486 nblocks = Symbolic->nblocks ;
494 ASSERT (nblocks == Numeric->nblocks) ;
495 Pnum = Numeric->Pnum ;
496 Offp = Numeric->Offp ;
497 Offi = Numeric->Offi ;
498 Offx = (Entry *) Numeric->Offx ;
501 Llen = Numeric->Llen ;
503 Ulen = Numeric->Ulen ;
504 LUbx = (Unit **) Numeric->LUbx ;
505 Udiag = (Entry *) Numeric->Udiag ;
508 X = (Entry *) Numeric->Xwork ;
510 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
516 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
523 nr = MIN (nrhs - chunk, 4) ;
538 for (k = 0 ; k < n ; k++)
540 X [k] = Bz [Pnum [k]] ;
546 for (k = 0 ; k < n ; k++)
550 X [2*k + 1] = Bz [i + d ] ;
556 for (k = 0 ; k < n ; k++)
560 X [3*k + 1] = Bz [i + d ] ;
561 X [3*k + 2] = Bz [i + d*2] ;
567 for (k = 0 ; k < n ; k++)
571 X [4*k + 1] = Bz [i + d ] ;
572 X [4*k + 2] = Bz [i + d*2] ;
573 X [4*k + 3] = Bz [i + d*3] ;
587 for (k = 0 ; k < n ; k++)
589 SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
595 for (k = 0 ; k < n ; k++)
599 SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
600 SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
606 for (k = 0 ; k < n ; k++)
610 SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
611 SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
612 SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
618 for (k = 0 ; k < n ; k++)
622 SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
623 SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
624 SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
625 SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
635 for (block = nblocks-1 ; block >= 0 ; block--)
645 PRINTF ((
"solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
655 DIV (X [k1], X [k1], s) ;
659 DIV (X [2*k1], X [2*k1], s) ;
660 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
664 DIV (X [3*k1], X [3*k1], s) ;
665 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
666 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
670 DIV (X [4*k1], X [4*k1], s) ;
671 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
672 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
673 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
680 KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
682 KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
683 Udiag + k1, nr, X + nr*k1) ;
697 for (k = k1 ; k < k2 ; k++)
701 for (p = Offp [k] ; p < pend ; p++)
703 MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
710 for (k = k1 ; k < k2 ; k++)
714 x [1] = X [2*k + 1] ;
715 for (p = Offp [k] ; p < pend ; p++)
719 MULT_SUB (X [2*i], offik, x [0]) ;
720 MULT_SUB (X [2*i + 1], offik, x [1]) ;
727 for (k = k1 ; k < k2 ; k++)
731 x [1] = X [3*k + 1] ;
732 x [2] = X [3*k + 2] ;
733 for (p = Offp [k] ; p < pend ; p++)
737 MULT_SUB (X [3*i], offik, x [0]) ;
738 MULT_SUB (X [3*i + 1], offik, x [1]) ;
739 MULT_SUB (X [3*i + 2], offik, x [2]) ;
746 for (k = k1 ; k < k2 ; k++)
750 x [1] = X [4*k + 1] ;
751 x [2] = X [4*k + 2] ;
752 x [3] = X [4*k + 3] ;
753 for (p = Offp [k] ; p < pend ; p++)
757 MULT_SUB (X [4*i], offik, x [0]) ;
758 MULT_SUB (X [4*i + 1], offik, x [1]) ;
759 MULT_SUB (X [4*i + 2], offik, x [2]) ;
760 MULT_SUB (X [4*i + 3], offik, x [3]) ;
778 for (k = 0 ; k < n ; k++)
780 Xout [Q [k]] = X [k] ;
786 for (k = 0 ; k < n ; k++)
789 Xout [i ] = X [2*k ] ;
790 Xout [i + d ] = X [2*k + 1] ;
796 for (k = 0 ; k < n ; k++)
799 Xout [i ] = X [3*k ] ;
800 Xout [i + d ] = X [3*k + 1] ;
801 Xout [i + d*2] = X [3*k + 2] ;
807 for (k = 0 ; k < n ; k++)
810 Xout [i ] = X [4*k ] ;
811 Xout [i + d ] = X [4*k + 1] ;
812 Xout [i + d*2] = X [4*k + 2] ;
813 Xout [i + d*3] = X [4*k + 3] ;