44 #ifndef KLU2_TSOLVE_HPP
45 #define KLU2_TSOLVE_HPP
47 #include "klu2_internal.h"
50 template <
typename Entry,
typename Int>
54 KLU_symbolic<Entry, Int> *Symbolic,
55 KLU_numeric<Entry, Int> *Numeric,
68 KLU_common<Entry, Int> *Common
71 Entry x [4], offik, s ;
73 Entry *Offx, *X, *Bz, *Udiag ;
74 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
76 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
86 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
89 Common->status = KLU_INVALID ;
92 Common->status = KLU_OK ;
100 nblocks = Symbolic->nblocks ;
108 ASSERT (nblocks == Numeric->nblocks) ;
109 Pnum = Numeric->Pnum ;
110 Offp = Numeric->Offp ;
111 Offi = Numeric->Offi ;
112 Offx = (Entry *) Numeric->Offx ;
115 Llen = Numeric->Llen ;
117 Ulen = Numeric->Ulen ;
118 LUbx = (Unit **) Numeric->LUbx ;
119 Udiag = (Entry *) Numeric->Udiag ;
122 X = (Entry *) Numeric->Xwork ;
123 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
129 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
136 nr = MIN (nrhs - chunk, 4) ;
147 for (k = 0 ; k < n ; k++)
155 for (k = 0 ; k < n ; k++)
159 X [2*k + 1] = Bz [i + d ] ;
165 for (k = 0 ; k < n ; k++)
169 X [3*k + 1] = Bz [i + d ] ;
170 X [3*k + 2] = Bz [i + d*2] ;
176 for (k = 0 ; k < n ; k++)
180 X [4*k + 1] = Bz [i + d ] ;
181 X [4*k + 2] = Bz [i + d*2] ;
182 X [4*k + 3] = Bz [i + d*3] ;
192 for (block = 0 ; block < nblocks ; block++)
202 PRINTF ((
"tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
215 for (k = k1 ; k < k2 ; k++)
218 for (p = Offp [k] ; p < pend ; p++)
223 MULT_SUB_CONJ (X [k], X [Offi [p]],
229 MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
237 for (k = k1 ; k < k2 ; k++)
241 x [1] = X [2*k + 1] ;
242 for (p = Offp [k] ; p < pend ; p++)
248 CONJ (offik, Offx [p]) ;
255 MULT_SUB (x [0], offik, X [2*i]) ;
256 MULT_SUB (x [1], offik, X [2*i + 1]) ;
259 X [2*k + 1] = x [1] ;
265 for (k = k1 ; k < k2 ; k++)
269 x [1] = X [3*k + 1] ;
270 x [2] = X [3*k + 2] ;
271 for (p = Offp [k] ; p < pend ; p++)
277 CONJ (offik, Offx [p]) ;
284 MULT_SUB (x [0], offik, X [3*i]) ;
285 MULT_SUB (x [1], offik, X [3*i + 1]) ;
286 MULT_SUB (x [2], offik, X [3*i + 2]) ;
289 X [3*k + 1] = x [1] ;
290 X [3*k + 2] = x [2] ;
296 for (k = k1 ; k < k2 ; k++)
300 x [1] = X [4*k + 1] ;
301 x [2] = X [4*k + 2] ;
302 x [3] = X [4*k + 3] ;
303 for (p = Offp [k] ; p < pend ; p++)
309 CONJ(offik, Offx [p]) ;
316 MULT_SUB (x [0], offik, X [4*i]) ;
317 MULT_SUB (x [1], offik, X [4*i + 1]) ;
318 MULT_SUB (x [2], offik, X [4*i + 2]) ;
319 MULT_SUB (x [3], offik, X [4*i + 3]) ;
322 X [4*k + 1] = x [1] ;
323 X [4*k + 2] = x [2] ;
324 X [4*k + 3] = x [3] ;
339 CONJ (s, Udiag [k1]) ;
350 DIV (X [k1], X [k1], s) ;
354 DIV (X [2*k1], X [2*k1], s) ;
355 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
359 DIV (X [3*k1], X [3*k1], s) ;
360 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
361 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
365 DIV (X [4*k1], X [4*k1], s) ;
366 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
367 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
368 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
375 KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
381 KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
402 for (k = 0 ; k < n ; k++)
404 Bz [Pnum [k]] = X [k] ;
410 for (k = 0 ; k < n ; k++)
414 Bz [i + d ] = X [2*k + 1] ;
420 for (k = 0 ; k < n ; k++)
424 Bz [i + d ] = X [3*k + 1] ;
425 Bz [i + d*2] = X [3*k + 2] ;
431 for (k = 0 ; k < n ; k++)
435 Bz [i + d ] = X [4*k + 1] ;
436 Bz [i + d*2] = X [4*k + 2] ;
437 Bz [i + d*3] = X [4*k + 3] ;
451 for (k = 0 ; k < n ; k++)
453 SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
459 for (k = 0 ; k < n ; k++)
463 SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
464 SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
470 for (k = 0 ; k < n ; k++)
474 SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
475 SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
476 SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
482 for (k = 0 ; k < n ; k++)
486 SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
487 SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
488 SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
489 SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
506 template <
typename Entry,
typename Int>
510 KLU_symbolic<Entry, Int> *Symbolic,
511 KLU_numeric<Entry, Int> *Numeric,
526 KLU_common<Entry, Int> *Common
529 Entry x [4], offik, s ;
531 Entry *Offx, *X, *Bz, *Udiag ;
532 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
534 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
544 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
545 B == NULL || Xout == NULL)
547 Common->status = KLU_INVALID ;
550 Common->status = KLU_OK ;
558 nblocks = Symbolic->nblocks ;
566 ASSERT (nblocks == Numeric->nblocks) ;
567 Pnum = Numeric->Pnum ;
568 Offp = Numeric->Offp ;
569 Offi = Numeric->Offi ;
570 Offx = (Entry *) Numeric->Offx ;
573 Llen = Numeric->Llen ;
575 Ulen = Numeric->Ulen ;
576 LUbx = (Unit **) Numeric->LUbx ;
577 Udiag = (Entry *) Numeric->Udiag ;
580 X = (Entry *) Numeric->Xwork ;
581 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
587 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
594 nr = MIN (nrhs - chunk, 4) ;
605 for (k = 0 ; k < n ; k++)
613 for (k = 0 ; k < n ; k++)
617 X [2*k + 1] = Bz [i + d ] ;
623 for (k = 0 ; k < n ; k++)
627 X [3*k + 1] = Bz [i + d ] ;
628 X [3*k + 2] = Bz [i + d*2] ;
634 for (k = 0 ; k < n ; k++)
638 X [4*k + 1] = Bz [i + d ] ;
639 X [4*k + 2] = Bz [i + d*2] ;
640 X [4*k + 3] = Bz [i + d*3] ;
650 for (block = 0 ; block < nblocks ; block++)
660 PRINTF ((
"tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
673 for (k = k1 ; k < k2 ; k++)
676 for (p = Offp [k] ; p < pend ; p++)
681 MULT_SUB_CONJ (X [k], X [Offi [p]],
687 MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
695 for (k = k1 ; k < k2 ; k++)
699 x [1] = X [2*k + 1] ;
700 for (p = Offp [k] ; p < pend ; p++)
706 CONJ (offik, Offx [p]) ;
713 MULT_SUB (x [0], offik, X [2*i]) ;
714 MULT_SUB (x [1], offik, X [2*i + 1]) ;
717 X [2*k + 1] = x [1] ;
723 for (k = k1 ; k < k2 ; k++)
727 x [1] = X [3*k + 1] ;
728 x [2] = X [3*k + 2] ;
729 for (p = Offp [k] ; p < pend ; p++)
735 CONJ (offik, Offx [p]) ;
742 MULT_SUB (x [0], offik, X [3*i]) ;
743 MULT_SUB (x [1], offik, X [3*i + 1]) ;
744 MULT_SUB (x [2], offik, X [3*i + 2]) ;
747 X [3*k + 1] = x [1] ;
748 X [3*k + 2] = x [2] ;
754 for (k = k1 ; k < k2 ; k++)
758 x [1] = X [4*k + 1] ;
759 x [2] = X [4*k + 2] ;
760 x [3] = X [4*k + 3] ;
761 for (p = Offp [k] ; p < pend ; p++)
767 CONJ(offik, Offx [p]) ;
774 MULT_SUB (x [0], offik, X [4*i]) ;
775 MULT_SUB (x [1], offik, X [4*i + 1]) ;
776 MULT_SUB (x [2], offik, X [4*i + 2]) ;
777 MULT_SUB (x [3], offik, X [4*i + 3]) ;
780 X [4*k + 1] = x [1] ;
781 X [4*k + 2] = x [2] ;
782 X [4*k + 3] = x [3] ;
797 CONJ (s, Udiag [k1]) ;
808 DIV (X [k1], X [k1], s) ;
812 DIV (X [2*k1], X [2*k1], s) ;
813 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
817 DIV (X [3*k1], X [3*k1], s) ;
818 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
819 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
823 DIV (X [4*k1], X [4*k1], s) ;
824 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
825 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
826 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
833 KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
839 KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
861 for (k = 0 ; k < n ; k++)
863 Xout [Pnum [k]] = X [k] ;
869 for (k = 0 ; k < n ; k++)
872 Xout [i ] = X [2*k ] ;
873 Xout [i + d ] = X [2*k + 1] ;
879 for (k = 0 ; k < n ; k++)
882 Xout [i ] = X [3*k ] ;
883 Xout [i + d ] = X [3*k + 1] ;
884 Xout [i + d*2] = X [3*k + 2] ;
890 for (k = 0 ; k < n ; k++)
893 Xout [i ] = X [4*k ] ;
894 Xout [i + d ] = X [4*k + 1] ;
895 Xout [i + d*2] = X [4*k + 2] ;
896 Xout [i + d*3] = X [4*k + 3] ;
910 for (k = 0 ; k < n ; k++)
912 SCALE_DIV_ASSIGN (Xout [Pnum [k]], X [k], Rs [k]) ;
918 for (k = 0 ; k < n ; k++)
922 SCALE_DIV_ASSIGN (Xout [i], X [2*k], rs) ;
923 SCALE_DIV_ASSIGN (Xout [i + d], X [2*k + 1], rs) ;
929 for (k = 0 ; k < n ; k++)
933 SCALE_DIV_ASSIGN (Xout [i], X [3*k], rs) ;
934 SCALE_DIV_ASSIGN (Xout [i + d], X [3*k + 1], rs) ;
935 SCALE_DIV_ASSIGN (Xout [i + d*2], X [3*k + 2], rs) ;
941 for (k = 0 ; k < n ; k++)
945 SCALE_DIV_ASSIGN (Xout [i], X [4*k], rs) ;
946 SCALE_DIV_ASSIGN (Xout [i + d], X [4*k + 1], rs) ;
947 SCALE_DIV_ASSIGN (Xout [i + d*2], X [4*k + 2], rs) ;
948 SCALE_DIV_ASSIGN (Xout [i + d*3], X [4*k + 3], rs) ;