34 Entry x [4], offik, s ;
36 Entry *Offx, *X, *Bz, *Udiag ;
37 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
39 Int k1, k2, nk, k, block, pend,
n, p, nblocks, chunk, nr, i ;
40 #ifdef KLU_ENABLE_OPENMP
53 if (Numeric ==
NULL || Symbolic ==
NULL || d < Symbolic->n || nrhs < 0 ||
67 nblocks = Symbolic->nblocks ;
75 ASSERT (nblocks == Numeric->nblocks) ;
76 Pnum = Numeric->Pnum ;
77 Offp = Numeric->Offp ;
78 Offi = Numeric->Offi ;
79 Offx = (
Entry *) Numeric->Offx ;
82 Llen = Numeric->Llen ;
84 Ulen = Numeric->Ulen ;
85 LUbx = (
Unit **) Numeric->LUbx ;
86 Udiag = Numeric->Udiag ;
89 #ifdef KLU_ENABLE_OPENMP
90 X1 = (
Entry *) Numeric->Xwork ;
92 X = (
Entry *) Numeric->Xwork ;
100 #ifdef KLU_ENABLE_OPENMP
101 #pragma omp parallel for schedule(guided) private(tid, nr, k, block, k1, k2, nk, pend, p, s, i, Bz, X, x, offik, rs)
103 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
105 #ifdef KLU_ENABLE_OPENMP
106 Bz = ((
Entry *) B) + d*chunk ;
107 tid = omp_get_thread_num();
108 X = X1 + (tid * n * 4);
116 nr =
MIN (nrhs - chunk, 4) ;
127 for (k = 0 ; k < n ; k++)
135 for (k = 0 ; k < n ; k++)
139 X [2*k + 1] = Bz [i + d ] ;
145 for (k = 0 ; k < n ; k++)
149 X [3*k + 1] = Bz [i + d ] ;
150 X [3*k + 2] = Bz [i + d*2] ;
156 for (k = 0 ; k < n ; k++)
160 X [4*k + 1] = Bz [i + d ] ;
161 X [4*k + 2] = Bz [i + d*2] ;
162 X [4*k + 3] = Bz [i + d*3] ;
172 for (block = 0 ; block < nblocks ; block++)
182 PRINTF ((
"tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
195 for (k = k1 ; k < k2 ; k++)
198 for (p = Offp [k] ; p < pend ; p++)
209 MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
217 for (k = k1 ; k < k2 ; k++)
221 x [1] = X [2*k + 1] ;
222 for (p = Offp [k] ; p < pend ; p++)
228 CONJ (offik, Offx [p]) ;
236 MULT_SUB (x [1], offik, X [2*i + 1]) ;
239 X [2*k + 1] = x [1] ;
245 for (k = k1 ; k < k2 ; k++)
249 x [1] = X [3*k + 1] ;
250 x [2] = X [3*k + 2] ;
251 for (p = Offp [k] ; p < pend ; p++)
257 CONJ (offik, Offx [p]) ;
265 MULT_SUB (x [1], offik, X [3*i + 1]) ;
266 MULT_SUB (x [2], offik, X [3*i + 2]) ;
269 X [3*k + 1] = x [1] ;
270 X [3*k + 2] = x [2] ;
276 for (k = k1 ; k < k2 ; k++)
280 x [1] = X [4*k + 1] ;
281 x [2] = X [4*k + 2] ;
282 x [3] = X [4*k + 3] ;
283 for (p = Offp [k] ; p < pend ; p++)
289 CONJ(offik, Offx [p]) ;
297 MULT_SUB (x [1], offik, X [4*i + 1]) ;
298 MULT_SUB (x [2], offik, X [4*i + 2]) ;
299 MULT_SUB (x [3], offik, X [4*i + 3]) ;
302 X [4*k + 1] = x [1] ;
303 X [4*k + 2] = x [2] ;
304 X [4*k + 3] = x [3] ;
319 CONJ (s, Udiag [k1]) ;
330 DIV (X [k1], X [k1], s) ;
334 DIV (X [2*k1], X [2*k1], s) ;
335 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
339 DIV (X [3*k1], X [3*k1], s) ;
340 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
341 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
345 DIV (X [4*k1], X [4*k1], s) ;
346 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
347 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
348 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
355 KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
361 KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
382 for (k = 0 ; k < n ; k++)
384 Bz [Pnum [k]] = X [k] ;
390 for (k = 0 ; k < n ; k++)
394 Bz [i + d ] = X [2*k + 1] ;
400 for (k = 0 ; k < n ; k++)
404 Bz [i + d ] = X [3*k + 1] ;
405 Bz [i + d*2] = X [3*k + 2] ;
411 for (k = 0 ; k < n ; k++)
415 Bz [i + d ] = X [4*k + 1] ;
416 Bz [i + d*2] = X [4*k + 2] ;
417 Bz [i + d*3] = X [4*k + 3] ;
431 for (k = 0 ; k < n ; k++)
439 for (k = 0 ; k < n ; k++)
450 for (k = 0 ; k < n ; k++)
462 for (k = 0 ; k < n ; k++)
479 #ifndef KLU_ENABLE_OPENMP
#define ASSERT(expression)
#define MULT_SUB(c, a, b)
#define MULT_SUB_CONJ(c, a, b)
#define SCALE_DIV_ASSIGN(a, c, s)
Int KLU_tsolve(KLU_symbolic *Symbolic, KLU_numeric *Numeric, Int d, Int nrhs, double B[], KLU_common *Common)