Amesos Package Browser (Single Doxygen Collection)  Development
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
amesos_klu_l_solve.c
Go to the documentation of this file.
1 /* ========================================================================== */
2 /* === KLU_solve ============================================================ */
3 /* ========================================================================== */
4 
5 /* Solve Ax=b using the symbolic and numeric objects from KLU_analyze
6  * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
7  * performed. Uses Numeric->Xwork as workspace (undefined on input and output),
8  * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
9  * Numeric->Iwork).
10  */
11 
12 /* This file should make the long int version of KLU */
13 #define DLONG 1
14 
15 #include "amesos_klu_internal.h"
16 
18 (
19  /* inputs, not modified */
20  KLU_symbolic *Symbolic,
21  KLU_numeric *Numeric,
22  Int d, /* leading dimension of B */
23  Int nrhs, /* number of right-hand-sides */
24 
25  /* right-hand-side on input, overwritten with solution to Ax=b on output */
26  double B [ ], /* size n*nrhs, in column-oriented form, with
27  * leading dimension d. */
28  /* --------------- */
29  KLU_common *Common
30 )
31 {
32  Entry x [4], offik, s ;
33  double rs, *Rs ;
34  Entry *Offx, *X, *Bz, *Udiag ;
35  Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
36  Unit **LUbx ;
37  Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
38 
39  /* ---------------------------------------------------------------------- */
40  /* check inputs */
41  /* ---------------------------------------------------------------------- */
42 
43  if (Common == NULL)
44  {
45  return (FALSE) ;
46  }
47  if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
48  B == NULL)
49  {
50  Common->status = KLU_INVALID ;
51  return (FALSE) ;
52  }
53  Common->status = KLU_OK ;
54 
55  /* ---------------------------------------------------------------------- */
56  /* get the contents of the Symbolic object */
57  /* ---------------------------------------------------------------------- */
58 
59  Bz = (Entry *) B ;
60  n = Symbolic->n ;
61  nblocks = Symbolic->nblocks ;
62  Q = Symbolic->Q ;
63  R = Symbolic->R ;
64 
65  /* ---------------------------------------------------------------------- */
66  /* get the contents of the Numeric object */
67  /* ---------------------------------------------------------------------- */
68 
69  ASSERT (nblocks == Numeric->nblocks) ;
70  Pnum = Numeric->Pnum ;
71  Offp = Numeric->Offp ;
72  Offi = Numeric->Offi ;
73  Offx = (Entry *) Numeric->Offx ;
74 
75  Lip = Numeric->Lip ;
76  Llen = Numeric->Llen ;
77  Uip = Numeric->Uip ;
78  Ulen = Numeric->Ulen ;
79  LUbx = (Unit **) Numeric->LUbx ;
80  Udiag = Numeric->Udiag ;
81 
82  Rs = Numeric->Rs ;
83  X = (Entry *) Numeric->Xwork ;
84 
85  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
86 
87  /* ---------------------------------------------------------------------- */
88  /* solve in chunks of 4 columns at a time */
89  /* ---------------------------------------------------------------------- */
90 
91  for (chunk = 0 ; chunk < nrhs ; chunk += 4)
92  {
93 
94  /* ------------------------------------------------------------------ */
95  /* get the size of the current chunk */
96  /* ------------------------------------------------------------------ */
97 
98  nr = MIN (nrhs - chunk, 4) ;
99 
100  /* ------------------------------------------------------------------ */
101  /* scale and permute the right hand side, X = P*(R\B) */
102  /* ------------------------------------------------------------------ */
103 
104  if (Rs == NULL)
105  {
106 
107  /* no scaling */
108  switch (nr)
109  {
110 
111  case 1:
112 
113  for (k = 0 ; k < n ; k++)
114  {
115  X [k] = Bz [Pnum [k]] ;
116  }
117  break ;
118 
119  case 2:
120 
121  for (k = 0 ; k < n ; k++)
122  {
123  i = Pnum [k] ;
124  X [2*k ] = Bz [i ] ;
125  X [2*k + 1] = Bz [i + d ] ;
126  }
127  break ;
128 
129  case 3:
130 
131  for (k = 0 ; k < n ; k++)
132  {
133  i = Pnum [k] ;
134  X [3*k ] = Bz [i ] ;
135  X [3*k + 1] = Bz [i + d ] ;
136  X [3*k + 2] = Bz [i + d*2] ;
137  }
138  break ;
139 
140  case 4:
141 
142  for (k = 0 ; k < n ; k++)
143  {
144  i = Pnum [k] ;
145  X [4*k ] = Bz [i ] ;
146  X [4*k + 1] = Bz [i + d ] ;
147  X [4*k + 2] = Bz [i + d*2] ;
148  X [4*k + 3] = Bz [i + d*3] ;
149  }
150  break ;
151  }
152 
153  }
154  else
155  {
156 
157  switch (nr)
158  {
159 
160  case 1:
161 
162  for (k = 0 ; k < n ; k++)
163  {
164  SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
165  }
166  break ;
167 
168  case 2:
169 
170  for (k = 0 ; k < n ; k++)
171  {
172  i = Pnum [k] ;
173  rs = Rs [k] ;
174  SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
175  SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
176  }
177  break ;
178 
179  case 3:
180 
181  for (k = 0 ; k < n ; k++)
182  {
183  i = Pnum [k] ;
184  rs = Rs [k] ;
185  SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
186  SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
187  SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
188  }
189  break ;
190 
191  case 4:
192 
193  for (k = 0 ; k < n ; k++)
194  {
195  i = Pnum [k] ;
196  rs = Rs [k] ;
197  SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
198  SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
199  SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
200  SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
201  }
202  break ;
203  }
204  }
205 
206  /* ------------------------------------------------------------------ */
207  /* solve X = (L*U + Off)\X */
208  /* ------------------------------------------------------------------ */
209 
210  for (block = nblocks-1 ; block >= 0 ; block--)
211  {
212 
213  /* -------------------------------------------------------------- */
214  /* the block of size nk is from rows/columns k1 to k2-1 */
215  /* -------------------------------------------------------------- */
216 
217  k1 = R [block] ;
218  k2 = R [block+1] ;
219  nk = k2 - k1 ;
220  PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
221 
222  /* solve the block system */
223  if (nk == 1)
224  {
225  s = Udiag [k1] ;
226  switch (nr)
227  {
228 
229  case 1:
230  DIV (X [k1], X [k1], s) ;
231  break ;
232 
233  case 2:
234  DIV (X [2*k1], X [2*k1], s) ;
235  DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
236  break ;
237 
238  case 3:
239  DIV (X [3*k1], X [3*k1], s) ;
240  DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
241  DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
242  break ;
243 
244  case 4:
245  DIV (X [4*k1], X [4*k1], s) ;
246  DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
247  DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
248  DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
249  break ;
250 
251  }
252  }
253  else
254  {
255  KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
256  X + nr*k1) ;
257  KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
258  Udiag + k1, nr, X + nr*k1) ;
259  }
260 
261  /* -------------------------------------------------------------- */
262  /* block back-substitution for the off-diagonal-block entries */
263  /* -------------------------------------------------------------- */
264 
265  if (block > 0)
266  {
267  switch (nr)
268  {
269 
270  case 1:
271 
272  for (k = k1 ; k < k2 ; k++)
273  {
274  pend = Offp [k+1] ;
275  x [0] = X [k] ;
276  for (p = Offp [k] ; p < pend ; p++)
277  {
278  MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
279  }
280  }
281  break ;
282 
283  case 2:
284 
285  for (k = k1 ; k < k2 ; k++)
286  {
287  pend = Offp [k+1] ;
288  x [0] = X [2*k ] ;
289  x [1] = X [2*k + 1] ;
290  for (p = Offp [k] ; p < pend ; p++)
291  {
292  i = Offi [p] ;
293  offik = Offx [p] ;
294  MULT_SUB (X [2*i], offik, x [0]) ;
295  MULT_SUB (X [2*i + 1], offik, x [1]) ;
296  }
297  }
298  break ;
299 
300  case 3:
301 
302  for (k = k1 ; k < k2 ; k++)
303  {
304  pend = Offp [k+1] ;
305  x [0] = X [3*k ] ;
306  x [1] = X [3*k + 1] ;
307  x [2] = X [3*k + 2] ;
308  for (p = Offp [k] ; p < pend ; p++)
309  {
310  i = Offi [p] ;
311  offik = Offx [p] ;
312  MULT_SUB (X [3*i], offik, x [0]) ;
313  MULT_SUB (X [3*i + 1], offik, x [1]) ;
314  MULT_SUB (X [3*i + 2], offik, x [2]) ;
315  }
316  }
317  break ;
318 
319  case 4:
320 
321  for (k = k1 ; k < k2 ; k++)
322  {
323  pend = Offp [k+1] ;
324  x [0] = X [4*k ] ;
325  x [1] = X [4*k + 1] ;
326  x [2] = X [4*k + 2] ;
327  x [3] = X [4*k + 3] ;
328  for (p = Offp [k] ; p < pend ; p++)
329  {
330  i = Offi [p] ;
331  offik = Offx [p] ;
332  MULT_SUB (X [4*i], offik, x [0]) ;
333  MULT_SUB (X [4*i + 1], offik, x [1]) ;
334  MULT_SUB (X [4*i + 2], offik, x [2]) ;
335  MULT_SUB (X [4*i + 3], offik, x [3]) ;
336  }
337  }
338  break ;
339  }
340  }
341  }
342 
343  /* ------------------------------------------------------------------ */
344  /* permute the result, Bz = Q*X */
345  /* ------------------------------------------------------------------ */
346 
347  switch (nr)
348  {
349 
350  case 1:
351 
352  for (k = 0 ; k < n ; k++)
353  {
354  Bz [Q [k]] = X [k] ;
355  }
356  break ;
357 
358  case 2:
359 
360  for (k = 0 ; k < n ; k++)
361  {
362  i = Q [k] ;
363  Bz [i ] = X [2*k ] ;
364  Bz [i + d ] = X [2*k + 1] ;
365  }
366  break ;
367 
368  case 3:
369 
370  for (k = 0 ; k < n ; k++)
371  {
372  i = Q [k] ;
373  Bz [i ] = X [3*k ] ;
374  Bz [i + d ] = X [3*k + 1] ;
375  Bz [i + d*2] = X [3*k + 2] ;
376  }
377  break ;
378 
379  case 4:
380 
381  for (k = 0 ; k < n ; k++)
382  {
383  i = Q [k] ;
384  Bz [i ] = X [4*k ] ;
385  Bz [i + d ] = X [4*k + 1] ;
386  Bz [i + d*2] = X [4*k + 2] ;
387  Bz [i + d*3] = X [4*k + 3] ;
388  }
389  break ;
390  }
391 
392  /* ------------------------------------------------------------------ */
393  /* go to the next chunk of B */
394  /* ------------------------------------------------------------------ */
395 
396  Bz += d*4 ;
397  }
398  return (TRUE) ;
399 }
#define KLU_usolve
#define KLU_INVALID
#define KLU_symbolic
#define Int
#define FALSE
#define KLU_lsolve
double Unit
Int KLU_solve(KLU_symbolic *Symbolic, KLU_numeric *Numeric, Int d, Int nrhs, double B[], KLU_common *Common)
#define NULL
#define ASSERT(expression)
#define MULT_SUB(c, a, b)
#define KLU_numeric
#define SCALE_DIV_ASSIGN(a, c, s)
#define KLU_valid
#define DIV(c, a, b)
#define KLU_common
#define MIN(a, b)
#define Entry
#define PRINTF(params)
int n
#define TRUE
#define KLU_OK