Amesos Package Browser (Single Doxygen Collection)  Development
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
amesos_klu_solve.c
Go to the documentation of this file.
1 /* ========================================================================== */
2 /* === KLU_solve ============================================================ */
3 /* ========================================================================== */
4 
5 /* Solve Ax=b using the symbolic and numeric objects from KLU_analyze
6  * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
7  * performed. Uses Numeric->Xwork as workspace (undefined on input and output),
8  * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
9  * Numeric->Iwork).
10  */
11 
12 #include "amesos_klu_internal.h"
13 
15 (
16  /* inputs, not modified */
17  KLU_symbolic *Symbolic,
18  KLU_numeric *Numeric,
19  Int d, /* leading dimension of B */
20  Int nrhs, /* number of right-hand-sides */
21 
22  /* right-hand-side on input, overwritten with solution to Ax=b on output */
23  double B [ ], /* size n*nrhs, in column-oriented form, with
24  * leading dimension d. */
25  /* --------------- */
26  KLU_common *Common
27 )
28 {
29  Entry x [4], offik, s ;
30  double rs, *Rs ;
31  Entry *Offx, *X, *Bz, *Udiag ;
32  Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
33  Unit **LUbx ;
34  Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
35 
36  /* ---------------------------------------------------------------------- */
37  /* check inputs */
38  /* ---------------------------------------------------------------------- */
39 
40  if (Common == NULL)
41  {
42  return (FALSE) ;
43  }
44  if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
45  B == NULL)
46  {
47  Common->status = KLU_INVALID ;
48  return (FALSE) ;
49  }
50  Common->status = KLU_OK ;
51 
52  /* ---------------------------------------------------------------------- */
53  /* get the contents of the Symbolic object */
54  /* ---------------------------------------------------------------------- */
55 
56  Bz = (Entry *) B ;
57  n = Symbolic->n ;
58  nblocks = Symbolic->nblocks ;
59  Q = Symbolic->Q ;
60  R = Symbolic->R ;
61 
62  /* ---------------------------------------------------------------------- */
63  /* get the contents of the Numeric object */
64  /* ---------------------------------------------------------------------- */
65 
66  ASSERT (nblocks == Numeric->nblocks) ;
67  Pnum = Numeric->Pnum ;
68  Offp = Numeric->Offp ;
69  Offi = Numeric->Offi ;
70  Offx = (Entry *) Numeric->Offx ;
71 
72  Lip = Numeric->Lip ;
73  Llen = Numeric->Llen ;
74  Uip = Numeric->Uip ;
75  Ulen = Numeric->Ulen ;
76  LUbx = (Unit **) Numeric->LUbx ;
77  Udiag = Numeric->Udiag ;
78 
79  Rs = Numeric->Rs ;
80  X = (Entry *) Numeric->Xwork ;
81 
82  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
83 
84  /* ---------------------------------------------------------------------- */
85  /* solve in chunks of 4 columns at a time */
86  /* ---------------------------------------------------------------------- */
87 
88  for (chunk = 0 ; chunk < nrhs ; chunk += 4)
89  {
90 
91  /* ------------------------------------------------------------------ */
92  /* get the size of the current chunk */
93  /* ------------------------------------------------------------------ */
94 
95  nr = MIN (nrhs - chunk, 4) ;
96 
97  /* ------------------------------------------------------------------ */
98  /* scale and permute the right hand side, X = P*(R\B) */
99  /* ------------------------------------------------------------------ */
100 
101  if (Rs == NULL)
102  {
103 
104  /* no scaling */
105  switch (nr)
106  {
107 
108  case 1:
109 
110  for (k = 0 ; k < n ; k++)
111  {
112  X [k] = Bz [Pnum [k]] ;
113  }
114  break ;
115 
116  case 2:
117 
118  for (k = 0 ; k < n ; k++)
119  {
120  i = Pnum [k] ;
121  X [2*k ] = Bz [i ] ;
122  X [2*k + 1] = Bz [i + d ] ;
123  }
124  break ;
125 
126  case 3:
127 
128  for (k = 0 ; k < n ; k++)
129  {
130  i = Pnum [k] ;
131  X [3*k ] = Bz [i ] ;
132  X [3*k + 1] = Bz [i + d ] ;
133  X [3*k + 2] = Bz [i + d*2] ;
134  }
135  break ;
136 
137  case 4:
138 
139  for (k = 0 ; k < n ; k++)
140  {
141  i = Pnum [k] ;
142  X [4*k ] = Bz [i ] ;
143  X [4*k + 1] = Bz [i + d ] ;
144  X [4*k + 2] = Bz [i + d*2] ;
145  X [4*k + 3] = Bz [i + d*3] ;
146  }
147  break ;
148  }
149 
150  }
151  else
152  {
153 
154  switch (nr)
155  {
156 
157  case 1:
158 
159  for (k = 0 ; k < n ; k++)
160  {
161  SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
162  }
163  break ;
164 
165  case 2:
166 
167  for (k = 0 ; k < n ; k++)
168  {
169  i = Pnum [k] ;
170  rs = Rs [k] ;
171  SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
172  SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
173  }
174  break ;
175 
176  case 3:
177 
178  for (k = 0 ; k < n ; k++)
179  {
180  i = Pnum [k] ;
181  rs = Rs [k] ;
182  SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
183  SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
184  SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
185  }
186  break ;
187 
188  case 4:
189 
190  for (k = 0 ; k < n ; k++)
191  {
192  i = Pnum [k] ;
193  rs = Rs [k] ;
194  SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
195  SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
196  SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
197  SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
198  }
199  break ;
200  }
201  }
202 
203  /* ------------------------------------------------------------------ */
204  /* solve X = (L*U + Off)\X */
205  /* ------------------------------------------------------------------ */
206 
207  for (block = nblocks-1 ; block >= 0 ; block--)
208  {
209 
210  /* -------------------------------------------------------------- */
211  /* the block of size nk is from rows/columns k1 to k2-1 */
212  /* -------------------------------------------------------------- */
213 
214  k1 = R [block] ;
215  k2 = R [block+1] ;
216  nk = k2 - k1 ;
217  PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
218 
219  /* solve the block system */
220  if (nk == 1)
221  {
222  s = Udiag [k1] ;
223  switch (nr)
224  {
225 
226  case 1:
227  DIV (X [k1], X [k1], s) ;
228  break ;
229 
230  case 2:
231  DIV (X [2*k1], X [2*k1], s) ;
232  DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
233  break ;
234 
235  case 3:
236  DIV (X [3*k1], X [3*k1], s) ;
237  DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
238  DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
239  break ;
240 
241  case 4:
242  DIV (X [4*k1], X [4*k1], s) ;
243  DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
244  DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
245  DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
246  break ;
247 
248  }
249  }
250  else
251  {
252  KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
253  X + nr*k1) ;
254  KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
255  Udiag + k1, nr, X + nr*k1) ;
256  }
257 
258  /* -------------------------------------------------------------- */
259  /* block back-substitution for the off-diagonal-block entries */
260  /* -------------------------------------------------------------- */
261 
262  if (block > 0)
263  {
264  switch (nr)
265  {
266 
267  case 1:
268 
269  for (k = k1 ; k < k2 ; k++)
270  {
271  pend = Offp [k+1] ;
272  x [0] = X [k] ;
273  for (p = Offp [k] ; p < pend ; p++)
274  {
275  MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
276  }
277  }
278  break ;
279 
280  case 2:
281 
282  for (k = k1 ; k < k2 ; k++)
283  {
284  pend = Offp [k+1] ;
285  x [0] = X [2*k ] ;
286  x [1] = X [2*k + 1] ;
287  for (p = Offp [k] ; p < pend ; p++)
288  {
289  i = Offi [p] ;
290  offik = Offx [p] ;
291  MULT_SUB (X [2*i], offik, x [0]) ;
292  MULT_SUB (X [2*i + 1], offik, x [1]) ;
293  }
294  }
295  break ;
296 
297  case 3:
298 
299  for (k = k1 ; k < k2 ; k++)
300  {
301  pend = Offp [k+1] ;
302  x [0] = X [3*k ] ;
303  x [1] = X [3*k + 1] ;
304  x [2] = X [3*k + 2] ;
305  for (p = Offp [k] ; p < pend ; p++)
306  {
307  i = Offi [p] ;
308  offik = Offx [p] ;
309  MULT_SUB (X [3*i], offik, x [0]) ;
310  MULT_SUB (X [3*i + 1], offik, x [1]) ;
311  MULT_SUB (X [3*i + 2], offik, x [2]) ;
312  }
313  }
314  break ;
315 
316  case 4:
317 
318  for (k = k1 ; k < k2 ; k++)
319  {
320  pend = Offp [k+1] ;
321  x [0] = X [4*k ] ;
322  x [1] = X [4*k + 1] ;
323  x [2] = X [4*k + 2] ;
324  x [3] = X [4*k + 3] ;
325  for (p = Offp [k] ; p < pend ; p++)
326  {
327  i = Offi [p] ;
328  offik = Offx [p] ;
329  MULT_SUB (X [4*i], offik, x [0]) ;
330  MULT_SUB (X [4*i + 1], offik, x [1]) ;
331  MULT_SUB (X [4*i + 2], offik, x [2]) ;
332  MULT_SUB (X [4*i + 3], offik, x [3]) ;
333  }
334  }
335  break ;
336  }
337  }
338  }
339 
340  /* ------------------------------------------------------------------ */
341  /* permute the result, Bz = Q*X */
342  /* ------------------------------------------------------------------ */
343 
344  switch (nr)
345  {
346 
347  case 1:
348 
349  for (k = 0 ; k < n ; k++)
350  {
351  Bz [Q [k]] = X [k] ;
352  }
353  break ;
354 
355  case 2:
356 
357  for (k = 0 ; k < n ; k++)
358  {
359  i = Q [k] ;
360  Bz [i ] = X [2*k ] ;
361  Bz [i + d ] = X [2*k + 1] ;
362  }
363  break ;
364 
365  case 3:
366 
367  for (k = 0 ; k < n ; k++)
368  {
369  i = Q [k] ;
370  Bz [i ] = X [3*k ] ;
371  Bz [i + d ] = X [3*k + 1] ;
372  Bz [i + d*2] = X [3*k + 2] ;
373  }
374  break ;
375 
376  case 4:
377 
378  for (k = 0 ; k < n ; k++)
379  {
380  i = Q [k] ;
381  Bz [i ] = X [4*k ] ;
382  Bz [i + d ] = X [4*k + 1] ;
383  Bz [i + d*2] = X [4*k + 2] ;
384  Bz [i + d*3] = X [4*k + 3] ;
385  }
386  break ;
387  }
388 
389  /* ------------------------------------------------------------------ */
390  /* go to the next chunk of B */
391  /* ------------------------------------------------------------------ */
392 
393  Bz += d*4 ;
394  }
395  return (TRUE) ;
396 }
#define KLU_usolve
#define KLU_INVALID
#define KLU_symbolic
#define Int
#define FALSE
#define KLU_lsolve
double Unit
#define NULL
#define ASSERT(expression)
#define MULT_SUB(c, a, b)
#define KLU_numeric
Int KLU_solve(KLU_symbolic *Symbolic, KLU_numeric *Numeric, Int d, Int nrhs, double B[], KLU_common *Common)
#define SCALE_DIV_ASSIGN(a, c, s)
#define KLU_valid
#define DIV(c, a, b)
#define KLU_common
#define MIN(a, b)
#define Entry
#define PRINTF(params)
int n
#define TRUE
#define KLU_OK