Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2_tsolve.hpp
1 /* ========================================================================== */
2 /* === KLU_tsolve =========================================================== */
3 /* ========================================================================== */
4 // @HEADER
5 // ***********************************************************************
6 //
7 // KLU2: A Direct Linear Solver package
8 // Copyright 2011 Sandia Corporation
9 //
10 // Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11 // U.S. Government retains certain rights in this software.
12 //
13 // This library is free software; you can redistribute it and/or modify
14 // it under the terms of the GNU Lesser General Public License as
15 // published by the Free Software Foundation; either version 2.1 of the
16 // License, or (at your option) any later version.
17 //
18 // This library is distributed in the hope that it will be useful, but
19 // WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 // Lesser General Public License for more details.
22 //
23 // You should have received a copy of the GNU Lesser General Public
24 // License along with this library; if not, write to the Free Software
25 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 // USA
27 // Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28 //
29 // KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30 // University of Florida. The Authors of KLU are Timothy A. Davis and
31 // Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32 // information for KLU.
33 //
34 // ***********************************************************************
35 // @HEADER
36 
37 /* Solve A'x=b using the symbolic and numeric objects from KLU_analyze
38  * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
39  * performed. Uses Numeric->Xwork as workspace (undefined on input and output),
40  * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
41  * Numeric->Iwork).
42  */
43 
44 #ifndef KLU2_TSOLVE_HPP
45 #define KLU2_TSOLVE_HPP
46 
47 #include "klu2_internal.h"
48 #include "klu2.hpp"
49 
50 template <typename Entry, typename Int>
51 Int KLU_tsolve
52 (
53  /* inputs, not modified */
54  KLU_symbolic<Entry, Int> *Symbolic,
55  KLU_numeric<Entry, Int> *Numeric,
56  Int d, /* leading dimension of B */
57  Int nrhs, /* number of right-hand-sides */
58 
59  /* right-hand-side on input, overwritten with solution to Ax=b on output */
60  Entry B [ ], /* size n*nrhs, in column-oriented form, with
61  * leading dimension d. */
62 #ifdef COMPLEX
63  Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for
64  * array transpose solve. Used for the complex
65  * case only. */
66 #endif
67  /* --------------- */
68  KLU_common<Entry, Int> *Common
69 )
70 {
71  Entry x [4], offik, s ;
72  double rs, *Rs ;
73  Entry *Offx, *X, *Bz, *Udiag ;
74  Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
75  Unit **LUbx ;
76  Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
77 
78  /* ---------------------------------------------------------------------- */
79  /* check inputs */
80  /* ---------------------------------------------------------------------- */
81 
82  if (Common == NULL)
83  {
84  return (FALSE) ;
85  }
86  if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
87  B == NULL)
88  {
89  Common->status = KLU_INVALID ;
90  return (FALSE) ;
91  }
92  Common->status = KLU_OK ;
93 
94  /* ---------------------------------------------------------------------- */
95  /* get the contents of the Symbolic object */
96  /* ---------------------------------------------------------------------- */
97 
98  Bz = (Entry *) B ;
99  n = Symbolic->n ;
100  nblocks = Symbolic->nblocks ;
101  Q = Symbolic->Q ;
102  R = Symbolic->R ;
103 
104  /* ---------------------------------------------------------------------- */
105  /* get the contents of the Numeric object */
106  /* ---------------------------------------------------------------------- */
107 
108  ASSERT (nblocks == Numeric->nblocks) ;
109  Pnum = Numeric->Pnum ;
110  Offp = Numeric->Offp ;
111  Offi = Numeric->Offi ;
112  Offx = (Entry *) Numeric->Offx ;
113 
114  Lip = Numeric->Lip ;
115  Llen = Numeric->Llen ;
116  Uip = Numeric->Uip ;
117  Ulen = Numeric->Ulen ;
118  LUbx = (Unit **) Numeric->LUbx ;
119  Udiag = (Entry *) Numeric->Udiag ;
120 
121  Rs = Numeric->Rs ;
122  X = (Entry *) Numeric->Xwork ;
123  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
124 
125  /* ---------------------------------------------------------------------- */
126  /* solve in chunks of 4 columns at a time */
127  /* ---------------------------------------------------------------------- */
128 
129  for (chunk = 0 ; chunk < nrhs ; chunk += 4)
130  {
131 
132  /* ------------------------------------------------------------------ */
133  /* get the size of the current chunk */
134  /* ------------------------------------------------------------------ */
135 
136  nr = MIN (nrhs - chunk, 4) ;
137 
138  /* ------------------------------------------------------------------ */
139  /* permute the right hand side, X = Q'*B */
140  /* ------------------------------------------------------------------ */
141 
142  switch (nr)
143  {
144 
145  case 1:
146 
147  for (k = 0 ; k < n ; k++)
148  {
149  X [k] = Bz [Q [k]] ;
150  }
151  break ;
152 
153  case 2:
154 
155  for (k = 0 ; k < n ; k++)
156  {
157  i = Q [k] ;
158  X [2*k ] = Bz [i ] ;
159  X [2*k + 1] = Bz [i + d ] ;
160  }
161  break ;
162 
163  case 3:
164 
165  for (k = 0 ; k < n ; k++)
166  {
167  i = Q [k] ;
168  X [3*k ] = Bz [i ] ;
169  X [3*k + 1] = Bz [i + d ] ;
170  X [3*k + 2] = Bz [i + d*2] ;
171  }
172  break ;
173 
174  case 4:
175 
176  for (k = 0 ; k < n ; k++)
177  {
178  i = Q [k] ;
179  X [4*k ] = Bz [i ] ;
180  X [4*k + 1] = Bz [i + d ] ;
181  X [4*k + 2] = Bz [i + d*2] ;
182  X [4*k + 3] = Bz [i + d*3] ;
183  }
184  break ;
185 
186  }
187 
188  /* ------------------------------------------------------------------ */
189  /* solve X = (L*U + Off)'\X */
190  /* ------------------------------------------------------------------ */
191 
192  for (block = 0 ; block < nblocks ; block++)
193  {
194 
195  /* -------------------------------------------------------------- */
196  /* the block of size nk is from rows/columns k1 to k2-1 */
197  /* -------------------------------------------------------------- */
198 
199  k1 = R [block] ;
200  k2 = R [block+1] ;
201  nk = k2 - k1 ;
202  PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
203 
204  /* -------------------------------------------------------------- */
205  /* block back-substitution for the off-diagonal-block entries */
206  /* -------------------------------------------------------------- */
207 
208  if (block > 0)
209  {
210  switch (nr)
211  {
212 
213  case 1:
214 
215  for (k = k1 ; k < k2 ; k++)
216  {
217  pend = Offp [k+1] ;
218  for (p = Offp [k] ; p < pend ; p++)
219  {
220 #ifdef COMPLEX
221  if (conj_solve)
222  {
223  MULT_SUB_CONJ (X [k], X [Offi [p]],
224  Offx [p]) ;
225  }
226  else
227 #endif
228  {
229  MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
230  }
231  }
232  }
233  break ;
234 
235  case 2:
236 
237  for (k = k1 ; k < k2 ; k++)
238  {
239  pend = Offp [k+1] ;
240  x [0] = X [2*k ] ;
241  x [1] = X [2*k + 1] ;
242  for (p = Offp [k] ; p < pend ; p++)
243  {
244  i = Offi [p] ;
245 #ifdef COMPLEX
246  if (conj_solve)
247  {
248  CONJ (offik, Offx [p]) ;
249  }
250  else
251 #endif
252  {
253  offik = Offx [p] ;
254  }
255  MULT_SUB (x [0], offik, X [2*i]) ;
256  MULT_SUB (x [1], offik, X [2*i + 1]) ;
257  }
258  X [2*k ] = x [0] ;
259  X [2*k + 1] = x [1] ;
260  }
261  break ;
262 
263  case 3:
264 
265  for (k = k1 ; k < k2 ; k++)
266  {
267  pend = Offp [k+1] ;
268  x [0] = X [3*k ] ;
269  x [1] = X [3*k + 1] ;
270  x [2] = X [3*k + 2] ;
271  for (p = Offp [k] ; p < pend ; p++)
272  {
273  i = Offi [p] ;
274 #ifdef COMPLEX
275  if (conj_solve)
276  {
277  CONJ (offik, Offx [p]) ;
278  }
279  else
280 #endif
281  {
282  offik = Offx [p] ;
283  }
284  MULT_SUB (x [0], offik, X [3*i]) ;
285  MULT_SUB (x [1], offik, X [3*i + 1]) ;
286  MULT_SUB (x [2], offik, X [3*i + 2]) ;
287  }
288  X [3*k ] = x [0] ;
289  X [3*k + 1] = x [1] ;
290  X [3*k + 2] = x [2] ;
291  }
292  break ;
293 
294  case 4:
295 
296  for (k = k1 ; k < k2 ; k++)
297  {
298  pend = Offp [k+1] ;
299  x [0] = X [4*k ] ;
300  x [1] = X [4*k + 1] ;
301  x [2] = X [4*k + 2] ;
302  x [3] = X [4*k + 3] ;
303  for (p = Offp [k] ; p < pend ; p++)
304  {
305  i = Offi [p] ;
306 #ifdef COMPLEX
307  if (conj_solve)
308  {
309  CONJ(offik, Offx [p]) ;
310  }
311  else
312 #endif
313  {
314  offik = Offx [p] ;
315  }
316  MULT_SUB (x [0], offik, X [4*i]) ;
317  MULT_SUB (x [1], offik, X [4*i + 1]) ;
318  MULT_SUB (x [2], offik, X [4*i + 2]) ;
319  MULT_SUB (x [3], offik, X [4*i + 3]) ;
320  }
321  X [4*k ] = x [0] ;
322  X [4*k + 1] = x [1] ;
323  X [4*k + 2] = x [2] ;
324  X [4*k + 3] = x [3] ;
325  }
326  break ;
327  }
328  }
329 
330  /* -------------------------------------------------------------- */
331  /* solve the block system */
332  /* -------------------------------------------------------------- */
333 
334  if (nk == 1)
335  {
336 #ifdef COMPLEX
337  if (conj_solve)
338  {
339  CONJ (s, Udiag [k1]) ;
340  }
341  else
342 #endif
343  {
344  s = Udiag [k1] ;
345  }
346  switch (nr)
347  {
348 
349  case 1:
350  DIV (X [k1], X [k1], s) ;
351  break ;
352 
353  case 2:
354  DIV (X [2*k1], X [2*k1], s) ;
355  DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
356  break ;
357 
358  case 3:
359  DIV (X [3*k1], X [3*k1], s) ;
360  DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
361  DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
362  break ;
363 
364  case 4:
365  DIV (X [4*k1], X [4*k1], s) ;
366  DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
367  DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
368  DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
369  break ;
370 
371  }
372  }
373  else
374  {
375  KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
376  Udiag + k1, nr,
377 #ifdef COMPLEX
378  conj_solve,
379 #endif
380  X + nr*k1) ;
381  KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
382 #ifdef COMPLEX
383  conj_solve,
384 #endif
385  X + nr*k1) ;
386  }
387  }
388 
389  /* ------------------------------------------------------------------ */
390  /* scale and permute the result, Bz = P'(R\X) */
391  /* ------------------------------------------------------------------ */
392 
393  if (Rs == NULL)
394  {
395 
396  /* no scaling */
397  switch (nr)
398  {
399 
400  case 1:
401 
402  for (k = 0 ; k < n ; k++)
403  {
404  Bz [Pnum [k]] = X [k] ;
405  }
406  break ;
407 
408  case 2:
409 
410  for (k = 0 ; k < n ; k++)
411  {
412  i = Pnum [k] ;
413  Bz [i ] = X [2*k ] ;
414  Bz [i + d ] = X [2*k + 1] ;
415  }
416  break ;
417 
418  case 3:
419 
420  for (k = 0 ; k < n ; k++)
421  {
422  i = Pnum [k] ;
423  Bz [i ] = X [3*k ] ;
424  Bz [i + d ] = X [3*k + 1] ;
425  Bz [i + d*2] = X [3*k + 2] ;
426  }
427  break ;
428 
429  case 4:
430 
431  for (k = 0 ; k < n ; k++)
432  {
433  i = Pnum [k] ;
434  Bz [i ] = X [4*k ] ;
435  Bz [i + d ] = X [4*k + 1] ;
436  Bz [i + d*2] = X [4*k + 2] ;
437  Bz [i + d*3] = X [4*k + 3] ;
438  }
439  break ;
440  }
441 
442  }
443  else
444  {
445 
446  switch (nr)
447  {
448 
449  case 1:
450 
451  for (k = 0 ; k < n ; k++)
452  {
453  SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
454  }
455  break ;
456 
457  case 2:
458 
459  for (k = 0 ; k < n ; k++)
460  {
461  i = Pnum [k] ;
462  rs = Rs [k] ;
463  SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
464  SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
465  }
466  break ;
467 
468  case 3:
469 
470  for (k = 0 ; k < n ; k++)
471  {
472  i = Pnum [k] ;
473  rs = Rs [k] ;
474  SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
475  SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
476  SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
477  }
478  break ;
479 
480  case 4:
481 
482  for (k = 0 ; k < n ; k++)
483  {
484  i = Pnum [k] ;
485  rs = Rs [k] ;
486  SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
487  SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
488  SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
489  SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
490  }
491  break ;
492  }
493  }
494 
495  /* ------------------------------------------------------------------ */
496  /* go to the next chunk of B */
497  /* ------------------------------------------------------------------ */
498 
499  Bz += d*4 ;
500  }
501  return (TRUE) ;
502 }
503 
504 
505 // Require Xout and B to have equal number of entries, pre-allocated
506 template <typename Entry, typename Int>
507 Int KLU_tsolve2
508 (
509  /* inputs, not modified */
510  KLU_symbolic<Entry, Int> *Symbolic,
511  KLU_numeric<Entry, Int> *Numeric,
512  Int d, /* leading dimension of B */
513  Int nrhs, /* number of right-hand-sides */
514 
515  /* right-hand-side on input, overwritten with solution to Ax=b on output */
516  Entry B [ ], /* size n*1, in column-oriented form, with
517  * leading dimension d. */
518  Entry Xout [ ], /* size n*1, in column-oriented form, with
519  * leading dimension d. */
520 #ifdef COMPLEX
521  Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for
522  * array transpose solve. Used for the complex
523  * case only. */
524 #endif
525  /* --------------- */
526  KLU_common<Entry, Int> *Common
527 )
528 {
529  Entry x [4], offik, s ;
530  double rs, *Rs ;
531  Entry *Offx, *X, *Bz, *Udiag ;
532  Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
533  Unit **LUbx ;
534  Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
535 
536  /* ---------------------------------------------------------------------- */
537  /* check inputs */
538  /* ---------------------------------------------------------------------- */
539 
540  if (Common == NULL)
541  {
542  return (FALSE) ;
543  }
544  if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
545  B == NULL || Xout == NULL)
546  {
547  Common->status = KLU_INVALID ;
548  return (FALSE) ;
549  }
550  Common->status = KLU_OK ;
551 
552  /* ---------------------------------------------------------------------- */
553  /* get the contents of the Symbolic object */
554  /* ---------------------------------------------------------------------- */
555 
556  Bz = (Entry *) B ;
557  n = Symbolic->n ;
558  nblocks = Symbolic->nblocks ;
559  Q = Symbolic->Q ;
560  R = Symbolic->R ;
561 
562  /* ---------------------------------------------------------------------- */
563  /* get the contents of the Numeric object */
564  /* ---------------------------------------------------------------------- */
565 
566  ASSERT (nblocks == Numeric->nblocks) ;
567  Pnum = Numeric->Pnum ;
568  Offp = Numeric->Offp ;
569  Offi = Numeric->Offi ;
570  Offx = (Entry *) Numeric->Offx ;
571 
572  Lip = Numeric->Lip ;
573  Llen = Numeric->Llen ;
574  Uip = Numeric->Uip ;
575  Ulen = Numeric->Ulen ;
576  LUbx = (Unit **) Numeric->LUbx ;
577  Udiag = (Entry *) Numeric->Udiag ;
578 
579  Rs = Numeric->Rs ;
580  X = (Entry *) Numeric->Xwork ;
581  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
582 
583  /* ---------------------------------------------------------------------- */
584  /* solve in chunks of 4 columns at a time */
585  /* ---------------------------------------------------------------------- */
586 
587  for (chunk = 0 ; chunk < nrhs ; chunk += 4)
588  {
589 
590  /* ------------------------------------------------------------------ */
591  /* get the size of the current chunk */
592  /* ------------------------------------------------------------------ */
593 
594  nr = MIN (nrhs - chunk, 4) ;
595 
596  /* ------------------------------------------------------------------ */
597  /* permute the right hand side, X = Q'*B */
598  /* ------------------------------------------------------------------ */
599 
600  switch (nr)
601  {
602 
603  case 1:
604 
605  for (k = 0 ; k < n ; k++)
606  {
607  X [k] = Bz [Q [k]] ;
608  }
609  break ;
610 
611  case 2:
612 
613  for (k = 0 ; k < n ; k++)
614  {
615  i = Q [k] ;
616  X [2*k ] = Bz [i ] ;
617  X [2*k + 1] = Bz [i + d ] ;
618  }
619  break ;
620 
621  case 3:
622 
623  for (k = 0 ; k < n ; k++)
624  {
625  i = Q [k] ;
626  X [3*k ] = Bz [i ] ;
627  X [3*k + 1] = Bz [i + d ] ;
628  X [3*k + 2] = Bz [i + d*2] ;
629  }
630  break ;
631 
632  case 4:
633 
634  for (k = 0 ; k < n ; k++)
635  {
636  i = Q [k] ;
637  X [4*k ] = Bz [i ] ;
638  X [4*k + 1] = Bz [i + d ] ;
639  X [4*k + 2] = Bz [i + d*2] ;
640  X [4*k + 3] = Bz [i + d*3] ;
641  }
642  break ;
643 
644  }
645 
646  /* ------------------------------------------------------------------ */
647  /* solve X = (L*U + Off)'\X */
648  /* ------------------------------------------------------------------ */
649 
650  for (block = 0 ; block < nblocks ; block++)
651  {
652 
653  /* -------------------------------------------------------------- */
654  /* the block of size nk is from rows/columns k1 to k2-1 */
655  /* -------------------------------------------------------------- */
656 
657  k1 = R [block] ;
658  k2 = R [block+1] ;
659  nk = k2 - k1 ;
660  PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
661 
662  /* -------------------------------------------------------------- */
663  /* block back-substitution for the off-diagonal-block entries */
664  /* -------------------------------------------------------------- */
665 
666  if (block > 0)
667  {
668  switch (nr)
669  {
670 
671  case 1:
672 
673  for (k = k1 ; k < k2 ; k++)
674  {
675  pend = Offp [k+1] ;
676  for (p = Offp [k] ; p < pend ; p++)
677  {
678 #ifdef COMPLEX
679  if (conj_solve)
680  {
681  MULT_SUB_CONJ (X [k], X [Offi [p]],
682  Offx [p]) ;
683  }
684  else
685 #endif
686  {
687  MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
688  }
689  }
690  }
691  break ;
692 
693  case 2:
694 
695  for (k = k1 ; k < k2 ; k++)
696  {
697  pend = Offp [k+1] ;
698  x [0] = X [2*k ] ;
699  x [1] = X [2*k + 1] ;
700  for (p = Offp [k] ; p < pend ; p++)
701  {
702  i = Offi [p] ;
703 #ifdef COMPLEX
704  if (conj_solve)
705  {
706  CONJ (offik, Offx [p]) ;
707  }
708  else
709 #endif
710  {
711  offik = Offx [p] ;
712  }
713  MULT_SUB (x [0], offik, X [2*i]) ;
714  MULT_SUB (x [1], offik, X [2*i + 1]) ;
715  }
716  X [2*k ] = x [0] ;
717  X [2*k + 1] = x [1] ;
718  }
719  break ;
720 
721  case 3:
722 
723  for (k = k1 ; k < k2 ; k++)
724  {
725  pend = Offp [k+1] ;
726  x [0] = X [3*k ] ;
727  x [1] = X [3*k + 1] ;
728  x [2] = X [3*k + 2] ;
729  for (p = Offp [k] ; p < pend ; p++)
730  {
731  i = Offi [p] ;
732 #ifdef COMPLEX
733  if (conj_solve)
734  {
735  CONJ (offik, Offx [p]) ;
736  }
737  else
738 #endif
739  {
740  offik = Offx [p] ;
741  }
742  MULT_SUB (x [0], offik, X [3*i]) ;
743  MULT_SUB (x [1], offik, X [3*i + 1]) ;
744  MULT_SUB (x [2], offik, X [3*i + 2]) ;
745  }
746  X [3*k ] = x [0] ;
747  X [3*k + 1] = x [1] ;
748  X [3*k + 2] = x [2] ;
749  }
750  break ;
751 
752  case 4:
753 
754  for (k = k1 ; k < k2 ; k++)
755  {
756  pend = Offp [k+1] ;
757  x [0] = X [4*k ] ;
758  x [1] = X [4*k + 1] ;
759  x [2] = X [4*k + 2] ;
760  x [3] = X [4*k + 3] ;
761  for (p = Offp [k] ; p < pend ; p++)
762  {
763  i = Offi [p] ;
764 #ifdef COMPLEX
765  if (conj_solve)
766  {
767  CONJ(offik, Offx [p]) ;
768  }
769  else
770 #endif
771  {
772  offik = Offx [p] ;
773  }
774  MULT_SUB (x [0], offik, X [4*i]) ;
775  MULT_SUB (x [1], offik, X [4*i + 1]) ;
776  MULT_SUB (x [2], offik, X [4*i + 2]) ;
777  MULT_SUB (x [3], offik, X [4*i + 3]) ;
778  }
779  X [4*k ] = x [0] ;
780  X [4*k + 1] = x [1] ;
781  X [4*k + 2] = x [2] ;
782  X [4*k + 3] = x [3] ;
783  }
784  break ;
785  }
786  }
787 
788  /* -------------------------------------------------------------- */
789  /* solve the block system */
790  /* -------------------------------------------------------------- */
791 
792  if (nk == 1)
793  {
794 #ifdef COMPLEX
795  if (conj_solve)
796  {
797  CONJ (s, Udiag [k1]) ;
798  }
799  else
800 #endif
801  {
802  s = Udiag [k1] ;
803  }
804  switch (nr)
805  {
806 
807  case 1:
808  DIV (X [k1], X [k1], s) ;
809  break ;
810 
811  case 2:
812  DIV (X [2*k1], X [2*k1], s) ;
813  DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
814  break ;
815 
816  case 3:
817  DIV (X [3*k1], X [3*k1], s) ;
818  DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
819  DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
820  break ;
821 
822  case 4:
823  DIV (X [4*k1], X [4*k1], s) ;
824  DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
825  DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
826  DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
827  break ;
828 
829  }
830  }
831  else
832  {
833  KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
834  Udiag + k1, nr,
835 #ifdef COMPLEX
836  conj_solve,
837 #endif
838  X + nr*k1) ;
839  KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
840 #ifdef COMPLEX
841  conj_solve,
842 #endif
843  X + nr*k1) ;
844  }
845  }
846 
847  /* ------------------------------------------------------------------ */
848  /* scale and permute the result, Bz = P'(R\X) */
849  /* store result in Xout */
850  /* ------------------------------------------------------------------ */
851 
852  if (Rs == NULL)
853  {
854 
855  /* no scaling */
856  switch (nr)
857  {
858 
859  case 1:
860 
861  for (k = 0 ; k < n ; k++)
862  {
863  Xout [Pnum [k]] = X [k] ;
864  }
865  break ;
866 
867  case 2:
868 
869  for (k = 0 ; k < n ; k++)
870  {
871  i = Pnum [k] ;
872  Xout [i ] = X [2*k ] ;
873  Xout [i + d ] = X [2*k + 1] ;
874  }
875  break ;
876 
877  case 3:
878 
879  for (k = 0 ; k < n ; k++)
880  {
881  i = Pnum [k] ;
882  Xout [i ] = X [3*k ] ;
883  Xout [i + d ] = X [3*k + 1] ;
884  Xout [i + d*2] = X [3*k + 2] ;
885  }
886  break ;
887 
888  case 4:
889 
890  for (k = 0 ; k < n ; k++)
891  {
892  i = Pnum [k] ;
893  Xout [i ] = X [4*k ] ;
894  Xout [i + d ] = X [4*k + 1] ;
895  Xout [i + d*2] = X [4*k + 2] ;
896  Xout [i + d*3] = X [4*k + 3] ;
897  }
898  break ;
899  }
900 
901  }
902  else
903  {
904 
905  switch (nr)
906  {
907 
908  case 1:
909 
910  for (k = 0 ; k < n ; k++)
911  {
912  SCALE_DIV_ASSIGN (Xout [Pnum [k]], X [k], Rs [k]) ;
913  }
914  break ;
915 
916  case 2:
917 
918  for (k = 0 ; k < n ; k++)
919  {
920  i = Pnum [k] ;
921  rs = Rs [k] ;
922  SCALE_DIV_ASSIGN (Xout [i], X [2*k], rs) ;
923  SCALE_DIV_ASSIGN (Xout [i + d], X [2*k + 1], rs) ;
924  }
925  break ;
926 
927  case 3:
928 
929  for (k = 0 ; k < n ; k++)
930  {
931  i = Pnum [k] ;
932  rs = Rs [k] ;
933  SCALE_DIV_ASSIGN (Xout [i], X [3*k], rs) ;
934  SCALE_DIV_ASSIGN (Xout [i + d], X [3*k + 1], rs) ;
935  SCALE_DIV_ASSIGN (Xout [i + d*2], X [3*k + 2], rs) ;
936  }
937  break ;
938 
939  case 4:
940 
941  for (k = 0 ; k < n ; k++)
942  {
943  i = Pnum [k] ;
944  rs = Rs [k] ;
945  SCALE_DIV_ASSIGN (Xout [i], X [4*k], rs) ;
946  SCALE_DIV_ASSIGN (Xout [i + d], X [4*k + 1], rs) ;
947  SCALE_DIV_ASSIGN (Xout [i + d*2], X [4*k + 2], rs) ;
948  SCALE_DIV_ASSIGN (Xout [i + d*3], X [4*k + 3], rs) ;
949  }
950  break ;
951  }
952  }
953 
954  /* ------------------------------------------------------------------ */
955  /* go to the next chunk of B */
956  /* ------------------------------------------------------------------ */
957 
958  Xout += d*4 ;
959  }
960  return (TRUE) ;
961 }
962 
963 #endif