Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2_tsolve.hpp
1 /* ========================================================================== */
2 /* === KLU_tsolve =========================================================== */
3 /* ========================================================================== */
4 // @HEADER
5 // *****************************************************************************
6 // KLU2: A Direct Linear Solver package
7 //
8 // Copyright 2011 NTESS and the KLU2 contributors.
9 // SPDX-License-Identifier: LGPL-2.1-or-later
10 // *****************************************************************************
11 // @HEADER
12 
13 /* Solve A'x=b using the symbolic and numeric objects from KLU_analyze
14  * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
15  * performed. Uses Numeric->Xwork as workspace (undefined on input and output),
16  * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
17  * Numeric->Iwork).
18  */
19 
20 #ifndef KLU2_TSOLVE_HPP
21 #define KLU2_TSOLVE_HPP
22 
23 #include "klu2_internal.h"
24 #include "klu2.hpp"
25 
26 template <typename Entry, typename Int>
27 Int KLU_tsolve
28 (
29  /* inputs, not modified */
30  KLU_symbolic<Entry, Int> *Symbolic,
31  KLU_numeric<Entry, Int> *Numeric,
32  Int d, /* leading dimension of B */
33  Int nrhs, /* number of right-hand-sides */
34 
35  /* right-hand-side on input, overwritten with solution to Ax=b on output */
36  Entry B [ ], /* size n*nrhs, in column-oriented form, with
37  * leading dimension d. */
38 #ifdef COMPLEX
39  Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for
40  * array transpose solve. Used for the complex
41  * case only. */
42 #endif
43  /* --------------- */
44  KLU_common<Entry, Int> *Common
45 )
46 {
47  Entry x [4], offik, s ;
48  double rs, *Rs ;
49  Entry *Offx, *X, *Bz, *Udiag ;
50  Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
51  Unit **LUbx ;
52  Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
53 
54  /* ---------------------------------------------------------------------- */
55  /* check inputs */
56  /* ---------------------------------------------------------------------- */
57 
58  if (Common == NULL)
59  {
60  return (FALSE) ;
61  }
62  if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
63  B == NULL)
64  {
65  Common->status = KLU_INVALID ;
66  return (FALSE) ;
67  }
68  Common->status = KLU_OK ;
69 
70  /* ---------------------------------------------------------------------- */
71  /* get the contents of the Symbolic object */
72  /* ---------------------------------------------------------------------- */
73 
74  Bz = (Entry *) B ;
75  n = Symbolic->n ;
76  nblocks = Symbolic->nblocks ;
77  Q = Symbolic->Q ;
78  R = Symbolic->R ;
79 
80  /* ---------------------------------------------------------------------- */
81  /* get the contents of the Numeric object */
82  /* ---------------------------------------------------------------------- */
83 
84  ASSERT (nblocks == Numeric->nblocks) ;
85  Pnum = Numeric->Pnum ;
86  Offp = Numeric->Offp ;
87  Offi = Numeric->Offi ;
88  Offx = (Entry *) Numeric->Offx ;
89 
90  Lip = Numeric->Lip ;
91  Llen = Numeric->Llen ;
92  Uip = Numeric->Uip ;
93  Ulen = Numeric->Ulen ;
94  LUbx = (Unit **) Numeric->LUbx ;
95  Udiag = (Entry *) Numeric->Udiag ;
96 
97  Rs = Numeric->Rs ;
98  X = (Entry *) Numeric->Xwork ;
99  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
100 
101  /* ---------------------------------------------------------------------- */
102  /* solve in chunks of 4 columns at a time */
103  /* ---------------------------------------------------------------------- */
104 
105  for (chunk = 0 ; chunk < nrhs ; chunk += 4)
106  {
107 
108  /* ------------------------------------------------------------------ */
109  /* get the size of the current chunk */
110  /* ------------------------------------------------------------------ */
111 
112  nr = MIN (nrhs - chunk, 4) ;
113 
114  /* ------------------------------------------------------------------ */
115  /* permute the right hand side, X = Q'*B */
116  /* ------------------------------------------------------------------ */
117 
118  switch (nr)
119  {
120 
121  case 1:
122 
123  for (k = 0 ; k < n ; k++)
124  {
125  X [k] = Bz [Q [k]] ;
126  }
127  break ;
128 
129  case 2:
130 
131  for (k = 0 ; k < n ; k++)
132  {
133  i = Q [k] ;
134  X [2*k ] = Bz [i ] ;
135  X [2*k + 1] = Bz [i + d ] ;
136  }
137  break ;
138 
139  case 3:
140 
141  for (k = 0 ; k < n ; k++)
142  {
143  i = Q [k] ;
144  X [3*k ] = Bz [i ] ;
145  X [3*k + 1] = Bz [i + d ] ;
146  X [3*k + 2] = Bz [i + d*2] ;
147  }
148  break ;
149 
150  case 4:
151 
152  for (k = 0 ; k < n ; k++)
153  {
154  i = Q [k] ;
155  X [4*k ] = Bz [i ] ;
156  X [4*k + 1] = Bz [i + d ] ;
157  X [4*k + 2] = Bz [i + d*2] ;
158  X [4*k + 3] = Bz [i + d*3] ;
159  }
160  break ;
161 
162  }
163 
164  /* ------------------------------------------------------------------ */
165  /* solve X = (L*U + Off)'\X */
166  /* ------------------------------------------------------------------ */
167 
168  for (block = 0 ; block < nblocks ; block++)
169  {
170 
171  /* -------------------------------------------------------------- */
172  /* the block of size nk is from rows/columns k1 to k2-1 */
173  /* -------------------------------------------------------------- */
174 
175  k1 = R [block] ;
176  k2 = R [block+1] ;
177  nk = k2 - k1 ;
178  PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
179 
180  /* -------------------------------------------------------------- */
181  /* block back-substitution for the off-diagonal-block entries */
182  /* -------------------------------------------------------------- */
183 
184  if (block > 0)
185  {
186  switch (nr)
187  {
188 
189  case 1:
190 
191  for (k = k1 ; k < k2 ; k++)
192  {
193  pend = Offp [k+1] ;
194  for (p = Offp [k] ; p < pend ; p++)
195  {
196 #ifdef COMPLEX
197  if (conj_solve)
198  {
199  MULT_SUB_CONJ (X [k], X [Offi [p]],
200  Offx [p]) ;
201  }
202  else
203 #endif
204  {
205  MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
206  }
207  }
208  }
209  break ;
210 
211  case 2:
212 
213  for (k = k1 ; k < k2 ; k++)
214  {
215  pend = Offp [k+1] ;
216  x [0] = X [2*k ] ;
217  x [1] = X [2*k + 1] ;
218  for (p = Offp [k] ; p < pend ; p++)
219  {
220  i = Offi [p] ;
221 #ifdef COMPLEX
222  if (conj_solve)
223  {
224  CONJ (offik, Offx [p]) ;
225  }
226  else
227 #endif
228  {
229  offik = Offx [p] ;
230  }
231  MULT_SUB (x [0], offik, X [2*i]) ;
232  MULT_SUB (x [1], offik, X [2*i + 1]) ;
233  }
234  X [2*k ] = x [0] ;
235  X [2*k + 1] = x [1] ;
236  }
237  break ;
238 
239  case 3:
240 
241  for (k = k1 ; k < k2 ; k++)
242  {
243  pend = Offp [k+1] ;
244  x [0] = X [3*k ] ;
245  x [1] = X [3*k + 1] ;
246  x [2] = X [3*k + 2] ;
247  for (p = Offp [k] ; p < pend ; p++)
248  {
249  i = Offi [p] ;
250 #ifdef COMPLEX
251  if (conj_solve)
252  {
253  CONJ (offik, Offx [p]) ;
254  }
255  else
256 #endif
257  {
258  offik = Offx [p] ;
259  }
260  MULT_SUB (x [0], offik, X [3*i]) ;
261  MULT_SUB (x [1], offik, X [3*i + 1]) ;
262  MULT_SUB (x [2], offik, X [3*i + 2]) ;
263  }
264  X [3*k ] = x [0] ;
265  X [3*k + 1] = x [1] ;
266  X [3*k + 2] = x [2] ;
267  }
268  break ;
269 
270  case 4:
271 
272  for (k = k1 ; k < k2 ; k++)
273  {
274  pend = Offp [k+1] ;
275  x [0] = X [4*k ] ;
276  x [1] = X [4*k + 1] ;
277  x [2] = X [4*k + 2] ;
278  x [3] = X [4*k + 3] ;
279  for (p = Offp [k] ; p < pend ; p++)
280  {
281  i = Offi [p] ;
282 #ifdef COMPLEX
283  if (conj_solve)
284  {
285  CONJ(offik, Offx [p]) ;
286  }
287  else
288 #endif
289  {
290  offik = Offx [p] ;
291  }
292  MULT_SUB (x [0], offik, X [4*i]) ;
293  MULT_SUB (x [1], offik, X [4*i + 1]) ;
294  MULT_SUB (x [2], offik, X [4*i + 2]) ;
295  MULT_SUB (x [3], offik, X [4*i + 3]) ;
296  }
297  X [4*k ] = x [0] ;
298  X [4*k + 1] = x [1] ;
299  X [4*k + 2] = x [2] ;
300  X [4*k + 3] = x [3] ;
301  }
302  break ;
303  }
304  }
305 
306  /* -------------------------------------------------------------- */
307  /* solve the block system */
308  /* -------------------------------------------------------------- */
309 
310  if (nk == 1)
311  {
312 #ifdef COMPLEX
313  if (conj_solve)
314  {
315  CONJ (s, Udiag [k1]) ;
316  }
317  else
318 #endif
319  {
320  s = Udiag [k1] ;
321  }
322  switch (nr)
323  {
324 
325  case 1:
326  DIV (X [k1], X [k1], s) ;
327  break ;
328 
329  case 2:
330  DIV (X [2*k1], X [2*k1], s) ;
331  DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
332  break ;
333 
334  case 3:
335  DIV (X [3*k1], X [3*k1], s) ;
336  DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
337  DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
338  break ;
339 
340  case 4:
341  DIV (X [4*k1], X [4*k1], s) ;
342  DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
343  DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
344  DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
345  break ;
346 
347  }
348  }
349  else
350  {
351  KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
352  Udiag + k1, nr,
353 #ifdef COMPLEX
354  conj_solve,
355 #endif
356  X + nr*k1) ;
357  KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
358 #ifdef COMPLEX
359  conj_solve,
360 #endif
361  X + nr*k1) ;
362  }
363  }
364 
365  /* ------------------------------------------------------------------ */
366  /* scale and permute the result, Bz = P'(R\X) */
367  /* ------------------------------------------------------------------ */
368 
369  if (Rs == NULL)
370  {
371 
372  /* no scaling */
373  switch (nr)
374  {
375 
376  case 1:
377 
378  for (k = 0 ; k < n ; k++)
379  {
380  Bz [Pnum [k]] = X [k] ;
381  }
382  break ;
383 
384  case 2:
385 
386  for (k = 0 ; k < n ; k++)
387  {
388  i = Pnum [k] ;
389  Bz [i ] = X [2*k ] ;
390  Bz [i + d ] = X [2*k + 1] ;
391  }
392  break ;
393 
394  case 3:
395 
396  for (k = 0 ; k < n ; k++)
397  {
398  i = Pnum [k] ;
399  Bz [i ] = X [3*k ] ;
400  Bz [i + d ] = X [3*k + 1] ;
401  Bz [i + d*2] = X [3*k + 2] ;
402  }
403  break ;
404 
405  case 4:
406 
407  for (k = 0 ; k < n ; k++)
408  {
409  i = Pnum [k] ;
410  Bz [i ] = X [4*k ] ;
411  Bz [i + d ] = X [4*k + 1] ;
412  Bz [i + d*2] = X [4*k + 2] ;
413  Bz [i + d*3] = X [4*k + 3] ;
414  }
415  break ;
416  }
417 
418  }
419  else
420  {
421 
422  switch (nr)
423  {
424 
425  case 1:
426 
427  for (k = 0 ; k < n ; k++)
428  {
429  SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
430  }
431  break ;
432 
433  case 2:
434 
435  for (k = 0 ; k < n ; k++)
436  {
437  i = Pnum [k] ;
438  rs = Rs [k] ;
439  SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
440  SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
441  }
442  break ;
443 
444  case 3:
445 
446  for (k = 0 ; k < n ; k++)
447  {
448  i = Pnum [k] ;
449  rs = Rs [k] ;
450  SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
451  SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
452  SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
453  }
454  break ;
455 
456  case 4:
457 
458  for (k = 0 ; k < n ; k++)
459  {
460  i = Pnum [k] ;
461  rs = Rs [k] ;
462  SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
463  SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
464  SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
465  SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
466  }
467  break ;
468  }
469  }
470 
471  /* ------------------------------------------------------------------ */
472  /* go to the next chunk of B */
473  /* ------------------------------------------------------------------ */
474 
475  Bz += d*4 ;
476  }
477  return (TRUE) ;
478 }
479 
480 
481 // Require Xout and B to have equal number of entries, pre-allocated
482 template <typename Entry, typename Int>
483 Int KLU_tsolve2
484 (
485  /* inputs, not modified */
486  KLU_symbolic<Entry, Int> *Symbolic,
487  KLU_numeric<Entry, Int> *Numeric,
488  Int d, /* leading dimension of B */
489  Int nrhs, /* number of right-hand-sides */
490 
491  /* right-hand-side on input, overwritten with solution to Ax=b on output */
492  Entry B [ ], /* size n*1, in column-oriented form, with
493  * leading dimension d. */
494  Entry Xout [ ], /* size n*1, in column-oriented form, with
495  * leading dimension d. */
496 #ifdef COMPLEX
497  Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for
498  * array transpose solve. Used for the complex
499  * case only. */
500 #endif
501  /* --------------- */
502  KLU_common<Entry, Int> *Common
503 )
504 {
505  Entry x [4], offik, s ;
506  double rs, *Rs ;
507  Entry *Offx, *X, *Bz, *Udiag ;
508  Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
509  Unit **LUbx ;
510  Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
511 
512  /* ---------------------------------------------------------------------- */
513  /* check inputs */
514  /* ---------------------------------------------------------------------- */
515 
516  if (Common == NULL)
517  {
518  return (FALSE) ;
519  }
520  if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
521  B == NULL || Xout == NULL)
522  {
523  Common->status = KLU_INVALID ;
524  return (FALSE) ;
525  }
526  Common->status = KLU_OK ;
527 
528  /* ---------------------------------------------------------------------- */
529  /* get the contents of the Symbolic object */
530  /* ---------------------------------------------------------------------- */
531 
532  Bz = (Entry *) B ;
533  n = Symbolic->n ;
534  nblocks = Symbolic->nblocks ;
535  Q = Symbolic->Q ;
536  R = Symbolic->R ;
537 
538  /* ---------------------------------------------------------------------- */
539  /* get the contents of the Numeric object */
540  /* ---------------------------------------------------------------------- */
541 
542  ASSERT (nblocks == Numeric->nblocks) ;
543  Pnum = Numeric->Pnum ;
544  Offp = Numeric->Offp ;
545  Offi = Numeric->Offi ;
546  Offx = (Entry *) Numeric->Offx ;
547 
548  Lip = Numeric->Lip ;
549  Llen = Numeric->Llen ;
550  Uip = Numeric->Uip ;
551  Ulen = Numeric->Ulen ;
552  LUbx = (Unit **) Numeric->LUbx ;
553  Udiag = (Entry *) Numeric->Udiag ;
554 
555  Rs = Numeric->Rs ;
556  X = (Entry *) Numeric->Xwork ;
557  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
558 
559  /* ---------------------------------------------------------------------- */
560  /* solve in chunks of 4 columns at a time */
561  /* ---------------------------------------------------------------------- */
562 
563  for (chunk = 0 ; chunk < nrhs ; chunk += 4)
564  {
565 
566  /* ------------------------------------------------------------------ */
567  /* get the size of the current chunk */
568  /* ------------------------------------------------------------------ */
569 
570  nr = MIN (nrhs - chunk, 4) ;
571 
572  /* ------------------------------------------------------------------ */
573  /* permute the right hand side, X = Q'*B */
574  /* ------------------------------------------------------------------ */
575 
576  switch (nr)
577  {
578 
579  case 1:
580 
581  for (k = 0 ; k < n ; k++)
582  {
583  X [k] = Bz [Q [k]] ;
584  }
585  break ;
586 
587  case 2:
588 
589  for (k = 0 ; k < n ; k++)
590  {
591  i = Q [k] ;
592  X [2*k ] = Bz [i ] ;
593  X [2*k + 1] = Bz [i + d ] ;
594  }
595  break ;
596 
597  case 3:
598 
599  for (k = 0 ; k < n ; k++)
600  {
601  i = Q [k] ;
602  X [3*k ] = Bz [i ] ;
603  X [3*k + 1] = Bz [i + d ] ;
604  X [3*k + 2] = Bz [i + d*2] ;
605  }
606  break ;
607 
608  case 4:
609 
610  for (k = 0 ; k < n ; k++)
611  {
612  i = Q [k] ;
613  X [4*k ] = Bz [i ] ;
614  X [4*k + 1] = Bz [i + d ] ;
615  X [4*k + 2] = Bz [i + d*2] ;
616  X [4*k + 3] = Bz [i + d*3] ;
617  }
618  break ;
619 
620  }
621 
622  /* ------------------------------------------------------------------ */
623  /* solve X = (L*U + Off)'\X */
624  /* ------------------------------------------------------------------ */
625 
626  for (block = 0 ; block < nblocks ; block++)
627  {
628 
629  /* -------------------------------------------------------------- */
630  /* the block of size nk is from rows/columns k1 to k2-1 */
631  /* -------------------------------------------------------------- */
632 
633  k1 = R [block] ;
634  k2 = R [block+1] ;
635  nk = k2 - k1 ;
636  PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
637 
638  /* -------------------------------------------------------------- */
639  /* block back-substitution for the off-diagonal-block entries */
640  /* -------------------------------------------------------------- */
641 
642  if (block > 0)
643  {
644  switch (nr)
645  {
646 
647  case 1:
648 
649  for (k = k1 ; k < k2 ; k++)
650  {
651  pend = Offp [k+1] ;
652  for (p = Offp [k] ; p < pend ; p++)
653  {
654 #ifdef COMPLEX
655  if (conj_solve)
656  {
657  MULT_SUB_CONJ (X [k], X [Offi [p]],
658  Offx [p]) ;
659  }
660  else
661 #endif
662  {
663  MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
664  }
665  }
666  }
667  break ;
668 
669  case 2:
670 
671  for (k = k1 ; k < k2 ; k++)
672  {
673  pend = Offp [k+1] ;
674  x [0] = X [2*k ] ;
675  x [1] = X [2*k + 1] ;
676  for (p = Offp [k] ; p < pend ; p++)
677  {
678  i = Offi [p] ;
679 #ifdef COMPLEX
680  if (conj_solve)
681  {
682  CONJ (offik, Offx [p]) ;
683  }
684  else
685 #endif
686  {
687  offik = Offx [p] ;
688  }
689  MULT_SUB (x [0], offik, X [2*i]) ;
690  MULT_SUB (x [1], offik, X [2*i + 1]) ;
691  }
692  X [2*k ] = x [0] ;
693  X [2*k + 1] = x [1] ;
694  }
695  break ;
696 
697  case 3:
698 
699  for (k = k1 ; k < k2 ; k++)
700  {
701  pend = Offp [k+1] ;
702  x [0] = X [3*k ] ;
703  x [1] = X [3*k + 1] ;
704  x [2] = X [3*k + 2] ;
705  for (p = Offp [k] ; p < pend ; p++)
706  {
707  i = Offi [p] ;
708 #ifdef COMPLEX
709  if (conj_solve)
710  {
711  CONJ (offik, Offx [p]) ;
712  }
713  else
714 #endif
715  {
716  offik = Offx [p] ;
717  }
718  MULT_SUB (x [0], offik, X [3*i]) ;
719  MULT_SUB (x [1], offik, X [3*i + 1]) ;
720  MULT_SUB (x [2], offik, X [3*i + 2]) ;
721  }
722  X [3*k ] = x [0] ;
723  X [3*k + 1] = x [1] ;
724  X [3*k + 2] = x [2] ;
725  }
726  break ;
727 
728  case 4:
729 
730  for (k = k1 ; k < k2 ; k++)
731  {
732  pend = Offp [k+1] ;
733  x [0] = X [4*k ] ;
734  x [1] = X [4*k + 1] ;
735  x [2] = X [4*k + 2] ;
736  x [3] = X [4*k + 3] ;
737  for (p = Offp [k] ; p < pend ; p++)
738  {
739  i = Offi [p] ;
740 #ifdef COMPLEX
741  if (conj_solve)
742  {
743  CONJ(offik, Offx [p]) ;
744  }
745  else
746 #endif
747  {
748  offik = Offx [p] ;
749  }
750  MULT_SUB (x [0], offik, X [4*i]) ;
751  MULT_SUB (x [1], offik, X [4*i + 1]) ;
752  MULT_SUB (x [2], offik, X [4*i + 2]) ;
753  MULT_SUB (x [3], offik, X [4*i + 3]) ;
754  }
755  X [4*k ] = x [0] ;
756  X [4*k + 1] = x [1] ;
757  X [4*k + 2] = x [2] ;
758  X [4*k + 3] = x [3] ;
759  }
760  break ;
761  }
762  }
763 
764  /* -------------------------------------------------------------- */
765  /* solve the block system */
766  /* -------------------------------------------------------------- */
767 
768  if (nk == 1)
769  {
770 #ifdef COMPLEX
771  if (conj_solve)
772  {
773  CONJ (s, Udiag [k1]) ;
774  }
775  else
776 #endif
777  {
778  s = Udiag [k1] ;
779  }
780  switch (nr)
781  {
782 
783  case 1:
784  DIV (X [k1], X [k1], s) ;
785  break ;
786 
787  case 2:
788  DIV (X [2*k1], X [2*k1], s) ;
789  DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
790  break ;
791 
792  case 3:
793  DIV (X [3*k1], X [3*k1], s) ;
794  DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
795  DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
796  break ;
797 
798  case 4:
799  DIV (X [4*k1], X [4*k1], s) ;
800  DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
801  DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
802  DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
803  break ;
804 
805  }
806  }
807  else
808  {
809  KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
810  Udiag + k1, nr,
811 #ifdef COMPLEX
812  conj_solve,
813 #endif
814  X + nr*k1) ;
815  KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
816 #ifdef COMPLEX
817  conj_solve,
818 #endif
819  X + nr*k1) ;
820  }
821  }
822 
823  /* ------------------------------------------------------------------ */
824  /* scale and permute the result, Bz = P'(R\X) */
825  /* store result in Xout */
826  /* ------------------------------------------------------------------ */
827 
828  if (Rs == NULL)
829  {
830 
831  /* no scaling */
832  switch (nr)
833  {
834 
835  case 1:
836 
837  for (k = 0 ; k < n ; k++)
838  {
839  Xout [Pnum [k]] = X [k] ;
840  }
841  break ;
842 
843  case 2:
844 
845  for (k = 0 ; k < n ; k++)
846  {
847  i = Pnum [k] ;
848  Xout [i ] = X [2*k ] ;
849  Xout [i + d ] = X [2*k + 1] ;
850  }
851  break ;
852 
853  case 3:
854 
855  for (k = 0 ; k < n ; k++)
856  {
857  i = Pnum [k] ;
858  Xout [i ] = X [3*k ] ;
859  Xout [i + d ] = X [3*k + 1] ;
860  Xout [i + d*2] = X [3*k + 2] ;
861  }
862  break ;
863 
864  case 4:
865 
866  for (k = 0 ; k < n ; k++)
867  {
868  i = Pnum [k] ;
869  Xout [i ] = X [4*k ] ;
870  Xout [i + d ] = X [4*k + 1] ;
871  Xout [i + d*2] = X [4*k + 2] ;
872  Xout [i + d*3] = X [4*k + 3] ;
873  }
874  break ;
875  }
876 
877  }
878  else
879  {
880 
881  switch (nr)
882  {
883 
884  case 1:
885 
886  for (k = 0 ; k < n ; k++)
887  {
888  SCALE_DIV_ASSIGN (Xout [Pnum [k]], X [k], Rs [k]) ;
889  }
890  break ;
891 
892  case 2:
893 
894  for (k = 0 ; k < n ; k++)
895  {
896  i = Pnum [k] ;
897  rs = Rs [k] ;
898  SCALE_DIV_ASSIGN (Xout [i], X [2*k], rs) ;
899  SCALE_DIV_ASSIGN (Xout [i + d], X [2*k + 1], rs) ;
900  }
901  break ;
902 
903  case 3:
904 
905  for (k = 0 ; k < n ; k++)
906  {
907  i = Pnum [k] ;
908  rs = Rs [k] ;
909  SCALE_DIV_ASSIGN (Xout [i], X [3*k], rs) ;
910  SCALE_DIV_ASSIGN (Xout [i + d], X [3*k + 1], rs) ;
911  SCALE_DIV_ASSIGN (Xout [i + d*2], X [3*k + 2], rs) ;
912  }
913  break ;
914 
915  case 4:
916 
917  for (k = 0 ; k < n ; k++)
918  {
919  i = Pnum [k] ;
920  rs = Rs [k] ;
921  SCALE_DIV_ASSIGN (Xout [i], X [4*k], rs) ;
922  SCALE_DIV_ASSIGN (Xout [i + d], X [4*k + 1], rs) ;
923  SCALE_DIV_ASSIGN (Xout [i + d*2], X [4*k + 2], rs) ;
924  SCALE_DIV_ASSIGN (Xout [i + d*3], X [4*k + 3], rs) ;
925  }
926  break ;
927  }
928  }
929 
930  /* ------------------------------------------------------------------ */
931  /* go to the next chunk of B */
932  /* ------------------------------------------------------------------ */
933 
934  Xout += d*4 ;
935  }
936  return (TRUE) ;
937 }
938 
939 #endif