Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2_solve.hpp
1 /* ========================================================================== */
2 /* === KLU_solve ============================================================ */
3 /* ========================================================================== */
4 // @HEADER
5 // *****************************************************************************
6 // KLU2: A Direct Linear Solver package
7 //
8 // Copyright 2011 NTESS and the KLU2 contributors.
9 // SPDX-License-Identifier: LGPL-2.1-or-later
10 // *****************************************************************************
11 // @HEADER
12 
13 /* Solve Ax=b using the symbolic and numeric objects from KLU_analyze
14  * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
15  * performed. Uses Numeric->Xwork as workspace (undefined on input and output),
16  * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
17  * Numeric->Iwork).
18  */
19 
20 #ifndef KLU2_SOLVE_HPP
21 #define KLU2_SOLVE_HPP
22 
23 #include "klu2_internal.h"
24 #include "klu2.hpp"
25 
26 template <typename Entry, typename Int>
27 Int KLU_solve
28 (
29  /* inputs, not modified */
30  KLU_symbolic<Entry, Int> *Symbolic,
31  KLU_numeric<Entry, Int> *Numeric,
32  Int d, /* leading dimension of B */
33  Int nrhs, /* number of right-hand-sides */
34 
35  /* right-hand-side on input, overwritten with solution to Ax=b on output */
36  /* TODO: Switched from double to Entry, check for all cases */
37  Entry B [ ], /* size n*nrhs, in column-oriented form, with
38  * leading dimension d. */
39  /* --------------- */
40  KLU_common<Entry, Int> *Common
41 )
42 {
43  Entry x [4], offik, s ;
44  double rs, *Rs ;
45  Entry *Offx, *X, *Bz, *Udiag ;
46  Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
47  Unit **LUbx ;
48  Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
49 
50  /* ---------------------------------------------------------------------- */
51  /* check inputs */
52  /* ---------------------------------------------------------------------- */
53 
54  if (Common == NULL)
55  {
56  return (FALSE) ;
57  }
58  if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
59  B == NULL)
60  {
61  Common->status = KLU_INVALID ;
62  return (FALSE) ;
63  }
64  Common->status = KLU_OK ;
65 
66  /* ---------------------------------------------------------------------- */
67  /* get the contents of the Symbolic object */
68  /* ---------------------------------------------------------------------- */
69 
70  Bz = (Entry *) B ;
71  n = Symbolic->n ;
72  nblocks = Symbolic->nblocks ;
73  Q = Symbolic->Q ;
74  R = Symbolic->R ;
75 
76  /* ---------------------------------------------------------------------- */
77  /* get the contents of the Numeric object */
78  /* ---------------------------------------------------------------------- */
79 
80  ASSERT (nblocks == Numeric->nblocks) ;
81  Pnum = Numeric->Pnum ;
82  Offp = Numeric->Offp ;
83  Offi = Numeric->Offi ;
84  Offx = (Entry *) Numeric->Offx ;
85 
86  Lip = Numeric->Lip ;
87  Llen = Numeric->Llen ;
88  Uip = Numeric->Uip ;
89  Ulen = Numeric->Ulen ;
90  LUbx = (Unit **) Numeric->LUbx ;
91  Udiag = (Entry *) Numeric->Udiag ;
92 
93  Rs = Numeric->Rs ;
94  X = (Entry *) Numeric->Xwork ;
95 
96  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
97 
98  /* ---------------------------------------------------------------------- */
99  /* solve in chunks of 4 columns at a time */
100  /* ---------------------------------------------------------------------- */
101 
102  for (chunk = 0 ; chunk < nrhs ; chunk += 4)
103  {
104 
105  /* ------------------------------------------------------------------ */
106  /* get the size of the current chunk */
107  /* ------------------------------------------------------------------ */
108 
109  nr = MIN (nrhs - chunk, 4) ;
110 
111  /* ------------------------------------------------------------------ */
112  /* scale and permute the right hand side, X = P*(R\B) */
113  /* ------------------------------------------------------------------ */
114 
115  if (Rs == NULL)
116  {
117 
118  /* no scaling */
119  switch (nr)
120  {
121 
122  case 1:
123 
124  for (k = 0 ; k < n ; k++)
125  {
126  X [k] = Bz [Pnum [k]] ;
127  }
128  break ;
129 
130  case 2:
131 
132  for (k = 0 ; k < n ; k++)
133  {
134  i = Pnum [k] ;
135  X [2*k ] = Bz [i ] ;
136  X [2*k + 1] = Bz [i + d ] ;
137  }
138  break ;
139 
140  case 3:
141 
142  for (k = 0 ; k < n ; k++)
143  {
144  i = Pnum [k] ;
145  X [3*k ] = Bz [i ] ;
146  X [3*k + 1] = Bz [i + d ] ;
147  X [3*k + 2] = Bz [i + d*2] ;
148  }
149  break ;
150 
151  case 4:
152 
153  for (k = 0 ; k < n ; k++)
154  {
155  i = Pnum [k] ;
156  X [4*k ] = Bz [i ] ;
157  X [4*k + 1] = Bz [i + d ] ;
158  X [4*k + 2] = Bz [i + d*2] ;
159  X [4*k + 3] = Bz [i + d*3] ;
160  }
161  break ;
162  }
163 
164  }
165  else
166  {
167 
168  switch (nr)
169  {
170 
171  case 1:
172 
173  for (k = 0 ; k < n ; k++)
174  {
175  SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
176  }
177  break ;
178 
179  case 2:
180 
181  for (k = 0 ; k < n ; k++)
182  {
183  i = Pnum [k] ;
184  rs = Rs [k] ;
185  SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
186  SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
187  }
188  break ;
189 
190  case 3:
191 
192  for (k = 0 ; k < n ; k++)
193  {
194  i = Pnum [k] ;
195  rs = Rs [k] ;
196  SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
197  SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
198  SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
199  }
200  break ;
201 
202  case 4:
203 
204  for (k = 0 ; k < n ; k++)
205  {
206  i = Pnum [k] ;
207  rs = Rs [k] ;
208  SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
209  SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
210  SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
211  SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
212  }
213  break ;
214  }
215  }
216 
217  /* ------------------------------------------------------------------ */
218  /* solve X = (L*U + Off)\X */
219  /* ------------------------------------------------------------------ */
220 
221  for (block = nblocks-1 ; block >= 0 ; block--)
222  {
223 
224  /* -------------------------------------------------------------- */
225  /* the block of size nk is from rows/columns k1 to k2-1 */
226  /* -------------------------------------------------------------- */
227 
228  k1 = R [block] ;
229  k2 = R [block+1] ;
230  nk = k2 - k1 ;
231  PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
232 
233  /* solve the block system */
234  if (nk == 1)
235  {
236  s = Udiag [k1] ;
237  switch (nr)
238  {
239 
240  case 1:
241  DIV (X [k1], X [k1], s) ;
242  break ;
243 
244  case 2:
245  DIV (X [2*k1], X [2*k1], s) ;
246  DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
247  break ;
248 
249  case 3:
250  DIV (X [3*k1], X [3*k1], s) ;
251  DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
252  DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
253  break ;
254 
255  case 4:
256  DIV (X [4*k1], X [4*k1], s) ;
257  DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
258  DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
259  DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
260  break ;
261 
262  }
263  }
264  else
265  {
266  KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
267  X + nr*k1) ;
268  KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
269  Udiag + k1, nr, X + nr*k1) ;
270  }
271 
272  /* -------------------------------------------------------------- */
273  /* block back-substitution for the off-diagonal-block entries */
274  /* -------------------------------------------------------------- */
275 
276  if (block > 0)
277  {
278  switch (nr)
279  {
280 
281  case 1:
282 
283  for (k = k1 ; k < k2 ; k++)
284  {
285  pend = Offp [k+1] ;
286  x [0] = X [k] ;
287  for (p = Offp [k] ; p < pend ; p++)
288  {
289  MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
290  }
291  }
292  break ;
293 
294  case 2:
295 
296  for (k = k1 ; k < k2 ; k++)
297  {
298  pend = Offp [k+1] ;
299  x [0] = X [2*k ] ;
300  x [1] = X [2*k + 1] ;
301  for (p = Offp [k] ; p < pend ; p++)
302  {
303  i = Offi [p] ;
304  offik = Offx [p] ;
305  MULT_SUB (X [2*i], offik, x [0]) ;
306  MULT_SUB (X [2*i + 1], offik, x [1]) ;
307  }
308  }
309  break ;
310 
311  case 3:
312 
313  for (k = k1 ; k < k2 ; k++)
314  {
315  pend = Offp [k+1] ;
316  x [0] = X [3*k ] ;
317  x [1] = X [3*k + 1] ;
318  x [2] = X [3*k + 2] ;
319  for (p = Offp [k] ; p < pend ; p++)
320  {
321  i = Offi [p] ;
322  offik = Offx [p] ;
323  MULT_SUB (X [3*i], offik, x [0]) ;
324  MULT_SUB (X [3*i + 1], offik, x [1]) ;
325  MULT_SUB (X [3*i + 2], offik, x [2]) ;
326  }
327  }
328  break ;
329 
330  case 4:
331 
332  for (k = k1 ; k < k2 ; k++)
333  {
334  pend = Offp [k+1] ;
335  x [0] = X [4*k ] ;
336  x [1] = X [4*k + 1] ;
337  x [2] = X [4*k + 2] ;
338  x [3] = X [4*k + 3] ;
339  for (p = Offp [k] ; p < pend ; p++)
340  {
341  i = Offi [p] ;
342  offik = Offx [p] ;
343  MULT_SUB (X [4*i], offik, x [0]) ;
344  MULT_SUB (X [4*i + 1], offik, x [1]) ;
345  MULT_SUB (X [4*i + 2], offik, x [2]) ;
346  MULT_SUB (X [4*i + 3], offik, x [3]) ;
347  }
348  }
349  break ;
350  }
351  }
352  }
353 
354  /* ------------------------------------------------------------------ */
355  /* permute the result, Bz = Q*X */
356  /* ------------------------------------------------------------------ */
357 
358  switch (nr)
359  {
360 
361  case 1:
362 
363  for (k = 0 ; k < n ; k++)
364  {
365  Bz [Q [k]] = X [k] ;
366  }
367  break ;
368 
369  case 2:
370 
371  for (k = 0 ; k < n ; k++)
372  {
373  i = Q [k] ;
374  Bz [i ] = X [2*k ] ;
375  Bz [i + d ] = X [2*k + 1] ;
376  }
377  break ;
378 
379  case 3:
380 
381  for (k = 0 ; k < n ; k++)
382  {
383  i = Q [k] ;
384  Bz [i ] = X [3*k ] ;
385  Bz [i + d ] = X [3*k + 1] ;
386  Bz [i + d*2] = X [3*k + 2] ;
387  }
388  break ;
389 
390  case 4:
391 
392  for (k = 0 ; k < n ; k++)
393  {
394  i = Q [k] ;
395  Bz [i ] = X [4*k ] ;
396  Bz [i + d ] = X [4*k + 1] ;
397  Bz [i + d*2] = X [4*k + 2] ;
398  Bz [i + d*3] = X [4*k + 3] ;
399  }
400  break ;
401  }
402 
403  /* ------------------------------------------------------------------ */
404  /* go to the next chunk of B */
405  /* ------------------------------------------------------------------ */
406 
407  Bz += d*4 ;
408  }
409  return (TRUE) ;
410 }
411 
412 
413 // Require Xout and B to have equal number of entries, pre-allocated
414 template <typename Entry, typename Int>
415 Int KLU_solve2
416 (
417  /* inputs, not modified */
418  KLU_symbolic<Entry, Int> *Symbolic,
419  KLU_numeric<Entry, Int> *Numeric,
420  Int d, /* leading dimension of B */
421  Int nrhs, /* number of right-hand-sides */
422 
423  /* right-hand-side on input, overwritten with solution to Ax=b on output */
424  /* TODO: Switched from double to Entry, check for all cases */
425  Entry B [ ], /* size n*1, in column-oriented form, with
426  * leading dimension d. */
427  Entry Xout [ ], /* size n*1, in column-oriented form, with
428  * leading dimension d. */
429  /* --------------- */
430  KLU_common<Entry, Int> *Common
431 )
432 {
433  Entry x [4], offik, s ;
434  double rs, *Rs ;
435  Entry *Offx, *X, *Bz, *Udiag ;
436  Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
437  Unit **LUbx ;
438  Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
439 
440  /* ---------------------------------------------------------------------- */
441  /* check inputs */
442  /* ---------------------------------------------------------------------- */
443 
444  if (Common == NULL)
445  {
446  return (FALSE) ;
447  }
448  if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
449  B == NULL || Xout == NULL)
450  {
451  Common->status = KLU_INVALID ;
452  return (FALSE) ;
453  }
454  Common->status = KLU_OK ;
455 
456  /* ---------------------------------------------------------------------- */
457  /* get the contents of the Symbolic object */
458  /* ---------------------------------------------------------------------- */
459 
460  Bz = (Entry *) B ;
461  n = Symbolic->n ;
462  nblocks = Symbolic->nblocks ;
463  Q = Symbolic->Q ;
464  R = Symbolic->R ;
465 
466  /* ---------------------------------------------------------------------- */
467  /* get the contents of the Numeric object */
468  /* ---------------------------------------------------------------------- */
469 
470  ASSERT (nblocks == Numeric->nblocks) ;
471  Pnum = Numeric->Pnum ;
472  Offp = Numeric->Offp ;
473  Offi = Numeric->Offi ;
474  Offx = (Entry *) Numeric->Offx ;
475 
476  Lip = Numeric->Lip ;
477  Llen = Numeric->Llen ;
478  Uip = Numeric->Uip ;
479  Ulen = Numeric->Ulen ;
480  LUbx = (Unit **) Numeric->LUbx ;
481  Udiag = (Entry *) Numeric->Udiag ;
482 
483  Rs = Numeric->Rs ;
484  X = (Entry *) Numeric->Xwork ;
485 
486  ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
487 
488  /* ---------------------------------------------------------------------- */
489  /* solve in chunks of 4 columns at a time */
490  /* ---------------------------------------------------------------------- */
491 
492  for (chunk = 0 ; chunk < nrhs ; chunk += 4)
493  {
494 
495  /* ------------------------------------------------------------------ */
496  /* get the size of the current chunk */
497  /* ------------------------------------------------------------------ */
498 
499  nr = MIN (nrhs - chunk, 4) ;
500 
501  /* ------------------------------------------------------------------ */
502  /* scale and permute the right hand side, X = P*(R\B) */
503  /* ------------------------------------------------------------------ */
504 
505  if (Rs == NULL)
506  {
507 
508  /* no scaling */
509  switch (nr)
510  {
511 
512  case 1:
513 
514  for (k = 0 ; k < n ; k++)
515  {
516  X [k] = Bz [Pnum [k]] ;
517  }
518  break ;
519 
520  case 2:
521 
522  for (k = 0 ; k < n ; k++)
523  {
524  i = Pnum [k] ;
525  X [2*k ] = Bz [i ] ;
526  X [2*k + 1] = Bz [i + d ] ;
527  }
528  break ;
529 
530  case 3:
531 
532  for (k = 0 ; k < n ; k++)
533  {
534  i = Pnum [k] ;
535  X [3*k ] = Bz [i ] ;
536  X [3*k + 1] = Bz [i + d ] ;
537  X [3*k + 2] = Bz [i + d*2] ;
538  }
539  break ;
540 
541  case 4:
542 
543  for (k = 0 ; k < n ; k++)
544  {
545  i = Pnum [k] ;
546  X [4*k ] = Bz [i ] ;
547  X [4*k + 1] = Bz [i + d ] ;
548  X [4*k + 2] = Bz [i + d*2] ;
549  X [4*k + 3] = Bz [i + d*3] ;
550  }
551  break ;
552  }
553 
554  }
555  else
556  {
557 
558  switch (nr)
559  {
560 
561  case 1:
562 
563  for (k = 0 ; k < n ; k++)
564  {
565  SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
566  }
567  break ;
568 
569  case 2:
570 
571  for (k = 0 ; k < n ; k++)
572  {
573  i = Pnum [k] ;
574  rs = Rs [k] ;
575  SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
576  SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
577  }
578  break ;
579 
580  case 3:
581 
582  for (k = 0 ; k < n ; k++)
583  {
584  i = Pnum [k] ;
585  rs = Rs [k] ;
586  SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
587  SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
588  SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
589  }
590  break ;
591 
592  case 4:
593 
594  for (k = 0 ; k < n ; k++)
595  {
596  i = Pnum [k] ;
597  rs = Rs [k] ;
598  SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
599  SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
600  SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
601  SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
602  }
603  break ;
604  }
605  }
606 
607  /* ------------------------------------------------------------------ */
608  /* solve X = (L*U + Off)\X */
609  /* ------------------------------------------------------------------ */
610 
611  for (block = nblocks-1 ; block >= 0 ; block--)
612  {
613 
614  /* -------------------------------------------------------------- */
615  /* the block of size nk is from rows/columns k1 to k2-1 */
616  /* -------------------------------------------------------------- */
617 
618  k1 = R [block] ;
619  k2 = R [block+1] ;
620  nk = k2 - k1 ;
621  PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
622 
623  /* solve the block system */
624  if (nk == 1)
625  {
626  s = Udiag [k1] ;
627  switch (nr)
628  {
629 
630  case 1:
631  DIV (X [k1], X [k1], s) ;
632  break ;
633 
634  case 2:
635  DIV (X [2*k1], X [2*k1], s) ;
636  DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
637  break ;
638 
639  case 3:
640  DIV (X [3*k1], X [3*k1], s) ;
641  DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
642  DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
643  break ;
644 
645  case 4:
646  DIV (X [4*k1], X [4*k1], s) ;
647  DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
648  DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
649  DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
650  break ;
651 
652  }
653  }
654  else
655  {
656  KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
657  X + nr*k1) ;
658  KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
659  Udiag + k1, nr, X + nr*k1) ;
660  }
661 
662  /* -------------------------------------------------------------- */
663  /* block back-substitution for the off-diagonal-block entries */
664  /* -------------------------------------------------------------- */
665 
666  if (block > 0)
667  {
668  switch (nr)
669  {
670 
671  case 1:
672 
673  for (k = k1 ; k < k2 ; k++)
674  {
675  pend = Offp [k+1] ;
676  x [0] = X [k] ;
677  for (p = Offp [k] ; p < pend ; p++)
678  {
679  MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
680  }
681  }
682  break ;
683 
684  case 2:
685 
686  for (k = k1 ; k < k2 ; k++)
687  {
688  pend = Offp [k+1] ;
689  x [0] = X [2*k ] ;
690  x [1] = X [2*k + 1] ;
691  for (p = Offp [k] ; p < pend ; p++)
692  {
693  i = Offi [p] ;
694  offik = Offx [p] ;
695  MULT_SUB (X [2*i], offik, x [0]) ;
696  MULT_SUB (X [2*i + 1], offik, x [1]) ;
697  }
698  }
699  break ;
700 
701  case 3:
702 
703  for (k = k1 ; k < k2 ; k++)
704  {
705  pend = Offp [k+1] ;
706  x [0] = X [3*k ] ;
707  x [1] = X [3*k + 1] ;
708  x [2] = X [3*k + 2] ;
709  for (p = Offp [k] ; p < pend ; p++)
710  {
711  i = Offi [p] ;
712  offik = Offx [p] ;
713  MULT_SUB (X [3*i], offik, x [0]) ;
714  MULT_SUB (X [3*i + 1], offik, x [1]) ;
715  MULT_SUB (X [3*i + 2], offik, x [2]) ;
716  }
717  }
718  break ;
719 
720  case 4:
721 
722  for (k = k1 ; k < k2 ; k++)
723  {
724  pend = Offp [k+1] ;
725  x [0] = X [4*k ] ;
726  x [1] = X [4*k + 1] ;
727  x [2] = X [4*k + 2] ;
728  x [3] = X [4*k + 3] ;
729  for (p = Offp [k] ; p < pend ; p++)
730  {
731  i = Offi [p] ;
732  offik = Offx [p] ;
733  MULT_SUB (X [4*i], offik, x [0]) ;
734  MULT_SUB (X [4*i + 1], offik, x [1]) ;
735  MULT_SUB (X [4*i + 2], offik, x [2]) ;
736  MULT_SUB (X [4*i + 3], offik, x [3]) ;
737  }
738  }
739  break ;
740  }
741  }
742  }
743 
744  /* ------------------------------------------------------------------ */
745  /* permute the result, Bz = Q*X */
746  /* store result in Xout */
747  /* ------------------------------------------------------------------ */
748 
749  switch (nr)
750  {
751 
752  case 1:
753 
754  for (k = 0 ; k < n ; k++)
755  {
756  Xout [Q [k]] = X [k] ;
757  }
758  break ;
759 
760  case 2:
761 
762  for (k = 0 ; k < n ; k++)
763  {
764  i = Q [k] ;
765  Xout [i ] = X [2*k ] ;
766  Xout [i + d ] = X [2*k + 1] ;
767  }
768  break ;
769 
770  case 3:
771 
772  for (k = 0 ; k < n ; k++)
773  {
774  i = Q [k] ;
775  Xout [i ] = X [3*k ] ;
776  Xout [i + d ] = X [3*k + 1] ;
777  Xout [i + d*2] = X [3*k + 2] ;
778  }
779  break ;
780 
781  case 4:
782 
783  for (k = 0 ; k < n ; k++)
784  {
785  i = Q [k] ;
786  Xout [i ] = X [4*k ] ;
787  Xout [i + d ] = X [4*k + 1] ;
788  Xout [i + d*2] = X [4*k + 2] ;
789  Xout [i + d*3] = X [4*k + 3] ;
790  }
791  break ;
792  }
793 
794  /* ------------------------------------------------------------------ */
795  /* go to the next chunk of B */
796  /* ------------------------------------------------------------------ */
797 
798  Xout += d*4 ;
799  }
800  return (TRUE) ;
801 }
802 
803 
804 #endif