Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2_analyze.hpp
1 /* ========================================================================== */
2 /* === klu_analyze ========================================================== */
3 /* ========================================================================== */
4 // @HEADER
5 // *****************************************************************************
6 // KLU2: A Direct Linear Solver package
7 //
8 // Copyright 2011 NTESS and the KLU2 contributors.
9 // SPDX-License-Identifier: LGPL-2.1-or-later
10 // *****************************************************************************
11 // @HEADER
12 
13 /* Order the matrix using BTF (or not), and then AMD, COLAMD, the natural
14  * ordering, or the user-provided-function on the blocks. Does not support
15  * using a given ordering (use klu_analyze_given for that case). */
16 
17 #ifndef KLU2_ANALYZE_HPP
18 #define KLU2_ANALYZE_HPP
19 
20 #include "klu2_internal.h"
21 #include "klu2_analyze_given.hpp"
22 #include "klu2_memory.hpp"
23 
24 /* ========================================================================== */
25 /* === analyze_worker ======================================================= */
26 /* ========================================================================== */
27 
28 template <typename Entry, typename Int>
29 static Int analyze_worker /* returns KLU_OK or < 0 if error */
30 (
31  /* inputs, not modified */
32  Int n, /* A is n-by-n */
33  Int Ap [ ], /* size n+1, column pointers */
34  Int Ai [ ], /* size nz, row indices */
35  Int nblocks, /* # of blocks */
36  Int Pbtf [ ], /* BTF row permutation */
37  Int Qbtf [ ], /* BTF col permutation */
38  Int R [ ], /* size n+1, but only Rbtf [0..nblocks] is used */
39  Int ordering, /* what ordering to use (0, 1, or 3 for this routine) */
40 
41  /* output only, not defined on input */
42  Int P [ ], /* size n */
43  Int Q [ ], /* size n */
44  double Lnz [ ], /* size n, but only Lnz [0..nblocks-1] is used */
45 
46  /* workspace, not defined on input or output */
47  Int Pblk [ ], /* size maxblock */
48  Int Cp [ ], /* size maxblock+1 */
49  Int Ci [ ], /* size MAX (nz+1, Cilen) */
50  Int Cilen, /* nz+1, or COLAMD_recommend(nz,n,n) for COLAMD */
51  Int Pinv [ ], /* size maxblock */
52 
53  /* input/output */
54  KLU_symbolic<Entry, Int> *Symbolic,
55  KLU_common<Entry, Int> *Common
56 )
57 {
58  double amd_Info [TRILINOS_AMD_INFO], lnz, lnz1, flops, flops1 ;
59  Int k1, k2, nk, k, block, oldcol, pend, newcol, result, pc, p, newrow,
60  maxnz, nzoff, cstats [TRILINOS_COLAMD_STATS], ok, err = KLU_INVALID ;
61 
62  /* ---------------------------------------------------------------------- */
63  /* initializations */
64  /* ---------------------------------------------------------------------- */
65  for (k = 0 ; k < TRILINOS_AMD_INFO ; k++)
66  {
67  amd_Info[k] = 0;
68  }
69 
70  /* compute the inverse of Pbtf */
71 #ifndef NDEBUGKLU2
72  for (k = 0 ; k < n ; k++)
73  {
74  P [k] = AMESOS2_KLU2_EMPTY ;
75  Q [k] = AMESOS2_KLU2_EMPTY ;
76  Pinv [k] = AMESOS2_KLU2_EMPTY ;
77  }
78 #endif
79  for (k = 0 ; k < n ; k++)
80  {
81  ASSERT (Pbtf [k] >= 0 && Pbtf [k] < n) ;
82  Pinv [Pbtf [k]] = k ;
83  }
84 #ifndef NDEBUGKLU2
85  for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != AMESOS2_KLU2_EMPTY) ;
86 #endif
87  nzoff = 0 ;
88  lnz = 0 ;
89  maxnz = 0 ;
90  flops = 0 ;
91  Symbolic->symmetry = AMESOS2_KLU2_EMPTY ; /* only computed by AMD */
92 
93  /* ---------------------------------------------------------------------- */
94  /* order each block */
95  /* ---------------------------------------------------------------------- */
96 
97  for (block = 0 ; block < nblocks ; block++)
98  {
99 
100  /* ------------------------------------------------------------------ */
101  /* the block is from rows/columns k1 to k2-1 */
102  /* ------------------------------------------------------------------ */
103 
104  k1 = R [block] ;
105  k2 = R [block+1] ;
106  nk = k2 - k1 ;
107  PRINTF (("BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk)) ;
108 
109  /* ------------------------------------------------------------------ */
110  /* construct the kth block, C */
111  /* ------------------------------------------------------------------ */
112 
113  Lnz [block] = AMESOS2_KLU2_EMPTY ;
114  pc = 0 ;
115  for (k = k1 ; k < k2 ; k++)
116  {
117  newcol = k-k1 ;
118  Cp [newcol] = pc ;
119  oldcol = Qbtf [k] ;
120  pend = Ap [oldcol+1] ;
121  for (p = Ap [oldcol] ; p < pend ; p++)
122  {
123  newrow = Pinv [Ai [p]] ;
124  if (newrow < k1)
125  {
126  nzoff++ ;
127  }
128  else
129  {
130  /* (newrow,newcol) is an entry in the block */
131  ASSERT (newrow < k2) ;
132  newrow -= k1 ;
133  Ci [pc++] = newrow ;
134  }
135  }
136  }
137  Cp [nk] = pc ;
138  maxnz = MAX (maxnz, pc) ;
139  ASSERT (KLU_valid (nk, Cp, Ci, NULL)) ;
140 
141  /* ------------------------------------------------------------------ */
142  /* order the block C */
143  /* ------------------------------------------------------------------ */
144 
145  if (nk <= 3)
146  {
147 
148  /* -------------------------------------------------------------- */
149  /* use natural ordering for tiny blocks (3-by-3 or less) */
150  /* -------------------------------------------------------------- */
151 
152  for (k = 0 ; k < nk ; k++)
153  {
154  Pblk [k] = k ;
155  }
156  lnz1 = nk * (nk + 1) / 2 ;
157  flops1 = nk * (nk - 1) / 2 + (nk-1)*nk*(2*nk-1) / 6 ;
158  ok = TRUE ;
159 
160  }
161  else if (ordering == 0)
162  {
163 
164  /* -------------------------------------------------------------- */
165  /* order the block with AMD (C+C') */
166  /* -------------------------------------------------------------- */
167 
168  /*result = AMD_order (nk, Cp, Ci, Pblk, NULL, amd_Info) ;*/
169  result = KLU_OrdinalTraits<Int>::amd_order (nk, Cp, Ci, Pblk,
170  NULL, amd_Info) ;
171  ok = (result >= TRILINOS_AMD_OK) ;
172  if (result == TRILINOS_AMD_OUT_OF_MEMORY)
173  {
174  err = KLU_OUT_OF_MEMORY ;
175  }
176 
177  /* account for memory usage in AMD */
178  Common->mempeak = ( size_t) (MAX (Common->mempeak,
179  Common->memusage + amd_Info [TRILINOS_AMD_MEMORY])) ;
180 
181  /* get the ordering statistics from AMD */
182  lnz1 = (Int) (amd_Info [TRILINOS_AMD_LNZ]) + nk ;
183  flops1 = 2 * amd_Info [TRILINOS_AMD_NMULTSUBS_LU] + amd_Info [TRILINOS_AMD_NDIV] ;
184  if (pc == maxnz)
185  {
186  /* get the symmetry of the biggest block */
187  Symbolic->symmetry = amd_Info [TRILINOS_AMD_SYMMETRY] ;
188  }
189 
190  }
191  else if (ordering == 1)
192  {
193 
194  /* -------------------------------------------------------------- */
195  /* order the block with COLAMD (C) */
196  /* -------------------------------------------------------------- */
197 
198  /* order (and destroy) Ci, returning column permutation in Cp.
199  * COLAMD "cannot" fail since the matrix has already been checked,
200  * and Ci allocated. */
201 
202  /*ok = COLAMD (nk, nk, Cilen, Ci, Cp, NULL, cstats) ;*/
203  ok = KLU_OrdinalTraits<Int>::colamd (nk, nk, Cilen, Ci, Cp,
204  NULL, cstats) ;
205  lnz1 = AMESOS2_KLU2_EMPTY ;
206  flops1 = AMESOS2_KLU2_EMPTY ;
207 
208  /* copy the permutation from Cp to Pblk */
209  for (k = 0 ; k < nk ; k++)
210  {
211  Pblk [k] = Cp [k] ;
212  }
213 
214  }
215  else
216  {
217 
218  /* -------------------------------------------------------------- */
219  /* pass the block to the user-provided ordering function */
220  /* -------------------------------------------------------------- */
221 
222  lnz1 = (Common->user_order) (nk, Cp, Ci, Pblk, Common) ;
223  flops1 = AMESOS2_KLU2_EMPTY ;
224  ok = (lnz1 != 0) ;
225  }
226 
227  if (!ok)
228  {
229  return (err) ; /* ordering method failed */
230  }
231 
232  /* ------------------------------------------------------------------ */
233  /* keep track of nnz(L) and flops statistics */
234  /* ------------------------------------------------------------------ */
235 
236  Lnz [block] = lnz1 ;
237  lnz = (lnz == AMESOS2_KLU2_EMPTY || lnz1 == AMESOS2_KLU2_EMPTY) ? AMESOS2_KLU2_EMPTY : (lnz + lnz1) ;
238  flops = (flops == AMESOS2_KLU2_EMPTY || flops1 == AMESOS2_KLU2_EMPTY) ? AMESOS2_KLU2_EMPTY : (flops + flops1) ;
239 
240  /* ------------------------------------------------------------------ */
241  /* combine the preordering with the BTF ordering */
242  /* ------------------------------------------------------------------ */
243 
244  PRINTF (("Pblk, 1-based:\n")) ;
245  for (k = 0 ; k < nk ; k++)
246  {
247  ASSERT (k + k1 < n) ;
248  ASSERT (Pblk [k] + k1 < n) ;
249  Q [k + k1] = Qbtf [Pblk [k] + k1] ;
250  }
251  for (k = 0 ; k < nk ; k++)
252  {
253  ASSERT (k + k1 < n) ;
254  ASSERT (Pblk [k] + k1 < n) ;
255  P [k + k1] = Pbtf [Pblk [k] + k1] ;
256  }
257  }
258 
259  PRINTF (("nzoff %d Ap[n] %d\n", nzoff, Ap [n])) ;
260  ASSERT (nzoff >= 0 && nzoff <= Ap [n]) ;
261 
262  /* return estimates of # of nonzeros in L including diagonal */
263  Symbolic->lnz = lnz ; /* AMESOS2_KLU2_EMPTY if COLAMD used */
264  Symbolic->unz = lnz ;
265  Symbolic->nzoff = nzoff ;
266  Symbolic->est_flops = flops ; /* AMESOS2_KLU2_EMPTY if COLAMD or user-ordering used */
267  return (KLU_OK) ;
268 }
269 
270 
271 /* ========================================================================== */
272 /* === order_and_analyze ==================================================== */
273 /* ========================================================================== */
274 
275 /* Orders the matrix with or with BTF, then orders each block with AMD, COLAMD,
276  * or the user ordering function. Does not handle the natural or given
277  * ordering cases. */
278 
279 template <typename Entry, typename Int>
280 static KLU_symbolic<Entry, Int> *order_and_analyze /* returns NULL if error, or a valid
281  KLU_symbolic object if successful */
282 (
283  /* inputs, not modified */
284  Int n, /* A is n-by-n */
285  Int Ap [ ], /* size n+1, column pointers */
286  Int Ai [ ], /* size nz, row indices */
287  /* --------------------- */
288  KLU_common<Entry, Int> *Common
289 )
290 {
291  double work ;
292  KLU_symbolic<Entry, Int> *Symbolic ;
293  double *Lnz ;
294  Int *Qbtf, *Cp, *Ci, *Pinv, *Pblk, *Pbtf, *P, *Q, *R ;
295  Int nblocks, nz, block, maxblock, k1, k2, nk, do_btf, ordering, k, Cilen,
296  *Work ;
297 
298  /* ---------------------------------------------------------------------- */
299  /* allocate the Symbolic object, and check input matrix */
300  /* ---------------------------------------------------------------------- */
301 
302  Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ;
303  if (Symbolic == NULL)
304  {
305  return (NULL) ;
306  }
307  P = Symbolic->P ;
308  Q = Symbolic->Q ;
309  R = Symbolic->R ;
310  Lnz = Symbolic->Lnz ;
311  nz = Symbolic->nz ;
312 
313  ordering = Common->ordering ;
314  if (ordering == 1)
315  {
316  /* COLAMD TODO : Should call the right version of COLAMD and other
317  external functions below */
318  /*Cilen = COLAMD_recommended (nz, n, n) ;*/
319  Cilen = KLU_OrdinalTraits<Int>::colamd_recommended (nz, n, n) ;
320  }
321  else if (ordering == 0 || (ordering == 3 && Common->user_order != NULL))
322  {
323  /* AMD or user ordering function */
324  Cilen = nz+1 ;
325  }
326  else
327  {
328  /* invalid ordering */
329  Common->status = KLU_INVALID ;
330  KLU_free_symbolic (&Symbolic, Common) ;
331  return (NULL) ;
332  }
333 
334  /* AMD memory management routines */
335  trilinos_amd_malloc = Common->malloc_memory ;
336  trilinos_amd_free = Common->free_memory ;
337  trilinos_amd_calloc = Common->calloc_memory ;
338  trilinos_amd_realloc = Common->realloc_memory ;
339 
340  /* ---------------------------------------------------------------------- */
341  /* allocate workspace for BTF permutation */
342  /* ---------------------------------------------------------------------- */
343 
344  Pbtf = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
345  Qbtf = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
346  if (Common->status < KLU_OK)
347  {
348  KLU_free (Pbtf, n, sizeof (Int), Common) ;
349  KLU_free (Qbtf, n, sizeof (Int), Common) ;
350  KLU_free_symbolic (&Symbolic, Common) ;
351  return (NULL) ;
352  }
353 
354  /* ---------------------------------------------------------------------- */
355  /* get the common parameters for BTF and ordering method */
356  /* ---------------------------------------------------------------------- */
357 
358  do_btf = Common->btf ;
359  do_btf = (do_btf) ? TRUE : FALSE ;
360  Symbolic->ordering = ordering ;
361  Symbolic->do_btf = do_btf ;
362  Symbolic->structural_rank = AMESOS2_KLU2_EMPTY ;
363 
364  /* ---------------------------------------------------------------------- */
365  /* find the block triangular form (if requested) */
366  /* ---------------------------------------------------------------------- */
367 
368  Common->work = 0 ;
369  work = 0;
370 
371  if (do_btf)
372  {
373  Work = (Int *) KLU_malloc (5*n, sizeof (Int), Common) ;
374  if (Common->status < KLU_OK)
375  {
376  /* out of memory */
377  KLU_free (Pbtf, n, sizeof (Int), Common) ;
378  KLU_free (Qbtf, n, sizeof (Int), Common) ;
379  KLU_free_symbolic (&Symbolic, Common) ;
380  return (NULL) ;
381  }
382 
383  /* TODO : Correct version of BTF */
384  /*nblocks = BTF_order (n, Ap, Ai, Common->maxwork, &work, Pbtf, Qbtf, R,
385  &(Symbolic->structural_rank), Work) ;*/
386  nblocks = KLU_OrdinalTraits<Int>::btf_order (n, Ap, Ai,
387  Common->maxwork, &work, Pbtf, Qbtf, R,
388  &(Symbolic->structural_rank), Work) ;
389  Common->structural_rank = Symbolic->structural_rank ;
390  Common->work += work ;
391 
392  KLU_free (Work, 5*n, sizeof (Int), Common) ;
393 
394  /* unflip Qbtf if the matrix does not have full structural rank */
395  if (Symbolic->structural_rank < n)
396  {
397  for (k = 0 ; k < n ; k++)
398  {
399  Qbtf [k] = TRILINOS_BTF_UNFLIP (Qbtf [k]) ;
400  }
401  }
402 
403  /* find the size of the largest block */
404  maxblock = 1 ;
405  for (block = 0 ; block < nblocks ; block++)
406  {
407  k1 = R [block] ;
408  k2 = R [block+1] ;
409  nk = k2 - k1 ;
410  PRINTF (("block %d size %d\n", block, nk)) ;
411  maxblock = MAX (maxblock, nk) ;
412  }
413  }
414  else
415  {
416  /* BTF not requested */
417  nblocks = 1 ;
418  maxblock = n ;
419  R [0] = 0 ;
420  R [1] = n ;
421  for (k = 0 ; k < n ; k++)
422  {
423  Pbtf [k] = k ;
424  Qbtf [k] = k ;
425  }
426  }
427 
428  Symbolic->nblocks = nblocks ;
429 
430  PRINTF (("maxblock size %d\n", maxblock)) ;
431  Symbolic->maxblock = maxblock ;
432 
433  /* ---------------------------------------------------------------------- */
434  /* allocate more workspace, for analyze_worker */
435  /* ---------------------------------------------------------------------- */
436 
437  Pblk = (Int *) KLU_malloc (maxblock, sizeof (Int), Common) ;
438  Cp = (Int *) KLU_malloc (maxblock + 1, sizeof (Int), Common) ;
439  Ci = (Int *) KLU_malloc (MAX (Cilen, nz+1), sizeof (Int), Common) ;
440  Pinv = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
441 
442  /* ---------------------------------------------------------------------- */
443  /* order each block of the BTF ordering, and a fill-reducing ordering */
444  /* ---------------------------------------------------------------------- */
445 
446  if (Common->status == KLU_OK)
447  {
448  PRINTF (("calling analyze_worker\n")) ;
449  Common->status = analyze_worker (n, Ap, Ai, nblocks, Pbtf, Qbtf, R,
450  ordering, P, Q, Lnz, Pblk, Cp, Ci, Cilen, Pinv, Symbolic, Common) ;
451  PRINTF (("analyze_worker done\n")) ;
452  }
453 
454  /* ---------------------------------------------------------------------- */
455  /* free all workspace */
456  /* ---------------------------------------------------------------------- */
457 
458  KLU_free (Pblk, maxblock, sizeof (Int), Common) ;
459  KLU_free (Cp, maxblock+1, sizeof (Int), Common) ;
460  KLU_free (Ci, MAX (Cilen, nz+1), sizeof (Int), Common) ;
461  KLU_free (Pinv, n, sizeof (Int), Common) ;
462  KLU_free (Pbtf, n, sizeof (Int), Common) ;
463  KLU_free (Qbtf, n, sizeof (Int), Common) ;
464 
465  /* ---------------------------------------------------------------------- */
466  /* return the symbolic object */
467  /* ---------------------------------------------------------------------- */
468 
469  if (Common->status < KLU_OK)
470  {
471  KLU_free_symbolic (&Symbolic, Common) ;
472  }
473  return (Symbolic) ;
474 }
475 
476 
477 /* ========================================================================== */
478 /* === KLU_analyze ========================================================== */
479 /* ========================================================================== */
480 
481 template <typename Entry, typename Int>
482 KLU_symbolic<Entry, Int> *KLU_analyze /* returns NULL if error, or a valid
483  KLU_symbolic object if successful */
484 (
485  /* inputs, not modified */
486  Int n, /* A is n-by-n */
487  Int Ap [ ], /* size n+1, column pointers */
488  Int Ai [ ], /* size nz, row indices */
489  /* -------------------- */
490  KLU_common<Entry, Int> *Common
491 )
492 {
493 
494  /* ---------------------------------------------------------------------- */
495  /* get the control parameters for BTF and ordering method */
496  /* ---------------------------------------------------------------------- */
497 
498  if (Common == NULL)
499  {
500  return (NULL) ;
501  }
502  Common->status = KLU_OK ;
503  Common->structural_rank = AMESOS2_KLU2_EMPTY ;
504 
505  /* ---------------------------------------------------------------------- */
506  /* order and analyze */
507  /* ---------------------------------------------------------------------- */
508 
509  if (Common->ordering == 2)
510  {
511  /* natural ordering */
512  /* TODO : c++ sees Ap, Ai as int *& Why ? */
513  /* TODO : c++ does not allow NULL to be passed directly */
514  Int *dummy = NULL ;
515  return (KLU_analyze_given (n, Ap, Ai, dummy, dummy, Common)) ;
516  }
517  else
518  {
519  /* order with P and Q */
520  return (order_and_analyze (n, Ap, Ai, Common)) ;
521  }
522 }
523 
524 #endif
int Ap[]
Column offsets.
Definition: klu2_simple.cpp:52
int Ai[]
Row values.
Definition: klu2_simple.cpp:54