Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
klu2_analyze.hpp
1 /* ========================================================================== */
2 /* === klu_analyze ========================================================== */
3 /* ========================================================================== */
4 // @HEADER
5 // ***********************************************************************
6 //
7 // KLU2: A Direct Linear Solver package
8 // Copyright 2011 Sandia Corporation
9 //
10 // Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11 // U.S. Government retains certain rights in this software.
12 //
13 // This library is free software; you can redistribute it and/or modify
14 // it under the terms of the GNU Lesser General Public License as
15 // published by the Free Software Foundation; either version 2.1 of the
16 // License, or (at your option) any later version.
17 //
18 // This library is distributed in the hope that it will be useful, but
19 // WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 // Lesser General Public License for more details.
22 //
23 // You should have received a copy of the GNU Lesser General Public
24 // License along with this library; if not, write to the Free Software
25 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 // USA
27 // Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28 //
29 // KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30 // University of Florida. The Authors of KLU are Timothy A. Davis and
31 // Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32 // information for KLU.
33 //
34 // ***********************************************************************
35 // @HEADER
36 
37 /* Order the matrix using BTF (or not), and then AMD, COLAMD, the natural
38  * ordering, or the user-provided-function on the blocks. Does not support
39  * using a given ordering (use klu_analyze_given for that case). */
40 
41 #ifndef KLU2_ANALYZE_HPP
42 #define KLU2_ANALYZE_HPP
43 
44 #include "klu2_internal.h"
45 #include "klu2_analyze_given.hpp"
46 #include "klu2_memory.hpp"
47 
48 /* ========================================================================== */
49 /* === analyze_worker ======================================================= */
50 /* ========================================================================== */
51 
52 template <typename Entry, typename Int>
53 static Int analyze_worker /* returns KLU_OK or < 0 if error */
54 (
55  /* inputs, not modified */
56  Int n, /* A is n-by-n */
57  Int Ap [ ], /* size n+1, column pointers */
58  Int Ai [ ], /* size nz, row indices */
59  Int nblocks, /* # of blocks */
60  Int Pbtf [ ], /* BTF row permutation */
61  Int Qbtf [ ], /* BTF col permutation */
62  Int R [ ], /* size n+1, but only Rbtf [0..nblocks] is used */
63  Int ordering, /* what ordering to use (0, 1, or 3 for this routine) */
64 
65  /* output only, not defined on input */
66  Int P [ ], /* size n */
67  Int Q [ ], /* size n */
68  double Lnz [ ], /* size n, but only Lnz [0..nblocks-1] is used */
69 
70  /* workspace, not defined on input or output */
71  Int Pblk [ ], /* size maxblock */
72  Int Cp [ ], /* size maxblock+1 */
73  Int Ci [ ], /* size MAX (nz+1, Cilen) */
74  Int Cilen, /* nz+1, or COLAMD_recommend(nz,n,n) for COLAMD */
75  Int Pinv [ ], /* size maxblock */
76 
77  /* input/output */
78  KLU_symbolic<Entry, Int> *Symbolic,
79  KLU_common<Entry, Int> *Common
80 )
81 {
82  double amd_Info [TRILINOS_AMD_INFO], lnz, lnz1, flops, flops1 ;
83  Int k1, k2, nk, k, block, oldcol, pend, newcol, result, pc, p, newrow,
84  maxnz, nzoff, cstats [TRILINOS_COLAMD_STATS], ok, err = KLU_INVALID ;
85 
86  /* ---------------------------------------------------------------------- */
87  /* initializations */
88  /* ---------------------------------------------------------------------- */
89  for (k = 0 ; k < TRILINOS_AMD_INFO ; k++)
90  {
91  amd_Info[k] = 0;
92  }
93 
94  /* compute the inverse of Pbtf */
95 #ifndef NDEBUGKLU2
96  for (k = 0 ; k < n ; k++)
97  {
98  P [k] = EMPTY ;
99  Q [k] = EMPTY ;
100  Pinv [k] = EMPTY ;
101  }
102 #endif
103  for (k = 0 ; k < n ; k++)
104  {
105  ASSERT (Pbtf [k] >= 0 && Pbtf [k] < n) ;
106  Pinv [Pbtf [k]] = k ;
107  }
108 #ifndef NDEBUGKLU2
109  for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
110 #endif
111  nzoff = 0 ;
112  lnz = 0 ;
113  maxnz = 0 ;
114  flops = 0 ;
115  Symbolic->symmetry = EMPTY ; /* only computed by AMD */
116 
117  /* ---------------------------------------------------------------------- */
118  /* order each block */
119  /* ---------------------------------------------------------------------- */
120 
121  for (block = 0 ; block < nblocks ; block++)
122  {
123 
124  /* ------------------------------------------------------------------ */
125  /* the block is from rows/columns k1 to k2-1 */
126  /* ------------------------------------------------------------------ */
127 
128  k1 = R [block] ;
129  k2 = R [block+1] ;
130  nk = k2 - k1 ;
131  PRINTF (("BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk)) ;
132 
133  /* ------------------------------------------------------------------ */
134  /* construct the kth block, C */
135  /* ------------------------------------------------------------------ */
136 
137  Lnz [block] = EMPTY ;
138  pc = 0 ;
139  for (k = k1 ; k < k2 ; k++)
140  {
141  newcol = k-k1 ;
142  Cp [newcol] = pc ;
143  oldcol = Qbtf [k] ;
144  pend = Ap [oldcol+1] ;
145  for (p = Ap [oldcol] ; p < pend ; p++)
146  {
147  newrow = Pinv [Ai [p]] ;
148  if (newrow < k1)
149  {
150  nzoff++ ;
151  }
152  else
153  {
154  /* (newrow,newcol) is an entry in the block */
155  ASSERT (newrow < k2) ;
156  newrow -= k1 ;
157  Ci [pc++] = newrow ;
158  }
159  }
160  }
161  Cp [nk] = pc ;
162  maxnz = MAX (maxnz, pc) ;
163  ASSERT (KLU_valid (nk, Cp, Ci, NULL)) ;
164 
165  /* ------------------------------------------------------------------ */
166  /* order the block C */
167  /* ------------------------------------------------------------------ */
168 
169  if (nk <= 3)
170  {
171 
172  /* -------------------------------------------------------------- */
173  /* use natural ordering for tiny blocks (3-by-3 or less) */
174  /* -------------------------------------------------------------- */
175 
176  for (k = 0 ; k < nk ; k++)
177  {
178  Pblk [k] = k ;
179  }
180  lnz1 = nk * (nk + 1) / 2 ;
181  flops1 = nk * (nk - 1) / 2 + (nk-1)*nk*(2*nk-1) / 6 ;
182  ok = TRUE ;
183 
184  }
185  else if (ordering == 0)
186  {
187 
188  /* -------------------------------------------------------------- */
189  /* order the block with AMD (C+C') */
190  /* -------------------------------------------------------------- */
191 
192  /*result = AMD_order (nk, Cp, Ci, Pblk, NULL, amd_Info) ;*/
193  result = KLU_OrdinalTraits<Int>::amd_order (nk, Cp, Ci, Pblk,
194  NULL, amd_Info) ;
195  ok = (result >= TRILINOS_AMD_OK) ;
196  if (result == TRILINOS_AMD_OUT_OF_MEMORY)
197  {
198  err = KLU_OUT_OF_MEMORY ;
199  }
200 
201  /* account for memory usage in AMD */
202  Common->mempeak = ( size_t) (MAX (Common->mempeak,
203  Common->memusage + amd_Info [TRILINOS_AMD_MEMORY])) ;
204 
205  /* get the ordering statistics from AMD */
206  lnz1 = (Int) (amd_Info [TRILINOS_AMD_LNZ]) + nk ;
207  flops1 = 2 * amd_Info [TRILINOS_AMD_NMULTSUBS_LU] + amd_Info [TRILINOS_AMD_NDIV] ;
208  if (pc == maxnz)
209  {
210  /* get the symmetry of the biggest block */
211  Symbolic->symmetry = amd_Info [TRILINOS_AMD_SYMMETRY] ;
212  }
213 
214  }
215  else if (ordering == 1)
216  {
217 
218  /* -------------------------------------------------------------- */
219  /* order the block with COLAMD (C) */
220  /* -------------------------------------------------------------- */
221 
222  /* order (and destroy) Ci, returning column permutation in Cp.
223  * COLAMD "cannot" fail since the matrix has already been checked,
224  * and Ci allocated. */
225 
226  /*ok = COLAMD (nk, nk, Cilen, Ci, Cp, NULL, cstats) ;*/
227  ok = KLU_OrdinalTraits<Int>::colamd (nk, nk, Cilen, Ci, Cp,
228  NULL, cstats) ;
229  lnz1 = EMPTY ;
230  flops1 = EMPTY ;
231 
232  /* copy the permutation from Cp to Pblk */
233  for (k = 0 ; k < nk ; k++)
234  {
235  Pblk [k] = Cp [k] ;
236  }
237 
238  }
239  else
240  {
241 
242  /* -------------------------------------------------------------- */
243  /* pass the block to the user-provided ordering function */
244  /* -------------------------------------------------------------- */
245 
246  lnz1 = (Common->user_order) (nk, Cp, Ci, Pblk, Common) ;
247  flops1 = EMPTY ;
248  ok = (lnz1 != 0) ;
249  }
250 
251  if (!ok)
252  {
253  return (err) ; /* ordering method failed */
254  }
255 
256  /* ------------------------------------------------------------------ */
257  /* keep track of nnz(L) and flops statistics */
258  /* ------------------------------------------------------------------ */
259 
260  Lnz [block] = lnz1 ;
261  lnz = (lnz == EMPTY || lnz1 == EMPTY) ? EMPTY : (lnz + lnz1) ;
262  flops = (flops == EMPTY || flops1 == EMPTY) ? EMPTY : (flops + flops1) ;
263 
264  /* ------------------------------------------------------------------ */
265  /* combine the preordering with the BTF ordering */
266  /* ------------------------------------------------------------------ */
267 
268  PRINTF (("Pblk, 1-based:\n")) ;
269  for (k = 0 ; k < nk ; k++)
270  {
271  ASSERT (k + k1 < n) ;
272  ASSERT (Pblk [k] + k1 < n) ;
273  Q [k + k1] = Qbtf [Pblk [k] + k1] ;
274  }
275  for (k = 0 ; k < nk ; k++)
276  {
277  ASSERT (k + k1 < n) ;
278  ASSERT (Pblk [k] + k1 < n) ;
279  P [k + k1] = Pbtf [Pblk [k] + k1] ;
280  }
281  }
282 
283  PRINTF (("nzoff %d Ap[n] %d\n", nzoff, Ap [n])) ;
284  ASSERT (nzoff >= 0 && nzoff <= Ap [n]) ;
285 
286  /* return estimates of # of nonzeros in L including diagonal */
287  Symbolic->lnz = lnz ; /* EMPTY if COLAMD used */
288  Symbolic->unz = lnz ;
289  Symbolic->nzoff = nzoff ;
290  Symbolic->est_flops = flops ; /* EMPTY if COLAMD or user-ordering used */
291  return (KLU_OK) ;
292 }
293 
294 
295 /* ========================================================================== */
296 /* === order_and_analyze ==================================================== */
297 /* ========================================================================== */
298 
299 /* Orders the matrix with or with BTF, then orders each block with AMD, COLAMD,
300  * or the user ordering function. Does not handle the natural or given
301  * ordering cases. */
302 
303 template <typename Entry, typename Int>
304 static KLU_symbolic<Entry, Int> *order_and_analyze /* returns NULL if error, or a valid
305  KLU_symbolic object if successful */
306 (
307  /* inputs, not modified */
308  Int n, /* A is n-by-n */
309  Int Ap [ ], /* size n+1, column pointers */
310  Int Ai [ ], /* size nz, row indices */
311  /* --------------------- */
312  KLU_common<Entry, Int> *Common
313 )
314 {
315  double work ;
316  KLU_symbolic<Entry, Int> *Symbolic ;
317  double *Lnz ;
318  Int *Qbtf, *Cp, *Ci, *Pinv, *Pblk, *Pbtf, *P, *Q, *R ;
319  Int nblocks, nz, block, maxblock, k1, k2, nk, do_btf, ordering, k, Cilen,
320  *Work ;
321 
322  /* ---------------------------------------------------------------------- */
323  /* allocate the Symbolic object, and check input matrix */
324  /* ---------------------------------------------------------------------- */
325 
326  Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ;
327  if (Symbolic == NULL)
328  {
329  return (NULL) ;
330  }
331  P = Symbolic->P ;
332  Q = Symbolic->Q ;
333  R = Symbolic->R ;
334  Lnz = Symbolic->Lnz ;
335  nz = Symbolic->nz ;
336 
337  ordering = Common->ordering ;
338  if (ordering == 1)
339  {
340  /* COLAMD TODO : Should call the right version of COLAMD and other
341  external functions below */
342  /*Cilen = COLAMD_recommended (nz, n, n) ;*/
343  Cilen = KLU_OrdinalTraits<Int>::colamd_recommended (nz, n, n) ;
344  }
345  else if (ordering == 0 || (ordering == 3 && Common->user_order != NULL))
346  {
347  /* AMD or user ordering function */
348  Cilen = nz+1 ;
349  }
350  else
351  {
352  /* invalid ordering */
353  Common->status = KLU_INVALID ;
354  KLU_free_symbolic (&Symbolic, Common) ;
355  return (NULL) ;
356  }
357 
358  /* AMD memory management routines */
359  trilinos_amd_malloc = Common->malloc_memory ;
360  trilinos_amd_free = Common->free_memory ;
361  trilinos_amd_calloc = Common->calloc_memory ;
362  trilinos_amd_realloc = Common->realloc_memory ;
363 
364  /* ---------------------------------------------------------------------- */
365  /* allocate workspace for BTF permutation */
366  /* ---------------------------------------------------------------------- */
367 
368  Pbtf = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
369  Qbtf = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
370  if (Common->status < KLU_OK)
371  {
372  KLU_free (Pbtf, n, sizeof (Int), Common) ;
373  KLU_free (Qbtf, n, sizeof (Int), Common) ;
374  KLU_free_symbolic (&Symbolic, Common) ;
375  return (NULL) ;
376  }
377 
378  /* ---------------------------------------------------------------------- */
379  /* get the common parameters for BTF and ordering method */
380  /* ---------------------------------------------------------------------- */
381 
382  do_btf = Common->btf ;
383  do_btf = (do_btf) ? TRUE : FALSE ;
384  Symbolic->ordering = ordering ;
385  Symbolic->do_btf = do_btf ;
386  Symbolic->structural_rank = EMPTY ;
387 
388  /* ---------------------------------------------------------------------- */
389  /* find the block triangular form (if requested) */
390  /* ---------------------------------------------------------------------- */
391 
392  Common->work = 0 ;
393  work = 0;
394 
395  if (do_btf)
396  {
397  Work = (Int *) KLU_malloc (5*n, sizeof (Int), Common) ;
398  if (Common->status < KLU_OK)
399  {
400  /* out of memory */
401  KLU_free (Pbtf, n, sizeof (Int), Common) ;
402  KLU_free (Qbtf, n, sizeof (Int), Common) ;
403  KLU_free_symbolic (&Symbolic, Common) ;
404  return (NULL) ;
405  }
406 
407  /* TODO : Correct version of BTF */
408  /*nblocks = BTF_order (n, Ap, Ai, Common->maxwork, &work, Pbtf, Qbtf, R,
409  &(Symbolic->structural_rank), Work) ;*/
410  nblocks = KLU_OrdinalTraits<Int>::btf_order (n, Ap, Ai,
411  Common->maxwork, &work, Pbtf, Qbtf, R,
412  &(Symbolic->structural_rank), Work) ;
413  Common->structural_rank = Symbolic->structural_rank ;
414  Common->work += work ;
415 
416  KLU_free (Work, 5*n, sizeof (Int), Common) ;
417 
418  /* unflip Qbtf if the matrix does not have full structural rank */
419  if (Symbolic->structural_rank < n)
420  {
421  for (k = 0 ; k < n ; k++)
422  {
423  Qbtf [k] = TRILINOS_BTF_UNFLIP (Qbtf [k]) ;
424  }
425  }
426 
427  /* find the size of the largest block */
428  maxblock = 1 ;
429  for (block = 0 ; block < nblocks ; block++)
430  {
431  k1 = R [block] ;
432  k2 = R [block+1] ;
433  nk = k2 - k1 ;
434  PRINTF (("block %d size %d\n", block, nk)) ;
435  maxblock = MAX (maxblock, nk) ;
436  }
437  }
438  else
439  {
440  /* BTF not requested */
441  nblocks = 1 ;
442  maxblock = n ;
443  R [0] = 0 ;
444  R [1] = n ;
445  for (k = 0 ; k < n ; k++)
446  {
447  Pbtf [k] = k ;
448  Qbtf [k] = k ;
449  }
450  }
451 
452  Symbolic->nblocks = nblocks ;
453 
454  PRINTF (("maxblock size %d\n", maxblock)) ;
455  Symbolic->maxblock = maxblock ;
456 
457  /* ---------------------------------------------------------------------- */
458  /* allocate more workspace, for analyze_worker */
459  /* ---------------------------------------------------------------------- */
460 
461  Pblk = (Int *) KLU_malloc (maxblock, sizeof (Int), Common) ;
462  Cp = (Int *) KLU_malloc (maxblock + 1, sizeof (Int), Common) ;
463  Ci = (Int *) KLU_malloc (MAX (Cilen, nz+1), sizeof (Int), Common) ;
464  Pinv = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
465 
466  /* ---------------------------------------------------------------------- */
467  /* order each block of the BTF ordering, and a fill-reducing ordering */
468  /* ---------------------------------------------------------------------- */
469 
470  if (Common->status == KLU_OK)
471  {
472  PRINTF (("calling analyze_worker\n")) ;
473  Common->status = analyze_worker (n, Ap, Ai, nblocks, Pbtf, Qbtf, R,
474  ordering, P, Q, Lnz, Pblk, Cp, Ci, Cilen, Pinv, Symbolic, Common) ;
475  PRINTF (("analyze_worker done\n")) ;
476  }
477 
478  /* ---------------------------------------------------------------------- */
479  /* free all workspace */
480  /* ---------------------------------------------------------------------- */
481 
482  KLU_free (Pblk, maxblock, sizeof (Int), Common) ;
483  KLU_free (Cp, maxblock+1, sizeof (Int), Common) ;
484  KLU_free (Ci, MAX (Cilen, nz+1), sizeof (Int), Common) ;
485  KLU_free (Pinv, n, sizeof (Int), Common) ;
486  KLU_free (Pbtf, n, sizeof (Int), Common) ;
487  KLU_free (Qbtf, n, sizeof (Int), Common) ;
488 
489  /* ---------------------------------------------------------------------- */
490  /* return the symbolic object */
491  /* ---------------------------------------------------------------------- */
492 
493  if (Common->status < KLU_OK)
494  {
495  KLU_free_symbolic (&Symbolic, Common) ;
496  }
497  return (Symbolic) ;
498 }
499 
500 
501 /* ========================================================================== */
502 /* === KLU_analyze ========================================================== */
503 /* ========================================================================== */
504 
505 template <typename Entry, typename Int>
506 KLU_symbolic<Entry, Int> *KLU_analyze /* returns NULL if error, or a valid
507  KLU_symbolic object if successful */
508 (
509  /* inputs, not modified */
510  Int n, /* A is n-by-n */
511  Int Ap [ ], /* size n+1, column pointers */
512  Int Ai [ ], /* size nz, row indices */
513  /* -------------------- */
514  KLU_common<Entry, Int> *Common
515 )
516 {
517 
518  /* ---------------------------------------------------------------------- */
519  /* get the control parameters for BTF and ordering method */
520  /* ---------------------------------------------------------------------- */
521 
522  if (Common == NULL)
523  {
524  return (NULL) ;
525  }
526  Common->status = KLU_OK ;
527  Common->structural_rank = EMPTY ;
528 
529  /* ---------------------------------------------------------------------- */
530  /* order and analyze */
531  /* ---------------------------------------------------------------------- */
532 
533  if (Common->ordering == 2)
534  {
535  /* natural ordering */
536  /* TODO : c++ sees Ap, Ai as int *& Why ? */
537  /* TODO : c++ does not allow NULL to be passed directly */
538  Int *dummy = NULL ;
539  return (KLU_analyze_given (n, Ap, Ai, dummy, dummy, Common)) ;
540  }
541  else
542  {
543  /* order with P and Q */
544  return (order_and_analyze (n, Ap, Ai, Common)) ;
545  }
546 }
547 
548 #endif