IFPACK  Development
 All Classes Namespaces Files Functions Variables Enumerations Friends Pages
Ifpack_IKLU_Utils.cpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #include "Ifpack_ConfigDefs.h"
44 #include "Ifpack_IKLU_Utils.h"
45 
46 //-----------------------------------------------------------------
47 // Allocation and Destruction
48 //-----------------------------------------------------------------
49 
50 /* allocate a sparse matrix (triplet form or compressed-column form) */
51 csr *csr_spalloc (int m, int n, int nzmax, int values, int triplet)
52 {
53  csr *A = (csr*)calloc (1, sizeof (csr)) ; /* allocate the csr struct */
54  if (!A) return (NULL) ; /* out of memory */
55  A->m = m ; /* define dimensions and nzmax */
56  A->n = n ;
57  A->nzmax = nzmax = CS_MAX (nzmax, 1) ;
58  A->nz = triplet ? 0 : -1 ; /* allocate triplet or comp.row */
59  A->p = (int*)malloc (triplet ? CS_MAX(nzmax,1) * sizeof (int) : CS_MAX(m+1,1) * sizeof (int)) ;
60  A->j = (int*)malloc (CS_MAX(nzmax,1) * sizeof (int)) ;
61  A->x = values ? (double*)malloc (CS_MAX(nzmax,1) * sizeof (double)) : NULL ;
62  return ((!A->p || !A->j || (values && !A->x)) ? csr_spfree (A) : A) ;
63 }
64 
65 /* change the max # of entries sparse matrix */
66 int csr_sprealloc (csr *A, int nzmax)
67 {
68  int ok, oki, okj = 1, okx = 1 ;
69  if (!A) return (0) ;
70  nzmax = (nzmax <= 0) ? (A->p [A->m]) : nzmax ;
71  A->j = (int*)csr_realloc (A->j, nzmax, sizeof (int), &oki) ;
72  if (CS_TRIPLET (A)) A->p = (int*)csr_realloc (A->p, nzmax, sizeof (int), &okj) ;
73  if (A->x) A->x = (double*)csr_realloc (A->x, nzmax, sizeof (double), &okx) ;
74  ok = (oki && okj && okx) ;
75  if (ok) A->nzmax = nzmax ;
76  return (ok) ;
77 }
78 
79 /* wrapper for realloc */
80 void *csr_realloc (void *p, int n, size_t size, int *ok)
81 {
82  void *pnew ;
83  pnew = realloc (p, CS_MAX (n,1) * size) ; /* realloc the block */
84  *ok = (pnew != NULL) ; /* realloc fails if pnew is NULL */
85  return ((*ok) ? pnew : p) ; /* return original p if failure */
86 }
87 
88 /* free a sparse matrix */
89 csr *csr_spfree (csr *A)
90 {
91  if (!A) return (NULL); /* do nothing if A already NULL */
92  if (A->p) free(A->p);
93  if (A->j) free(A->j);
94  if (A->x) free(A->x);
95  if (A) free(A);
96  return (NULL) ; /* free the csr struct and return NULL */
97 }
98 
99 /* free a symbolic factorization */
100 css *csr_sfree (css *S)
101 {
102  if (!S) return (NULL) ; /* do nothing if S already NULL */
103  if (S->pinv) free(S->pinv);
104  if (S->q) free(S->q);
105  if (S->parent) free(S->parent);
106  if (S->cp) free(S->cp);
107  if (S->leftmost) free(S->leftmost);
108  if (S) free(S);
109  return (NULL) ; /* free the css struct and return NULL */
110 }
111 
112 csrn *csr_nfree (csrn *N)
113 {
114  if (!N) return (NULL) ; /* do nothing if N already NULL */
115  csr_spfree (N->L) ;
116  csr_spfree (N->U) ;
117  if (N->pinv) free(N->pinv);
118  if (N->perm) free(N->perm);
119  if (N->B) free(N->B);
120  if (N) free(N);
121  return (NULL) ; /* free the csn struct and return NULL */
122 }
123 
124 /* free workspace and return a sparse matrix result */
125 csr *csr_done (csr *C, void *w, void *x, int ok)
126 {
127  if (w) free(w); /* free workspace */
128  if (x) free(x);
129  return (ok ? C : csr_spfree (C)) ; /* return result if OK, else free it */
130 }
131 
132 /* free workspace and return a numeric factorization (Cholesky, LU, or QR) */
133 csrn *csr_ndone (csrn *N, csr *C, void *w, void *x, int ok)
134 {
135  csr_spfree (C) ; /* free temporary matrix */
136  if (w) free(w); /* free workspace */
137  if (x) free(x);
138  return (ok ? N : csr_nfree (N)) ; /* return result if OK, else free it */
139 }
140 
141 /* free workspace and return int array result */
142 int *csr_idone (int *p, csr *C, void *w, int ok)
143 {
144  csr_spfree (C) ; /* free temporary matrix */
145  if (w) free(w); /* free workspace */
146  /* return result if OK, else free it */
147  if (ok)
148  return p;
149  else {
150  if (p) free(p);
151  return NULL;
152  }
153 }
154 
155 //-----------------------------------------------------------------
156 // Basic CSR math routines
157 //-----------------------------------------------------------------
158 
159 /* p [0..n] = cumulative sum of c [0..n-1], and then copy p [0..n-1] into c */
160 double csr_cumsum (int *p, int *c, int n)
161 {
162  int i, nz = 0 ;
163  double nz2 = 0 ;
164  if (!p || !c) return (-1) ; /* check inputs */
165  for (i = 0 ; i < n ; i++)
166  {
167  p [i] = nz ;
168  nz += c [i] ;
169  nz2 += c [i] ; /* also in double to avoid int overflow */
170  c [i] = p [i] ; /* also copy p[0..n-1] back into c[0..n-1]*/
171  }
172  p [n] = nz ;
173  return (nz2) ; /* return sum (c [0..n-1]) */
174 }
175 
176 /* x = x + alpha * B(i,:), where x is a dense vector and B(i,:) is sparse */
177 int csr_scatter (const csr *B, int i, double alpha, int *w, double *x, int mark,
178  csr *C, int nz)
179 {
180  int j, p, *Bp, *Bj, *Cj ;
181  double *Bx ;
182  if (!CS_CSC (B) || !w || !CS_CSC (C)) return (-1) ; /* check inputs */
183  Bp = B->p ; Bj = B->j ; Bx = B->x ; Cj = C->j ;
184  for (p = Bp [i] ; p < Bp [i+1] ; p++)
185  {
186  j = Bj [p] ; /* B(i,j) is nonzero */
187  if (w [j] < mark)
188  {
189  w [j] = mark ; /* j is new entry in row i */
190  Cj [nz++] = j ; /* add j to pattern of C(i,:) */
191  if (x) x [j] = alpha * Bx [p] ; /* x(j) = alpha*B(i,j) */
192  }
193  else if (x) x [j] += alpha * Bx [p] ; /* j exists in C(i,:) already */
194  }
195  return (nz) ;
196 }
197 
198 /* C = alpha*A + beta*B */
199 csr *csr_add (const csr *A, const csr *B, double alpha, double beta)
200 {
201  int p, j, nz = 0, anz, *Cp, *Cj, *Bp, m, n, bnz, *w, values ;
202  double *x, *Bx, *Cx ;
203  csr *C ;
204  if (!CS_CSC (A) || !CS_CSC (B)) return (NULL) ; /* check inputs */
205  if ( (A->m != B->m) || (A->n != B->n) ) return (NULL);
206  m = A->m ; anz = A->p [m] ;
207  n = B->n ; Bp = B->p ; Bx = B->x ; bnz = Bp [m] ;
208  w = (int*)calloc (CS_MAX(n,1), sizeof (int)) ; /* get workspace */
209  values = (A->x != NULL) && (Bx != NULL) ;
210  x = values ? (double*)malloc (n * sizeof (double)) : NULL ; /* get workspace */
211  C = csr_spalloc (m, n, anz + bnz, values, 0) ; /* allocate result*/
212  if (!C || !w || (values && !x)) return (csr_done (C, w, x, 0)) ;
213  Cp = C->p ; Cj = C->j ; Cx = C->x ;
214  for (j = 0 ; j < n ; j++)
215  {
216  Cp [j] = nz ; /* row j of C starts here */
217  nz = csr_scatter (A, j, alpha, w, x, j+1, C, nz) ; /* alpha*A(j,:)*/
218  nz = csr_scatter (B, j, beta, w, x, j+1, C, nz) ; /* beta*B(j,:) */
219  if (values) for (p = Cp [j] ; p < nz ; p++) Cx [p] = x [Cj [p]] ;
220  }
221  Cp [m] = nz ; /* finalize the last row of C */
222  csr_sprealloc (C, 0) ; /* remove extra space from C */
223  return (csr_done (C, w, x, 1)) ; /* success; free workspace, return C */
224 }
225 
226 /* C = A' */
227 csr *csr_transpose (const csr *A, int values)
228 {
229  int p, q, i, *Cp, *Cj, n, m, *Ap, *Aj, *w ;
230  double *Cx, *Ax ;
231  csr *C ;
232  if (!CS_CSC (A)) return (NULL) ; /* check inputs */
233  m = A->m ; n = A->n ; Ap = A->p ; Aj = A->j ; Ax = A->x ;
234  C = csr_spalloc (n, m, Ap [m], values && Ax, 0) ; /* allocate result */
235  w = (int*)calloc (CS_MAX(n,1), sizeof (int)) ; /* get workspace */
236  if (!C || !w) return (csr_done (C, w, NULL, 0)) ; /* out of memory */
237  Cp = C->p ; Cj = C->j ; Cx = C->x ;
238  for (p = 0 ; p < Ap [m] ; p++) w [Aj [p]]++ ; /* col counts */
239  csr_cumsum (Cp, w, n) ; /* col pointers */
240  for (i = 0 ; i < m ; i++)
241  {
242  for (p = Ap [i] ; p < Ap [i+1] ; p++)
243  {
244  Cj [q = w [Aj [p]]++] = i ; /* place A(i,j) as entry C(j,i) */
245  if (Cx) Cx [q] = Ax [p] ;
246  }
247  }
248  return (csr_done (C, w, NULL, 1)) ; /* success; free w and return C */
249 }
250 
251 /* C = A*B */
252 csr *csr_multiply (const csr *A, const csr *B)
253 {
254  int p, j, nz = 0, anz, *Cp, *Cj, *Ap, m, k, n, bnz, *w, values, *Aj;
255  double *x, *Ax, *Cx ;
256  csr *C ;
257  if (!CS_CSC (A) || !CS_CSC (B)) return (NULL) ; /* check inputs */
258  k = A->n;
259  if (k != B->m ) return(NULL);
260  m = A->m ; anz = A->p [A->m] ;
261  n = B->n ; Ap = A->p ; Aj = A->j ; Ax = A->x ; bnz = B->p [k] ;
262  w = (int*)calloc (CS_MAX(n,1), sizeof (int)) ; /* get workspace */
263  values = (Ax != NULL) && (B->x != NULL) ;
264  x = values ? (double*)malloc (n * sizeof (double)) : NULL ; /* get workspace */
265  C = csr_spalloc (m, n, anz + bnz, values, 0) ; /* allocate result */
266  if (!C || !w || (values && !x)) return (csr_done (C, w, x, 0)) ;
267  Cp = C->p ;
268  for (j = 0 ; j < m ; j++)
269  {
270  if (nz + n > C->nzmax && !csr_sprealloc (C, 2*(C->nzmax)+m))
271  {
272  return (csr_done (C, w, x, 0)) ; /* out of memory */
273  }
274  Cj = C->j ; Cx = C->x ; /* C->j and C->x may be reallocated */
275  Cp [j] = nz ; /* row j of C starts here */
276  for (p = Ap [j] ; p < Ap [j+1] ; p++)
277  {
278  nz = csr_scatter (B, Aj [p], Ax ? Ax [p] : 1, w, x, j+1, C, nz) ;
279  }
280  if (values) for (p = Cp [j] ; p < nz ; p++) Cx [p] = x [Cj [p]] ;
281  }
282  Cp [m] = nz ; /* finalize the last column of C */
283  csr_sprealloc (C, 0) ; /* remove extra space from C */
284  return (csr_done (C, w, x, 1)) ; /* success; free workspace, return C */
285 }
286 
287 
288 //-----------------------------------------------------------------
289 // Symbolic analysis
290 //-----------------------------------------------------------------
291 
292 /* symbolic ordering and analysis for LU */
293 css *csr_sqr (int order, const csr *A )
294 {
295  int n, ok = 1;
296  css *S ;
297  if (!CS_CSC (A)) return (NULL) ; /* check inputs */
298  n = A->n ;
299  S = (css*)calloc(1, sizeof (css)) ; /* allocate result S */
300  if (!S) return (NULL) ; /* out of memory */
301  S->q = csr_amd (order, A) ; /* fill-reducing ordering */
302  if (!S->q)
303  {
304  printf(" csr_sqr error no permutation\n");
305  }
306  if (order && !S->q) return (csr_sfree (S)) ;
307 
308  /* LU factorization */
309  S->unz = (double) CS_MIN(4*(A->p [n]) + n, n * n );
310  S->lnz = S->unz ; /* guess nnz(L) and nnz(U) */
311  return (ok ? S : csr_sfree (S)) ; /* return result S */
312 }
313 
314 
315 /* xi [top...n-1] = nodes reachable from graph of G*P' via nodes in B(:,k).
316  * xi [n...2n-1] used as workspace */
317 int csr_reach (csr *G, const csr *B, int k, int *xi, const int *pinv)
318 {
319  int p, m, top, *Bp, *Bj, *Gp ;
320  if (!CS_CSC (G) || !CS_CSC (B) || !xi) return (-1) ; /* check inputs */
321  m = G->m ; Bp = B->p ; Bj = B->j ; Gp = G->p ;
322  top = m ;
323  for (p = Bp [k] ; p < Bp [k+1] ; p++)
324  {
325  if (!CS_MARKED (Gp, Bj [p])) /* start a dfs at unmarked node i */
326  {
327  top = csr_dfs (Bj [p], G, top, xi, xi+m, pinv) ;
328  }
329  }
330  for (p = top ; p < m ; p++) CS_MARK (Gp, xi [p]) ; /* restore G */
331  return (top) ;
332 }
333 
334 /* depth-first-search of the graph of a csr matrix, starting at node j */
335 int csr_dfs (int j, csr *G, int top, int *xi, int *pstack, const int *pinv)
336 {
337  int i, p, p2, done, jnew, head = 0, *Gp, *Gj ;
338  if (!CS_CSC (G) || !xi || !pstack) return (-1) ; /* check inputs */
339  Gp = G->p ; Gj = G->j ;
340  xi [0] = j ; /* initialize the recursion stack */
341  while (head >= 0)
342  {
343  j = xi [head] ; /* get j from the top of the recursion stack */
344  jnew = pinv ? (pinv [j]) : j ;
345  if (!CS_MARKED (Gp, j))
346  {
347  CS_MARK (Gp, j) ; /* mark node j as visited */
348  pstack [head] = (jnew < 0) ? 0 : CS_UNFLIP (Gp [jnew]) ;
349  }
350  done = 1 ; /* node j done if no unvisited neighbors */
351  p2 = (jnew < 0) ? 0 : CS_UNFLIP (Gp [jnew+1]) ;
352  for (p = pstack [head] ; p < p2 ; p++) /* examine all neighbors of j */
353  {
354  i = Gj [p] ; /* consider neighbor node i */
355  if (CS_MARKED (Gp, i)) continue ; /* skip visited node i */
356  pstack [head] = p ; /* pause depth-first search of node j */
357  xi [++head] = i ; /* start dfs at node i */
358  done = 0 ; /* node j is not done */
359  break ; /* break, to start dfs (i) */
360  }
361  if (done) /* depth-first search at node j is done */
362  {
363  head-- ; /* remove j from the recursion stack */
364  xi [--top] = j ; /* and place in the output stack */
365  }
366  }
367  return (top) ;
368 }
369 
370 /* depth-first search and postorder of a tree rooted at node j */
371 int csr_tdfs (int j, int k, int *head, const int *next, int *post, int *stack)
372 {
373  int i, p, top = 0 ;
374  if (!head || !next || !post || !stack) return (-1) ; /* check inputs */
375  stack [0] = j ; /* place j on the stack */
376  while (top >= 0) /* while (stack is not empty) */
377  {
378  p = stack [top] ; /* p = top of stack */
379  i = head [p] ; /* i = youngest child of p */
380  if (i == -1)
381  {
382  top-- ; /* p has no unordered children left */
383  post [k++] = p ; /* node p is the kth postordered node */
384  }
385  else
386  {
387  head [p] = next [i] ; /* remove i from children of p */
388  stack [++top] = i ; /* start dfs on child node i */
389  }
390  }
391  return (k) ;
392 }
393 
394 //-----------------------------------------------------------------
395 // LU factorization
396 //-----------------------------------------------------------------
397 
398 /*
399  * Given sparse A,
400  * [L,U,pinv]=lu(A, [q lnz unz]). lnz and unz can be guesses
401  * Hypotheses: m=n, Lj[Lp[i+1]-1]=i and Uj[Lp[i]]=i
402  */
403 csrn *csr_lu (const csr *A, const css *S, double tol)
404 {
405  csr *L, *U ;
406  csrn *N ;
407  double pivot, *Lx, *Ux, *x, a, t;
408  int *Lp, *Lj, *Up, *Uj, *pinv, *xi, *q, n, ipiv, k, top, p, i, row, lnz, unz;
409  int debug = 0;
410 
411  if (!CS_CSC (A) ) printf(" error csrlu: A not csc\n");
412  if (!CS_CSC (A) || !S) return (NULL) ; /* check inputs */
413  n = A->n ;
414  if (n != A->m) return (NULL) ; /* check inputs */
415  q = S->q ; lnz = (int)S->lnz ; unz = (int)S->unz ;
416  x = (double*)malloc(n * sizeof (double)) ; /* get double workspace */
417  xi = (int*)malloc (2*n * sizeof (int)) ; /* get int workspace */
418  N = (csrn*)calloc (1, sizeof (csrn)) ; /* allocate result */
419  if (!(A) ) printf(" error csrlu: allocation of N failed\n");
420  if (!x || !xi || !N) return (csr_ndone (N, NULL, xi, x, 0)) ;
421 
422  N->L = L = csr_spalloc (n, n, lnz, 1, 0) ; /* allocate result L */
423  N->U = U = csr_spalloc (n, n, unz, 1, 0) ; /* allocate result U */
424  N->pinv = pinv = (int*)malloc (n * sizeof (int)) ; /* allocate result pinv */
425  N->perm = (int*)malloc (n * sizeof (int)) ; /* allocate result perm */
426  if (!L || !U || !pinv) return (csr_ndone (N, NULL, xi, x, 0)) ;
427  Lp = L->p ; Up = U->p ;
428  for (i = 0 ; i < n ; i++) x [i] = 0 ; /* clear workspace */
429  for (i = 0 ; i < n ; i++) pinv [i] = -1 ; /* no rows pivotal yet */
430  for (k = 1 ; k <= n ; k++) Up [k] = 0 ; /* no rows of U yet */
431  for (k = 1 ; k <= n ; k++) Lp [k] = 0 ; /* no rows of L yet either */
432  lnz = unz = 0 ;
433  if( debug )
434  {
435  printf ("A:\n") ; csr_print (A, 0) ;
436  }
437  for (k = 0 ; k < n ; k++) /* compute L(:,k) and U(:,k) */
438  {
439  /* --- Triangular solve --------------------------------------------- */
440  Lp [k] = lnz ; /* L(:,k) starts here */
441  Up [k] = unz ; /* U(:,k) starts here */
442  if ((lnz + n > L->nzmax && !csr_sprealloc (L, 2*L->nzmax + n)) ||
443  (unz + n > U->nzmax && !csr_sprealloc (U, 2*U->nzmax + n)))
444  {
445  return (csr_ndone (N, NULL, xi, x, 0)) ;
446  }
447  Lj = L->j ; Lx = L->x ; Uj = U->j ; Ux = U->x ;
448  row = q ? (q [k]) : k ;
449  if( debug > 1 )
450  {
451  printf("--------------------------------\n");
452  printf(" %d spsolve row=%d \n",k, row);
453  printf(" pinv = %d %d %d %d \n", pinv[0], pinv[1], pinv[2], pinv[3]);
454  }
455  top = csr_spsolve (U, A, row, xi, x, pinv, 1) ; /* x = U\A(row,:) */
456  if( debug > 1 ) printf("top=%d x = %g %g %g %g \n", top,x[0],x[1],x[2],x[3]);
457  /* --- Find pivot --------------------------------------------------- */
458  ipiv = -1 ;
459  a = -1 ;
460  for (p = top ; p < n ; p++)
461  {
462  i = xi [p] ; /* x(i) is nonzero */
463  if (pinv [i] < 0) /* row i is not yet pivotal */
464  {
465  if ((t = fabs (x [i])) > a)
466  {
467  a = t ; /* largest pivot candidate so far */
468  ipiv = i ;
469  }
470  }
471  else /* x(i) is the entry L(pinv[i],k) */
472  {
473  Lj [lnz] = pinv [i] ;
474  Lx [lnz++] = x [i] ;
475  }
476  }
477  if (ipiv == -1 || a <= 0) return (csr_ndone (N, NULL, xi, x, 0)) ;
478  if (pinv [row] < 0 && fabs (x [row]) >= a*tol) ipiv = row;
479  pivot = x [ipiv] ; /* the chosen pivot */
480  Lj [lnz] = k ; /* last entry in L(:,k) is L(k,k) */
481  Lx [lnz++] = pivot ;
482  if( debug > 1 ) { printf ("L:") ; csr_print (L, 0) ; }
483 
484  /* --- Divide by pivot ---------------------------------------------- */
485  pinv [ipiv] = k ; /* ipiv is the kth pivot row */
486  Uj [unz] = ipiv ; /* first entry in U(:,k) is U(k,k) = 1 */
487  Ux [unz++] = 1 ;
488  for (p = top ; p < n ; p++) /* U(k+1:n,k) = x / pivot */
489  {
490  i = xi [p] ;
491  if (pinv [i] < 0) /* x(i) is an entry in U(:,k) */
492  {
493  Uj [unz] = i ; /* save unpermuted row in U */
494  Ux [unz++] = x [i] / pivot ; /* scale pivot row */
495  }
496  x [i] = 0 ; /* x [0..n-1] = 0 for next k */
497  }
498  if( debug > 1 )
499  {
500  printf ("U:") ; csr_print (U, 0) ;
501  printf("------------------------------------\n");
502  }
503  }
504  /* --- Finalize U and L ------------------------------------------------- */
505  Lp [n] = lnz ;
506  if( debug ) { printf ("L:") ; csr_print (L, 0) ; }
507  Up [n] = unz ;
508  Uj = U->j ; /* fix column indices of U for final pinv */
509  for (p = 0 ; p < unz ; p++) Uj [p] = pinv [Uj [p]] ;
510 
511  csr_sprealloc (L, 0) ; /* remove extra space from L and U */
512  csr_sprealloc (U, 0) ;
513  if( debug ) { printf ("U:") ; csr_print (U, 0) ; }
514  return (csr_ndone (N, NULL, xi, x, 1)) ; /* success */
515 }
516 
517 //-----------------------------------------------------------------
518 // Triangular solves
519 //-----------------------------------------------------------------
520 
521 /* solve xG=b(k,:), where G is either upper (up=1) or lower (up=0) triangular */
522 int csr_spsolve (csr *G, const csr *B, int k, int *xi, double *x, const int *pinv, int up)
523 {
524  int i, I, p, q, px, top, n, *Gp, *Gj, *Bp, *Bj ;
525  int debug = 0;
526  double *Gx, *Bx ;
527  if (!CS_CSC (G) || !CS_CSC (B) || !xi || !x) return (-1) ;
528  Gp = G->p ; Gj = G->j ; Gx = G->x ; n = G->n ;
529  Bp = B->p ; Bj = B->j ; Bx = B->x ;
530  top = csr_reach (G, B, k, xi, pinv) ; /* xi[top..n-1]=Reach(B(:,k)) */
531  for (p = top ; p < n ; p++) x [xi [p]] = 0 ; /* clear x */
532  for (p = Bp [k] ; p < Bp [k+1] ; p++) x [Bj [p]] = Bx [p] ; /* scatter B */
533  if( debug )
534  printf("solve k=%d x= %g %g %g %g \n", k, x[0], x[1], x[2], x[3]);
535  if( debug )
536  printf("top=%d xi= %d %d %d %d \n", top , xi[0], xi[1], xi[2], xi[3]);
537  for (px = top ; px < n ; px++)
538  {
539  i = xi [px] ; /* x(i) is nonzero */
540  I = pinv ? (pinv [i]) : i ; /* i maps to col I of G */
541  if (I < 0) continue ; /* row I is empty */
542  /* dmd */
543  x [i] /= Gx [up ? (Gp [I]) : (Gp [I+1]-1)] ;/* x(i) /= G(i,i) */
544  p = up ? (Gp [I]+1) : (Gp [I]) ; /* up: L(i,i) 1st entry */
545  q = up ? (Gp [I+1]) : (Gp [I+1]-1) ; /* up: U(i,i) last entry */
546  for ( ; p < q ; p++)
547  {
548  if( debug )
549  printf("%d %d solve %d %g %g \n", px, i ,p, Gx [p] , x [Gj [p]] );
550 
551  x [Gj[p]] -= Gx [p] * x [i] ; /* x(i) -= G(i,j) * x(j) */
552  }
553  if( debug )
554  printf(" x= %g %g %g %g \n", x[0], x[1], x[2], x[3]);
555  }
556  return (top) ; /* return top of stack */
557 }
558 
559 //-----------------------------------------------------------------
560 // AMD routine
561 //-----------------------------------------------------------------
562 
563 /*
564  * amd for csr matrices , and order =1
565  * Hypothesis: m = n
566  */
567 /* clear w */
568 static int csr_wclear (int mark, int lemax, int *w, int n)
569 {
570  int k ;
571  if (mark < 2 || (mark + lemax < 0))
572  {
573  for (k = 0 ; k < n ; k++) if (w [k] != 0) w [k] = 1 ;
574  mark = 2 ;
575  }
576  return (mark) ; /* at this point, w [0..n-1] < mark holds */
577 }
578 
579 /* keep off-diagonal entries; drop diagonal entries */
580 static int csr_diag (int i, int j, double /* aij */, void * /* other */) { return (i != j) ; }
581 
582 /* p = amd(A+A') if symmetric is true, or amd(A'A) otherwise */
583 int *csr_amd (int order, const csr *A) /* order 0:natural, 1:Chol, 2:LU, 3:QR */
584 {
585  csr *C, *A2, *AT ;
586  int *Cp, *Cj, *last, *W, *len, *nv, *next, *P, *head, *elen, *degree, *w,
587  *hhead, *ATp, *ATj, d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1,
588  k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi,
589  ok, cnz, nel = 0, p, p1, p2, p3, p4, pj, pk, pk1, pk2, pn, q, n, m, t ;
590  unsigned int h ;
591  /* --- Construct matrix C ----------------------------------------------- */
592  if (!CS_CSC (A) || order <= 0 || order > 3) return (NULL) ; /* check */
593  AT = csr_transpose (A, 0) ; /* compute A' */
594  if (!AT) return (NULL) ;
595  m = A->m ; n = A->n ;
596  if ( n != m) return(NULL); /* For rectangular matrices, use csr_amd */
597  dense = (int)CS_MAX (16, 10 * sqrt ((double) n)) ; /* find dense threshold */
598  dense = CS_MIN (n-2, dense) ;
599  if (order == 1 && n == m)
600  {
601  C = csr_add (A, AT, 0, 0) ; /* C = A+A' */
602  }
603  else if (order == 2)
604  {
605  ATp = AT->p ; /* drop dense columns from AT */
606  ATj = AT->j ;
607  for (p2 = 0, j = 0 ; j < m ; j++)
608  {
609  p = ATp [j] ; /* column j of AT starts here */
610  ATp [j] = p2 ; /* new column j starts here */
611  if (ATp [j+1] - p > dense) continue ; /* skip dense col j */
612  for ( ; p < ATp [j+1] ; p++) ATj [p2++] = ATj [p] ;
613  }
614  ATp [m] = p2 ; /* finalize AT */
615  A2 = csr_transpose (AT, 0) ; /* A2 = AT' */
616  C = A2 ? csr_multiply (AT, A2) : NULL ; /* C=A'*A with no dense rows */
617  csr_spfree (A2) ;
618  }
619  else
620  {
621  C = csr_multiply (AT, A) ; /* C=A'*A */
622  }
623  csr_spfree (AT) ;
624  if (!C) return (NULL) ;
625  csr_fkeep (C, &csr_diag, NULL) ; /* drop diagonal entries */
626  Cp = C->p ;
627  cnz = Cp [n] ;
628  P = (int*)malloc (CS_MAX(n+1,1) * sizeof (int)) ; /* allocate result */
629  W = (int*)malloc (CS_MAX(8*(n+1),1) * sizeof (int)) ; /* get workspace */
630  t = cnz + cnz/5 + 2*n ; /* add elbow room to C */
631  if (!P || !W || !csr_sprealloc (C, t)) return (csr_idone (P, C, W, 0)) ;
632  len = W ; nv = W + (n+1) ; next = W + 2*(n+1) ;
633  head = W + 3*(n+1) ; elen = W + 4*(n+1) ; degree = W + 5*(n+1) ;
634  w = W + 6*(n+1) ; hhead = W + 7*(n+1) ;
635  last = P ; /* use P as workspace for last */
636  /* --- Initialize quotient graph ---------------------------------------- */
637  for (k = 0 ; k < n ; k++) len [k] = Cp [k+1] - Cp [k] ;
638  len [n] = 0 ;
639  nzmax = C->nzmax ;
640  Cj = C->j ;
641  for (i = 0 ; i <= n ; i++)
642  {
643  head [i] = -1 ; /* degree list i is empty */
644  last [i] = -1 ;
645  next [i] = -1 ;
646  hhead [i] = -1 ; /* hash list i is empty */
647  nv [i] = 1 ; /* node i is just one node */
648  w [i] = 1 ; /* node i is alive */
649  elen [i] = 0 ; /* Ek of node i is empty */
650  degree [i] = len [i] ; /* degree of node i */
651  }
652  mark = csr_wclear (0, 0, w, n) ; /* clear w */
653  elen [n] = -2 ; /* n is a dead element */
654  Cp [n] = -1 ; /* n is a root of assembly tree */
655  w [n] = 0 ; /* n is a dead element */
656  /* --- Initialize degree lists ------------------------------------------ */
657  for (i = 0 ; i < n ; i++)
658  {
659  d = degree [i] ;
660  if (d == 0) /* node i is empty */
661  {
662  elen [i] = -2 ; /* element i is dead */
663  nel++ ;
664  Cp [i] = -1 ; /* i is a root of assemby tree */
665  w [i] = 0 ;
666  }
667  else if (d > dense) /* node i is dense */
668  {
669  nv [i] = 0 ; /* absorb i into element n */
670  elen [i] = -1 ; /* node i is dead */
671  nel++ ;
672  Cp [i] = CS_FLIP (n) ;
673  nv [n]++ ;
674  }
675  else
676  {
677  if (head [d] != -1) last [head [d]] = i ;
678  next [i] = head [d] ; /* put node i in degree list d */
679  head [d] = i ;
680  }
681  }
682  while (nel < n) /* while (selecting pivots) do */
683  {
684  /* --- Select node of minimum approximate degree -------------------- */
685  for (k = -1 ; mindeg < n && (k = head [mindeg]) == -1 ; mindeg++) ;
686  if (next [k] != -1) last [next [k]] = -1 ;
687  head [mindeg] = next [k] ; /* remove k from degree list */
688  elenk = elen [k] ; /* elenk = |Ek| */
689  nvk = nv [k] ; /* # of nodes k represents */
690  nel += nvk ; /* nv[k] nodes of A eliminated */
691  /* --- Garbage collection ------------------------------------------- */
692  if (elenk > 0 && cnz + mindeg >= nzmax)
693  {
694  for (j = 0 ; j < n ; j++)
695  {
696  if ((p = Cp [j]) >= 0) /* j is a live node or element */
697  {
698  Cp [j] = Cj [p] ; /* save first entry of object */
699  Cj [p] = CS_FLIP (j) ; /* first entry is now CS_FLIP(j) */
700  }
701  }
702  for (q = 0, p = 0 ; p < cnz ; ) /* scan all of memory */
703  {
704  if ((j = CS_FLIP (Cj [p++])) >= 0) /* found object j */
705  {
706  Cj [q] = Cp [j] ; /* restore first entry of object */
707  Cp [j] = q++ ; /* new pointer to object j */
708  for (k3 = 0 ; k3 < len [j]-1 ; k3++) Cj [q++] = Cj [p++] ;
709  }
710  }
711  cnz = q ; /* Cj [cnz...nzmax-1] now free */
712  }
713  /* --- Construct new element ---------------------------------------- */
714  dk = 0 ;
715  nv [k] = -nvk ; /* flag k as in Lk */
716  p = Cp [k] ;
717  pk1 = (elenk == 0) ? p : cnz ; /* do in place if elen[k] == 0 */
718  pk2 = pk1 ;
719  for (k1 = 1 ; k1 <= elenk + 1 ; k1++)
720  {
721  if (k1 > elenk)
722  {
723  e = k ; /* search the nodes in k */
724  pj = p ; /* list of nodes starts at Cj[pj]*/
725  ln = len [k] - elenk ; /* length of list of nodes in k */
726  }
727  else
728  {
729  e = Cj [p++] ; /* search the nodes in e */
730  pj = Cp [e] ;
731  ln = len [e] ; /* length of list of nodes in e */
732  }
733  for (k2 = 1 ; k2 <= ln ; k2++)
734  {
735  i = Cj [pj++] ;
736  if ((nvi = nv [i]) <= 0) continue ; /* node i dead, or seen */
737  dk += nvi ; /* degree[Lk] += size of node i */
738  nv [i] = -nvi ; /* negate nv[i] to denote i in Lk*/
739  Cj [pk2++] = i ; /* place i in Lk */
740  if (next [i] != -1) last [next [i]] = last [i] ;
741  if (last [i] != -1) /* remove i from degree list */
742  {
743  next [last [i]] = next [i] ;
744  }
745  else
746  {
747  head [degree [i]] = next [i] ;
748  }
749  }
750  if (e != k)
751  {
752  Cp [e] = CS_FLIP (k) ; /* absorb e into k */
753  w [e] = 0 ; /* e is now a dead element */
754  }
755  }
756  if (elenk != 0) cnz = pk2 ; /* Cj [cnz...nzmax] is free */
757  degree [k] = dk ; /* external degree of k - |Lk\i| */
758  Cp [k] = pk1 ; /* element k is in Cj[pk1..pk2-1] */
759  len [k] = pk2 - pk1 ;
760  elen [k] = -2 ; /* k is now an element */
761  /* --- Find set differences ----------------------------------------- */
762  mark = csr_wclear (mark, lemax, w, n) ; /* clear w if necessary */
763  for (pk = pk1 ; pk < pk2 ; pk++) /* scan 1: find |Le\Lk| */
764  {
765  i = Cj [pk] ;
766  if ((eln = elen [i]) <= 0) continue ;/* skip if elen[i] empty */
767  nvi = -nv [i] ; /* nv [i] was negated */
768  wnvi = mark - nvi ;
769  for (p = Cp [i] ; p <= Cp [i] + eln - 1 ; p++) /* scan Ei */
770  {
771  e = Cj [p] ;
772  if (w [e] >= mark)
773  {
774  w [e] -= nvi ; /* decrement |Le\Lk| */
775  }
776  else if (w [e] != 0) /* ensure e is a live element */
777  {
778  w [e] = degree [e] + wnvi ; /* 1st time e seen in scan 1 */
779  }
780  }
781  }
782  /* --- Degree update ------------------------------------------------ */
783  for (pk = pk1 ; pk < pk2 ; pk++) /* scan2: degree update */
784  {
785  i = Cj [pk] ; /* consider node i in Lk */
786  p1 = Cp [i] ;
787  p2 = p1 + elen [i] - 1 ;
788  pn = p1 ;
789  for (h = 0, d = 0, p = p1 ; p <= p2 ; p++) /* scan Ei */
790  {
791  e = Cj [p] ;
792  if (w [e] != 0) /* e is an unabsorbed element */
793  {
794  dext = w [e] - mark ; /* dext = |Le\Lk| */
795  if (dext > 0)
796  {
797  d += dext ; /* sum up the set differences */
798  Cj [pn++] = e ; /* keep e in Ei */
799  h += e ; /* compute the hash of node i */
800  }
801  else
802  {
803  Cp [e] = CS_FLIP (k) ; /* aggressive absorb. e->k */
804  w [e] = 0 ; /* e is a dead element */
805  }
806  }
807  }
808  elen [i] = pn - p1 + 1 ; /* elen[i] = |Ei| */
809  p3 = pn ;
810  p4 = p1 + len [i] ;
811  for (p = p2 + 1 ; p < p4 ; p++) /* prune edges in Ai */
812  {
813  j = Cj [p] ;
814  if ((nvj = nv [j]) <= 0) continue ; /* node j dead or in Lk */
815  d += nvj ; /* degree(i) += |j| */
816  Cj [pn++] = j ; /* place j in node list of i */
817  h += j ; /* compute hash for node i */
818  }
819  if (d == 0) /* check for mass elimination */
820  {
821  Cp [i] = CS_FLIP (k) ; /* absorb i into k */
822  nvi = -nv [i] ;
823  dk -= nvi ; /* |Lk| -= |i| */
824  nvk += nvi ; /* |k| += nv[i] */
825  nel += nvi ;
826  nv [i] = 0 ;
827  elen [i] = -1 ; /* node i is dead */
828  }
829  else
830  {
831  degree [i] = CS_MIN (degree [i], d) ; /* update degree(i) */
832  Cj [pn] = Cj [p3] ; /* move first node to end */
833  Cj [p3] = Cj [p1] ; /* move 1st el. to end of Ei */
834  Cj [p1] = k ; /* add k as 1st element in of Ei */
835 
836  len [i] = pn - p1 + 1 ; /* new len of adj. list of node i */
837  h %= n ; /* finalize hash of i */
838  next [i] = hhead [h] ; /* place i in hash bucket */
839  hhead [h] = i ;
840  last [i] = h ; /* save hash of i in last[i] */
841  }
842  } /* scan2 is done */
843  degree [k] = dk ; /* finalize |Lk| */
844  lemax = CS_MAX (lemax, dk) ;
845  mark = csr_wclear (mark+lemax, lemax, w, n) ; /* clear w */
846  /* --- Supernode detection ------------------------------------------ */
847  for (pk = pk1 ; pk < pk2 ; pk++)
848  {
849  i = Cj [pk] ;
850  if (nv [i] >= 0) continue ; /* skip if i is dead */
851  h = last [i] ; /* scan hash bucket of node i */
852  i = hhead [h] ;
853  hhead [h] = -1 ; /* hash bucket will be empty */
854  for ( ; i != -1 && next [i] != -1 ; i = next [i], mark++)
855  {
856  ln = len [i] ;
857  eln = elen [i] ;
858  for (p = Cp [i]+1 ; p <= Cp [i] + ln-1 ; p++) w [Cj [p]] = mark;
859  jlast = i ;
860  for (j = next [i] ; j != -1 ; ) /* compare i with all j */
861  {
862  ok = (len [j] == ln) && (elen [j] == eln) ;
863  for (p = Cp [j] + 1 ; ok && p <= Cp [j] + ln - 1 ; p++)
864  {
865  if (w [Cj [p]] != mark) ok = 0 ; /* compare i and j*/
866  }
867  if (ok) /* i and j are identical */
868  {
869  Cp [j] = CS_FLIP (i) ; /* absorb j into i */
870  nv [i] += nv [j] ;
871  nv [j] = 0 ;
872  elen [j] = -1 ; /* node j is dead */
873  j = next [j] ; /* delete j from hash bucket */
874  next [jlast] = j ;
875  }
876  else
877  {
878  jlast = j ; /* j and i are different */
879  j = next [j] ;
880  }
881  }
882  }
883  }
884  /* --- Finalize new element------------------------------------------ */
885  for (p = pk1, pk = pk1 ; pk < pk2 ; pk++) /* finalize Lk */
886  {
887  i = Cj [pk] ;
888  if ((nvi = -nv [i]) <= 0) continue ;/* skip if i is dead */
889  nv [i] = nvi ; /* restore nv[i] */
890  d = degree [i] + dk - nvi ; /* compute external degree(i) */
891  d = CS_MIN (d, n - nel - nvi) ;
892  if (head [d] != -1) last [head [d]] = i ;
893  next [i] = head [d] ; /* put i back in degree list */
894  last [i] = -1 ;
895  head [d] = i ;
896  mindeg = CS_MIN (mindeg, d) ; /* find new minimum degree */
897  degree [i] = d ;
898  Cj [p++] = i ; /* place i in Lk */
899  }
900  nv [k] = nvk ; /* # nodes absorbed into k */
901  if ((len [k] = p-pk1) == 0) /* length of adj list of element k*/
902  {
903  Cp [k] = -1 ; /* k is a root of the tree */
904  w [k] = 0 ; /* k is now a dead element */
905  }
906  if (elenk != 0) cnz = p ; /* free unused space in Lk */
907  }
908  /* --- Postordering ----------------------------------------------------- */
909  for (i = 0 ; i < n ; i++) Cp [i] = CS_FLIP (Cp [i]) ;/* fix assembly tree */
910  for (j = 0 ; j <= n ; j++) head [j] = -1 ;
911  for (j = n ; j >= 0 ; j--) /* place unordered nodes in lists */
912  {
913  if (nv [j] > 0) continue ; /* skip if j is an element */
914  next [j] = head [Cp [j]] ; /* place j in list of its parent */
915  head [Cp [j]] = j ;
916  }
917  for (e = n ; e >= 0 ; e--) /* place elements in lists */
918  {
919  if (nv [e] <= 0) continue ; /* skip unless e is an element */
920  if (Cp [e] != -1)
921  {
922  next [e] = head [Cp [e]] ; /* place e in list of its parent */
923  head [Cp [e]] = e ;
924  }
925  }
926  for (k = 0, i = 0 ; i <= n ; i++) /* postorder the assembly tree */
927  {
928  if (Cp [i] == -1) k = csr_tdfs (i, k, head, next, P, w) ;
929  }
930  return (csr_idone (P, C, W, 1)) ;
931 }
932 
933 
934 //-----------------------------------------------------------------
935 // Misc utilities
936 //-----------------------------------------------------------------
937 
938 /* print a sparse matrix */
939 int csr_print (const csr *A, int brief)
940 {
941  int p, j, m, n, nzmax, nz, nnz, *Ap, *Aj ;
942  double *Ax ;
943  if (!A) { printf ("(null)\n") ; return (0) ; }
944  m = A->m ; n = A->n ; Ap = A->p ; Aj = A->j ; Ax = A->x ;
945  nzmax = A->nzmax ; nz = A->nz ; nnz = Ap [m];
946  if (nz < 0)
947  {
948  if( nnz == 0)
949  { /* print intermeidate matrices from csr_lu */
950  while ((Ap[m] == 0) && (m > 0))
951  {
952  --m;
953  }
954  nnz = Ap [m];
955  }
956  if( nnz > 0)
957  {
958  printf ("%d-by-%d, nzmax: %d nnz: %d, mxnorm: %g\n", m, n, nzmax,
959  Ap [m], csr_norm (A)) ;
960  for (j = 0 ; j < m ; j++)
961  {
962  printf (" row %d : locations %d to %d\n", j, Ap [j], Ap [j+1]-1);
963  for (p = Ap [j] ; p < Ap [j+1] ; p++)
964  {
965  printf (" %d : %g\n", Aj [p], Ax ? Ax [p] : 1) ;
966  if (brief && p > 20) { printf (" ...\n") ; return (1) ; }
967  }
968  }
969  }else{
970  printf ("%d-by-%d, zero matrix with nzmax: %d\n", m, n, nzmax);
971  }
972  }
973  else
974  {
975  printf ("triplet: %d-by-%d, nzmax: %d nnz: %d\n", m, n, nzmax, nz) ;
976  for (p = 0 ; p < nz ; p++)
977  {
978  printf (" %d %d : %g\n", Aj [p], Ap [p], Ax ? Ax [p] : 1) ;
979  if (brief && p > 20) { printf (" ...\n") ; return (1) ; }
980  }
981  }
982  return (1) ;
983 }
984 
985 /* infinity-norm of a sparse matrix = norm(A,inf), max row sum */
986 double csr_norm (const csr *A)
987 {
988  int p, j, m, *Ap ;
989  double *Ax, norm = 0, s ;
990  if (!CS_CSC (A) || !A->x) return (-1) ; /* check inputs */
991  m = A->m ; Ap = A->p ; Ax = A->x ;
992  for (j = 0 ; j < m ; j++)
993  {
994  for (s = 0, p = Ap [j] ; p < Ap [j+1] ; p++) s += fabs (Ax [p]);
995  norm = CS_MAX (norm, s) ;
996  }
997  return (norm) ;
998 }
999 
1000 /* drop entries for which fkeep(A(i,j)) is false; return nz if OK, else -1 */
1001 int csr_fkeep (csr *A, int (*fkeep) (int, int, double, void *), void *other)
1002 {
1003  int j, p, nz = 0, m, *Ap, *Aj ;
1004  double *Ax ;
1005  if (!CS_CSC (A) || !fkeep) return (-1) ; /* check inputs */
1006  m = A->m ; Ap = A->p ; Aj = A->j ; Ax = A->x ;
1007  for (j = 0 ; j < m ; j++)
1008  {
1009  p = Ap [j] ; /* get current location of col j */
1010  Ap [j] = nz ; /* record new location of col j */
1011  for ( ; p < Ap [j+1] ; p++)
1012  {
1013  if (fkeep (Aj [p], j, Ax ? Ax [p] : 1, other))
1014  {
1015  if (Ax) Ax [nz] = Ax [p] ; /* keep A(i,j) */
1016  Aj [nz++] = Aj [p] ;
1017  }
1018  }
1019  }
1020  Ap [m] = nz ; /* finalize A */
1021  csr_sprealloc (A, 0) ; /* remove extra space from A */
1022  return (nz) ;
1023 }