43 #include "Ifpack_ConfigDefs.h"
44 #include "Ifpack_IKLU_Utils.h"
51 csr *csr_spalloc (
int m,
int n,
int nzmax,
int values,
int triplet)
54 if (!A)
return (NULL) ;
57 A->nzmax = nzmax = CS_MAX (nzmax, 1) ;
58 A->nz = triplet ? 0 : -1 ;
59 A->p = (
int*)malloc (triplet ? CS_MAX(nzmax,1) *
sizeof (int) : CS_MAX(m+1,1) *
sizeof (int)) ;
60 A->j = (
int*)malloc (CS_MAX(nzmax,1) *
sizeof (int)) ;
61 A->x = values ? (
double*)malloc (CS_MAX(nzmax,1) *
sizeof (double)) : NULL ;
62 return ((!A->p || !A->j || (values && !A->x)) ? csr_spfree (A) : A) ;
66 int csr_sprealloc (
csr *A,
int nzmax)
68 int ok, oki, okj = 1, okx = 1 ;
70 nzmax = (nzmax <= 0) ? (A->p [A->m]) : nzmax ;
71 A->j = (
int*)csr_realloc (A->j, nzmax, sizeof (
int), &oki) ;
72 if (CS_TRIPLET (A)) A->p = (
int*)csr_realloc (A->p, nzmax, sizeof (
int), &okj) ;
73 if (A->x) A->x = (
double*)csr_realloc (A->x, nzmax, sizeof (
double), &okx) ;
74 ok = (oki && okj && okx) ;
75 if (ok) A->nzmax = nzmax ;
80 void *csr_realloc (
void *p,
int n,
size_t size,
int *ok)
83 pnew = realloc (p, CS_MAX (n,1) * size) ;
84 *ok = (pnew != NULL) ;
85 return ((*ok) ? pnew : p) ;
91 if (!A)
return (NULL);
102 if (!S)
return (NULL) ;
103 if (S->pinv) free(S->pinv);
104 if (S->q) free(S->q);
105 if (S->parent) free(S->parent);
106 if (S->cp) free(S->cp);
107 if (S->leftmost) free(S->leftmost);
114 if (!N)
return (NULL) ;
117 if (N->pinv) free(N->pinv);
118 if (N->perm) free(N->perm);
119 if (N->B) free(N->B);
125 csr *csr_done (
csr *C,
void *w,
void *x,
int ok)
129 return (ok ? C : csr_spfree (C)) ;
133 csrn *csr_ndone (
csrn *N,
csr *C,
void *w,
void *x,
int ok)
138 return (ok ? N : csr_nfree (N)) ;
142 int *csr_idone (
int *p,
csr *C,
void *w,
int ok)
160 double csr_cumsum (
int *p,
int *c,
int n)
164 if (!p || !c)
return (-1) ;
165 for (i = 0 ; i < n ; i++)
177 int csr_scatter (
const csr *B,
int i,
double alpha,
int *w,
double *x,
int mark,
180 int j, p, *Bp, *Bj, *Cj ;
182 if (!CS_CSC (B) || !w || !CS_CSC (C))
return (-1) ;
183 Bp = B->p ; Bj = B->j ; Bx = B->x ; Cj = C->j ;
184 for (p = Bp [i] ; p < Bp [i+1] ; p++)
191 if (x) x [j] = alpha * Bx [p] ;
193 else if (x) x [j] += alpha * Bx [p] ;
199 csr *csr_add (
const csr *A,
const csr *B,
double alpha,
double beta)
201 int p, j, nz = 0, anz, *Cp, *Cj, *Bp, m, n, bnz, *w, values ;
202 double *x, *Bx, *Cx ;
204 if (!CS_CSC (A) || !CS_CSC (B))
return (NULL) ;
205 if ( (A->m != B->m) || (A->n != B->n) )
return (NULL);
206 m = A->m ; anz = A->p [m] ;
207 n = B->n ; Bp = B->p ; Bx = B->x ; bnz = Bp [m] ;
208 w = (
int*)calloc (CS_MAX(n,1),
sizeof (int)) ;
209 values = (A->x != NULL) && (Bx != NULL) ;
210 x = values ? (
double*)malloc (n *
sizeof (
double)) : NULL ;
211 C = csr_spalloc (m, n, anz + bnz, values, 0) ;
212 if (!C || !w || (values && !x))
return (csr_done (C, w, x, 0)) ;
213 Cp = C->p ; Cj = C->j ; Cx = C->x ;
214 for (j = 0 ; j < n ; j++)
217 nz = csr_scatter (A, j, alpha, w, x, j+1, C, nz) ;
218 nz = csr_scatter (B, j, beta, w, x, j+1, C, nz) ;
219 if (values)
for (p = Cp [j] ; p < nz ; p++) Cx [p] = x [Cj [p]] ;
222 csr_sprealloc (C, 0) ;
223 return (csr_done (C, w, x, 1)) ;
227 csr *csr_transpose (
const csr *A,
int values)
229 int p, q, i, *Cp, *Cj, n, m, *Ap, *Aj, *w ;
232 if (!CS_CSC (A))
return (NULL) ;
233 m = A->m ; n = A->n ; Ap = A->p ; Aj = A->j ; Ax = A->x ;
234 C = csr_spalloc (n, m, Ap [m], values && Ax, 0) ;
235 w = (
int*)calloc (CS_MAX(n,1),
sizeof (int)) ;
236 if (!C || !w)
return (csr_done (C, w, NULL, 0)) ;
237 Cp = C->p ; Cj = C->j ; Cx = C->x ;
238 for (p = 0 ; p < Ap [m] ; p++) w [Aj [p]]++ ;
239 csr_cumsum (Cp, w, n) ;
240 for (i = 0 ; i < m ; i++)
242 for (p = Ap [i] ; p < Ap [i+1] ; p++)
244 Cj [q = w [Aj [p]]++] = i ;
245 if (Cx) Cx [q] = Ax [p] ;
248 return (csr_done (C, w, NULL, 1)) ;
252 csr *csr_multiply (
const csr *A,
const csr *B)
254 int p, j, nz = 0, anz, *Cp, *Cj, *Ap, m, k, n, bnz, *w, values, *Aj;
255 double *x, *Ax, *Cx ;
257 if (!CS_CSC (A) || !CS_CSC (B))
return (NULL) ;
259 if (k != B->m )
return(NULL);
260 m = A->m ; anz = A->p [A->m] ;
261 n = B->n ; Ap = A->p ; Aj = A->j ; Ax = A->x ; bnz = B->p [k] ;
262 w = (
int*)calloc (CS_MAX(n,1),
sizeof (int)) ;
263 values = (Ax != NULL) && (B->x != NULL) ;
264 x = values ? (
double*)malloc (n *
sizeof (
double)) : NULL ;
265 C = csr_spalloc (m, n, anz + bnz, values, 0) ;
266 if (!C || !w || (values && !x))
return (csr_done (C, w, x, 0)) ;
268 for (j = 0 ; j < m ; j++)
270 if (nz + n > C->nzmax && !csr_sprealloc (C, 2*(C->nzmax)+m))
272 return (csr_done (C, w, x, 0)) ;
274 Cj = C->j ; Cx = C->x ;
276 for (p = Ap [j] ; p < Ap [j+1] ; p++)
278 nz = csr_scatter (B, Aj [p], Ax ? Ax [p] : 1, w, x, j+1, C, nz) ;
280 if (values)
for (p = Cp [j] ; p < nz ; p++) Cx [p] = x [Cj [p]] ;
283 csr_sprealloc (C, 0) ;
284 return (csr_done (C, w, x, 1)) ;
293 css *csr_sqr (
int order,
const csr *A )
297 if (!CS_CSC (A))
return (NULL) ;
299 S = (
css*)calloc(1,
sizeof (
css)) ;
300 if (!S)
return (NULL) ;
301 S->q = csr_amd (order, A) ;
304 printf(
" csr_sqr error no permutation\n");
306 if (order && !S->q)
return (csr_sfree (S)) ;
309 S->unz = (double) CS_MIN(4*(A->p [n]) + n, n * n );
311 return (ok ? S : csr_sfree (S)) ;
317 int csr_reach (
csr *G,
const csr *B,
int k,
int *xi,
const int *pinv)
319 int p, m, top, *Bp, *Bj, *Gp ;
320 if (!CS_CSC (G) || !CS_CSC (B) || !xi)
return (-1) ;
321 m = G->m ; Bp = B->p ; Bj = B->j ; Gp = G->p ;
323 for (p = Bp [k] ; p < Bp [k+1] ; p++)
325 if (!CS_MARKED (Gp, Bj [p]))
327 top = csr_dfs (Bj [p], G, top, xi, xi+m, pinv) ;
330 for (p = top ; p < m ; p++) CS_MARK (Gp, xi [p]) ;
335 int csr_dfs (
int j,
csr *G,
int top,
int *xi,
int *pstack,
const int *pinv)
337 int i, p, p2, done, jnew, head = 0, *Gp, *Gj ;
338 if (!CS_CSC (G) || !xi || !pstack)
return (-1) ;
339 Gp = G->p ; Gj = G->j ;
344 jnew = pinv ? (pinv [j]) : j ;
345 if (!CS_MARKED (Gp, j))
348 pstack [head] = (jnew < 0) ? 0 : CS_UNFLIP (Gp [jnew]) ;
351 p2 = (jnew < 0) ? 0 : CS_UNFLIP (Gp [jnew+1]) ;
352 for (p = pstack [head] ; p < p2 ; p++)
355 if (CS_MARKED (Gp, i))
continue ;
371 int csr_tdfs (
int j,
int k,
int *head,
const int *next,
int *post,
int *stack)
374 if (!head || !next || !post || !stack)
return (-1) ;
387 head [p] = next [i] ;
403 csrn *csr_lu (
const csr *A,
const css *S,
double tol)
407 double pivot, *Lx, *Ux, *x, a, t;
408 int *Lp, *Lj, *Up, *Uj, *pinv, *xi, *q, n, ipiv, k, top, p, i, row, lnz, unz;
411 if (!CS_CSC (A) ) printf(
" error csrlu: A not csc\n");
412 if (!CS_CSC (A) || !S)
return (NULL) ;
414 if (n != A->m)
return (NULL) ;
415 q = S->q ; lnz = (int)S->lnz ; unz = (
int)S->unz ;
416 x = (
double*)malloc(n *
sizeof (
double)) ;
417 xi = (
int*)malloc (2*n *
sizeof (
int)) ;
418 N = (
csrn*)calloc (1,
sizeof (
csrn)) ;
419 if (!(A) ) printf(
" error csrlu: allocation of N failed\n");
420 if (!x || !xi || !N)
return (csr_ndone (N, NULL, xi, x, 0)) ;
422 N->L = L = csr_spalloc (n, n, lnz, 1, 0) ;
423 N->U = U = csr_spalloc (n, n, unz, 1, 0) ;
424 N->pinv = pinv = (
int*)malloc (n *
sizeof (
int)) ;
425 N->perm = (
int*)malloc (n *
sizeof (
int)) ;
426 if (!L || !U || !pinv)
return (csr_ndone (N, NULL, xi, x, 0)) ;
427 Lp = L->p ; Up = U->p ;
428 for (i = 0 ; i < n ; i++) x [i] = 0 ;
429 for (i = 0 ; i < n ; i++) pinv [i] = -1 ;
430 for (k = 1 ; k <= n ; k++) Up [k] = 0 ;
431 for (k = 1 ; k <= n ; k++) Lp [k] = 0 ;
435 printf (
"A:\n") ; csr_print (A, 0) ;
437 for (k = 0 ; k < n ; k++)
442 if ((lnz + n > L->nzmax && !csr_sprealloc (L, 2*L->nzmax + n)) ||
443 (unz + n > U->nzmax && !csr_sprealloc (U, 2*U->nzmax + n)))
445 return (csr_ndone (N, NULL, xi, x, 0)) ;
447 Lj = L->j ; Lx = L->x ; Uj = U->j ; Ux = U->x ;
448 row = q ? (q [k]) : k ;
451 printf(
"--------------------------------\n");
452 printf(
" %d spsolve row=%d \n",k, row);
453 printf(
" pinv = %d %d %d %d \n", pinv[0], pinv[1], pinv[2], pinv[3]);
455 top = csr_spsolve (U, A, row, xi, x, pinv, 1) ;
456 if( debug > 1 ) printf(
"top=%d x = %g %g %g %g \n", top,x[0],x[1],x[2],x[3]);
460 for (p = top ; p < n ; p++)
465 if ((t = fabs (x [i])) > a)
473 Lj [lnz] = pinv [i] ;
477 if (ipiv == -1 || a <= 0)
return (csr_ndone (N, NULL, xi, x, 0)) ;
478 if (pinv [row] < 0 && fabs (x [row]) >= a*tol) ipiv = row;
482 if( debug > 1 ) { printf (
"L:") ; csr_print (L, 0) ; }
488 for (p = top ; p < n ; p++)
494 Ux [unz++] = x [i] / pivot ;
500 printf (
"U:") ; csr_print (U, 0) ;
501 printf(
"------------------------------------\n");
506 if( debug ) { printf (
"L:") ; csr_print (L, 0) ; }
509 for (p = 0 ; p < unz ; p++) Uj [p] = pinv [Uj [p]] ;
511 csr_sprealloc (L, 0) ;
512 csr_sprealloc (U, 0) ;
513 if( debug ) { printf (
"U:") ; csr_print (U, 0) ; }
514 return (csr_ndone (N, NULL, xi, x, 1)) ;
522 int csr_spsolve (
csr *G,
const csr *B,
int k,
int *xi,
double *x,
const int *pinv,
int up)
524 int i, I, p, q, px, top, n, *Gp, *Gj, *Bp, *Bj ;
527 if (!CS_CSC (G) || !CS_CSC (B) || !xi || !x)
return (-1) ;
528 Gp = G->p ; Gj = G->j ; Gx = G->x ; n = G->n ;
529 Bp = B->p ; Bj = B->j ; Bx = B->x ;
530 top = csr_reach (G, B, k, xi, pinv) ;
531 for (p = top ; p < n ; p++) x [xi [p]] = 0 ;
532 for (p = Bp [k] ; p < Bp [k+1] ; p++) x [Bj [p]] = Bx [p] ;
534 printf(
"solve k=%d x= %g %g %g %g \n", k, x[0], x[1], x[2], x[3]);
536 printf(
"top=%d xi= %d %d %d %d \n", top , xi[0], xi[1], xi[2], xi[3]);
537 for (px = top ; px < n ; px++)
540 I = pinv ? (pinv [i]) : i ;
541 if (I < 0) continue ;
543 x [i] /= Gx [up ? (Gp [I]) : (Gp [I+1]-1)] ;
544 p = up ? (Gp [I]+1) : (Gp [I]) ;
545 q = up ? (Gp [I+1]) : (Gp [I+1]-1) ;
549 printf(
"%d %d solve %d %g %g \n", px, i ,p, Gx [p] , x [Gj [p]] );
551 x [Gj[p]] -= Gx [p] * x [i] ;
554 printf(
" x= %g %g %g %g \n", x[0], x[1], x[2], x[3]);
568 static int csr_wclear (
int mark,
int lemax,
int *w,
int n)
571 if (mark < 2 || (mark + lemax < 0))
573 for (k = 0 ; k < n ; k++)
if (w [k] != 0) w [k] = 1 ;
580 static int csr_diag (
int i,
int j,
double ,
void * ) {
return (i != j) ; }
583 int *csr_amd (
int order,
const csr *A)
586 int *Cp, *Cj, *last, *W, *len, *nv, *next, *P, *head, *elen, *degree, *w,
587 *hhead, *ATp, *ATj, d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1,
588 k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi,
589 ok, cnz, nel = 0, p, p1, p2, p3, p4, pj, pk, pk1, pk2, pn, q, n, m, t ;
592 if (!CS_CSC (A) || order <= 0 || order > 3)
return (NULL) ;
593 AT = csr_transpose (A, 0) ;
594 if (!AT)
return (NULL) ;
595 m = A->m ; n = A->n ;
596 if ( n != m)
return(NULL);
597 dense = (int)CS_MAX (16, 10 * sqrt ((
double) n)) ;
598 dense = CS_MIN (n-2, dense) ;
599 if (order == 1 && n == m)
601 C = csr_add (A, AT, 0, 0) ;
607 for (p2 = 0, j = 0 ; j < m ; j++)
611 if (ATp [j+1] - p > dense) continue ;
612 for ( ; p < ATp [j+1] ; p++) ATj [p2++] = ATj [p] ;
615 A2 = csr_transpose (AT, 0) ;
616 C = A2 ? csr_multiply (AT, A2) : NULL ;
621 C = csr_multiply (AT, A) ;
624 if (!C)
return (NULL) ;
625 csr_fkeep (C, &csr_diag, NULL) ;
628 P = (
int*)malloc (CS_MAX(n+1,1) *
sizeof (int)) ;
629 W = (
int*)malloc (CS_MAX(8*(n+1),1) *
sizeof (
int)) ;
630 t = cnz + cnz/5 + 2*n ;
631 if (!P || !W || !csr_sprealloc (C, t))
return (csr_idone (P, C, W, 0)) ;
632 len = W ; nv = W + (n+1) ; next = W + 2*(n+1) ;
633 head = W + 3*(n+1) ; elen = W + 4*(n+1) ; degree = W + 5*(n+1) ;
634 w = W + 6*(n+1) ; hhead = W + 7*(n+1) ;
637 for (k = 0 ; k < n ; k++) len [k] = Cp [k+1] - Cp [k] ;
641 for (i = 0 ; i <= n ; i++)
650 degree [i] = len [i] ;
652 mark = csr_wclear (0, 0, w, n) ;
657 for (i = 0 ; i < n ; i++)
672 Cp [i] = CS_FLIP (n) ;
677 if (head [d] != -1) last [head [d]] = i ;
678 next [i] = head [d] ;
685 for (k = -1 ; mindeg < n && (k = head [mindeg]) == -1 ; mindeg++) ;
686 if (next [k] != -1) last [next [k]] = -1 ;
687 head [mindeg] = next [k] ;
692 if (elenk > 0 && cnz + mindeg >= nzmax)
694 for (j = 0 ; j < n ; j++)
696 if ((p = Cp [j]) >= 0)
699 Cj [p] = CS_FLIP (j) ;
702 for (q = 0, p = 0 ; p < cnz ; )
704 if ((j = CS_FLIP (Cj [p++])) >= 0)
708 for (k3 = 0 ; k3 < len [j]-1 ; k3++) Cj [q++] = Cj [p++] ;
717 pk1 = (elenk == 0) ? p : cnz ;
719 for (k1 = 1 ; k1 <= elenk + 1 ; k1++)
725 ln = len [k] - elenk ;
733 for (k2 = 1 ; k2 <= ln ; k2++)
736 if ((nvi = nv [i]) <= 0)
continue ;
740 if (next [i] != -1) last [next [i]] = last [i] ;
743 next [last [i]] = next [i] ;
747 head [degree [i]] = next [i] ;
752 Cp [e] = CS_FLIP (k) ;
756 if (elenk != 0) cnz = pk2 ;
759 len [k] = pk2 - pk1 ;
762 mark = csr_wclear (mark, lemax, w, n) ;
763 for (pk = pk1 ; pk < pk2 ; pk++)
766 if ((eln = elen [i]) <= 0) continue ;
769 for (p = Cp [i] ; p <= Cp [i] + eln - 1 ; p++)
778 w [e] = degree [e] + wnvi ;
783 for (pk = pk1 ; pk < pk2 ; pk++)
787 p2 = p1 + elen [i] - 1 ;
789 for (h = 0, d = 0, p = p1 ; p <= p2 ; p++)
794 dext = w [e] - mark ;
803 Cp [e] = CS_FLIP (k) ;
808 elen [i] = pn - p1 + 1 ;
811 for (p = p2 + 1 ; p < p4 ; p++)
814 if ((nvj = nv [j]) <= 0) continue ;
821 Cp [i] = CS_FLIP (k) ;
831 degree [i] = CS_MIN (degree [i], d) ;
836 len [i] = pn - p1 + 1 ;
838 next [i] = hhead [h] ;
844 lemax = CS_MAX (lemax, dk) ;
845 mark = csr_wclear (mark+lemax, lemax, w, n) ;
847 for (pk = pk1 ; pk < pk2 ; pk++)
850 if (nv [i] >= 0) continue ;
854 for ( ; i != -1 && next [i] != -1 ; i = next [i], mark++)
858 for (p = Cp [i]+1 ; p <= Cp [i] + ln-1 ; p++) w [Cj [p]] = mark;
860 for (j = next [i] ; j != -1 ; )
862 ok = (len [j] == ln) && (elen [j] == eln) ;
863 for (p = Cp [j] + 1 ; ok && p <= Cp [j] + ln - 1 ; p++)
865 if (w [Cj [p]] != mark) ok = 0 ;
869 Cp [j] = CS_FLIP (i) ;
885 for (p = pk1, pk = pk1 ; pk < pk2 ; pk++)
888 if ((nvi = -nv [i]) <= 0) continue ;
890 d = degree [i] + dk - nvi ;
891 d = CS_MIN (d, n - nel - nvi) ;
892 if (head [d] != -1) last [head [d]] = i ;
893 next [i] = head [d] ;
896 mindeg = CS_MIN (mindeg, d) ;
901 if ((len [k] = p-pk1) == 0)
906 if (elenk != 0) cnz = p ;
909 for (i = 0 ; i < n ; i++) Cp [i] = CS_FLIP (Cp [i]) ;
910 for (j = 0 ; j <= n ; j++) head [j] = -1 ;
911 for (j = n ; j >= 0 ; j--)
913 if (nv [j] > 0) continue ;
914 next [j] = head [Cp [j]] ;
917 for (e = n ; e >= 0 ; e--)
919 if (nv [e] <= 0) continue ;
922 next [e] = head [Cp [e]] ;
926 for (k = 0, i = 0 ; i <= n ; i++)
928 if (Cp [i] == -1) k = csr_tdfs (i, k, head, next, P, w) ;
930 return (csr_idone (P, C, W, 1)) ;
939 int csr_print (
const csr *A,
int brief)
941 int p, j, m, n, nzmax, nz, nnz, *Ap, *Aj ;
943 if (!A) { printf (
"(null)\n") ;
return (0) ; }
944 m = A->m ; n = A->n ; Ap = A->p ; Aj = A->j ; Ax = A->x ;
945 nzmax = A->nzmax ; nz = A->nz ; nnz = Ap [m];
950 while ((Ap[m] == 0) && (m > 0))
958 printf (
"%d-by-%d, nzmax: %d nnz: %d, mxnorm: %g\n", m, n, nzmax,
959 Ap [m], csr_norm (A)) ;
960 for (j = 0 ; j < m ; j++)
962 printf (
" row %d : locations %d to %d\n", j, Ap [j], Ap [j+1]-1);
963 for (p = Ap [j] ; p < Ap [j+1] ; p++)
965 printf (
" %d : %g\n", Aj [p], Ax ? Ax [p] : 1) ;
966 if (brief && p > 20) { printf (
" ...\n") ;
return (1) ; }
970 printf (
"%d-by-%d, zero matrix with nzmax: %d\n", m, n, nzmax);
975 printf (
"triplet: %d-by-%d, nzmax: %d nnz: %d\n", m, n, nzmax, nz) ;
976 for (p = 0 ; p < nz ; p++)
978 printf (
" %d %d : %g\n", Aj [p], Ap [p], Ax ? Ax [p] : 1) ;
979 if (brief && p > 20) { printf (
" ...\n") ;
return (1) ; }
986 double csr_norm (
const csr *A)
989 double *Ax, norm = 0, s ;
990 if (!CS_CSC (A) || !A->x)
return (-1) ;
991 m = A->m ; Ap = A->p ; Ax = A->x ;
992 for (j = 0 ; j < m ; j++)
994 for (s = 0, p = Ap [j] ; p < Ap [j+1] ; p++) s += fabs (Ax [p]);
995 norm = CS_MAX (norm, s) ;
1001 int csr_fkeep (
csr *A,
int (*fkeep) (
int,
int,
double,
void *),
void *other)
1003 int j, p, nz = 0, m, *Ap, *Aj ;
1005 if (!CS_CSC (A) || !fkeep)
return (-1) ;
1006 m = A->m ; Ap = A->p ; Aj = A->j ; Ax = A->x ;
1007 for (j = 0 ; j < m ; j++)
1011 for ( ; p < Ap [j+1] ; p++)
1013 if (fkeep (Aj [p], j, Ax ? Ax [p] : 1, other))
1015 if (Ax) Ax [nz] = Ax [p] ;
1016 Aj [nz++] = Aj [p] ;
1021 csr_sprealloc (A, 0) ;