Amesos Package Browser (Single Doxygen Collection)  Development
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
amesos_paraklete_factorize.c
Go to the documentation of this file.
1 /* ========================================================================== */
2 /* === paraklete_factorize ================================================== */
3 /* ========================================================================== */
4 
6 
7 /* LU = paraklete_factorize (A, LUsymbolic, Common) factorizes P*A*Q into L*U.
8  * Returns NULL if A is singular or if memory is exhausted.
9  *
10  * PARAKLETE version 0.3: parallel sparse LU factorization. Nov 13, 2007
11  * Copyright (C) 2007, Univ. of Florida. Author: Timothy A. Davis
12  * See License.txt for the Version 2.1 of the GNU Lesser General Public License
13  * http://www.cise.ufl.edu/research/sparse
14  */
15 
16 /* ========================================================================== */
17 /* === paraklete_allocate_numeric =========================================== */
18 /* ========================================================================== */
19 
20 /* Allocate initial part of LU factors */
21 
23 (
24  paraklete_symbolic *LUsymbolic,
25  paraklete_common *Common
26 )
27 {
28  paraklete_numeric *LU ;
29  paraklete_node *LUnode ;
30  cholmod_common *cm ;
31  Int *Cstart, *Sched, *Childp ;
32  Int c, n, ncomponents, myid ;
33 
34  cm = &(Common->cm) ;
35  myid = Common->myid ;
36  LU = CHOLMOD (malloc) (1, sizeof (paraklete_numeric), cm) ;
37 
38  if (cm->status != CHOLMOD_OK)
39  {
40  /* TODO return NULL, and broadcast error to all processes */
41  PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
42  }
43 
44  n = LUsymbolic->n ;
45  ncomponents = LUsymbolic->ncomponents ;
46 
47  LU->magic = PARAKLETE_MAGIC ;
48  LU->n = n ;
49  LU->ncomponents = ncomponents ;
50  LU->LUnode = CHOLMOD (malloc) (ncomponents, sizeof (paraklete_node *), cm) ;
51  LU->P = CHOLMOD (malloc) (n, sizeof (Int), cm) ;
52  LU->Q = CHOLMOD (malloc) (n, sizeof (Int), cm) ;
53  LU->Pinv = CHOLMOD (malloc) (n, sizeof (Int), cm) ;
54  LU->Qinv = CHOLMOD (malloc) (n, sizeof (Int), cm) ;
55  LU->W = NULL ;
56  LU->Ep2 = NULL ;
57  LU->E = NULL ;
58 
59  if (myid == 0)
60  {
61  /* allocate workspace for subsequent solve */
62  LU->W = CHOLMOD (malloc) (n, sizeof (double), cm) ;
63  /* allocate workspace for distributing the input matrix to the nodes */
64  LU->Ep2 = CHOLMOD (malloc) (n+1, sizeof (Int), cm) ;
65  }
66 
67  if (LU->LUnode != NULL)
68  {
69  for (c = 0 ; c < ncomponents ; c++)
70  {
71  LU->LUnode [c] = CHOLMOD (malloc) (1, sizeof (paraklete_node), cm) ;
72  }
73  }
74 
75  Cstart = LUsymbolic->Cstart ;
76  Sched = LUsymbolic->Sched ;
77  Childp = LUsymbolic->Childp ;
78 
79  if (cm->status != CHOLMOD_OK)
80  {
81  /* TODO return NULL, and broadcast error to all processes, and
82  * free the LU object */
83  PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
84  }
85 
86  for (c = 0 ; c < ncomponents ; c++)
87  {
88  /* Each process has an LUnode [c] for each node in the tree, but it
89  * will be populated only with the parts this process needs */
90  LUnode = (LU->LUnode == NULL) ? NULL : (LU->LUnode [c]) ;
91 
92  if (LUnode != NULL)
93  {
94 
95  LUnode->nk = Cstart [c+1] - Cstart [c] ;
96  LUnode->nchild = Childp [c+1] - Childp [c] ;
97 
98  /* no LU factorization of this node yet */
99  LUnode->PK_STATUS = PK_UNKNOWN ;
100  LUnode->PK_NN = 0 ;
101  LUnode->PK_NPIV = 0 ;
102  LUnode->PK_NFOUND = 0 ;
103  LUnode->PK_NLOST = 0 ;
104  LUnode->lusize = 0 ;
105  LUnode->llen = NULL ;
106  LUnode->lp = NULL ;
107  LUnode->ulen = NULL ;
108  LUnode->up = NULL ;
109  LUnode->ix = NULL ;
110 
111  /* information and messages from each child of node c */
112  if (Sched [c] == myid)
113  {
114  LUnode->Lost = CHOLMOD (malloc) (LUnode->nchild,
115  sizeof (Int), cm) ;
116  LUnode->Lostp = CHOLMOD (malloc) (LUnode->nchild+1,
117  sizeof (Int), cm);
118  MPI (LUnode->Req = CHOLMOD (malloc) (LUnode->nchild,
119  sizeof (MPI_Request), cm)) ;
120  }
121  else
122  {
123  LUnode->Lost = NULL ;
124  LUnode->Lostp = NULL ;
125  MPI (LUnode->Req = NULL) ;
126  }
127 
128  /* no permutation vectors yet */
129  LUnode->Pglobal = NULL ;
130  LUnode->Qglobal = NULL ;
131  LUnode->Plocal = NULL ;
132  LUnode->Qlocal = NULL ;
133  LUnode->Pinv = NULL ;
134  LUnode->Qinv = NULL ;
135 
136  /* no Schur complement yet */
137  LUnode->PK_SSIZE = 0 ;
138  LUnode->PK_SNZ = 0 ;
139  LUnode->PK_SN = 0 ;
140  LUnode->slen = NULL ;
141  LUnode->sp = NULL ;
142  LUnode->sx = NULL ;
143 
144  /* no solution and right-hand-side yet */
145  LUnode->B = NULL ;
146  LUnode->X = NULL ;
147 
148  /* no input matrix yet */
149  LUnode->A = NULL ;
150  LUnode->C = NULL ;
151 
152  /* no workspace yet */
153  LUnode->W2 = NULL ;
154  }
155  }
156 
157  if (cm->status != CHOLMOD_OK)
158  {
159  /* TODO return NULL, and broadcast error to all processes, and */
160  /* free the LU object */
161  PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
162  }
163 
164  PR1 ((Common->file, "proc "ID" LU done\n", myid)) ;
165  return (LU) ;
166 }
167 
168 
169 /* ========================================================================== */
170 /* === paraklete_free_numeric =============================================== */
171 /* ========================================================================== */
172 
173 /* Free the numeric object on all processors */
174 
176 (
177  paraklete_numeric **LUHandle,
178  paraklete_common *Common
179 )
180 {
181  paraklete_numeric *LU ;
182  paraklete_node *LUnode ;
183  cholmod_common *cm ;
184  Int c ;
185 
186  if (LUHandle == NULL)
187  {
188  /* nothing to do */
189  return ;
190  }
191  LU = *LUHandle ;
192  if (LU == NULL)
193  {
194  /* nothing to do */
195  return ;
196  }
197 
198  cm = &(Common->cm) ;
199 
200  /* global P and Q, broadcast to all processors */
201  CHOLMOD (free) (LU->n, sizeof (Int), LU->P, cm) ;
202  CHOLMOD (free) (LU->n, sizeof (Int), LU->Q, cm) ;
203  CHOLMOD (free) (LU->n, sizeof (Int), LU->Pinv, cm) ;
204  CHOLMOD (free) (LU->n, sizeof (Int), LU->Qinv, cm) ;
205  CHOLMOD (free) (LU->n, sizeof (double), LU->W, cm) ;
206  CHOLMOD (free) (LU->n+1, sizeof (Int), LU->Ep2, cm) ;
207  CHOLMOD (free_sparse) (&(LU->E), cm) ;
208 
209  if (LU->LUnode != NULL)
210  {
211  for (c = 0 ; c < LU->ncomponents ; c++)
212  {
213  LUnode = LU->LUnode [c] ;
214  if (LUnode != NULL)
215  {
216  /* solution and right-hand-side at this node */
217  PR2 ((Common->file,"proc "ID" node "ID" free numeric, nk "ID" B %p\n",
218  Common->myid, c, LUnode->nk, (void *) (LUnode->B))) ;
219  CHOLMOD (free) (LUnode->nk, sizeof (double), LUnode->B, cm) ;
220  CHOLMOD (free) (LUnode->PK_NN, sizeof (double), LUnode->X, cm) ;
221 
222  /* LU factors at this node */
223  CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int), LUnode->llen, cm) ;
224  CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int), LUnode->lp, cm) ;
225  CHOLMOD (free) (LUnode->PK_NN, sizeof (Int), LUnode->ulen, cm) ;
226  CHOLMOD (free) (LUnode->PK_NN, sizeof (Int), LUnode->up, cm) ;
227  CHOLMOD (free) (LUnode->lusize, sizeof (double), LUnode->ix, cm) ;
228  CHOLMOD (free) (LUnode->nchild, sizeof (Int), LUnode->Lost, cm) ;
229  CHOLMOD (free) (LUnode->nchild+1, sizeof (Int),
230  LUnode->Lostp, cm) ;
231  MPI (CHOLMOD (free) (LUnode->nchild,
232  sizeof (MPI_Request), LUnode->Req, cm)) ;
233 
234  /* P and Q at this node */
235  CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int),
236  LUnode->Pglobal, cm) ;
237  CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int),
238  LUnode->Qglobal, cm) ;
239  CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int),
240  LUnode->Plocal, cm) ;
241  CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int),
242  LUnode->Qlocal, cm) ;
243  CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int), LUnode->Pinv, cm) ;
244  CHOLMOD (free) (LUnode->PK_NPIV, sizeof (Int), LUnode->Qinv, cm) ;
245 
246  /* Schur complement of this node */
247  CHOLMOD (free) (LUnode->PK_SN, sizeof (Int), LUnode->sp, cm) ;
248  CHOLMOD (free) (LUnode->PK_SN, sizeof (Int), LUnode->slen, cm) ;
249  CHOLMOD (free) (LUnode->PK_SSIZE, sizeof (double),
250  LUnode->sx, cm) ;
251 
252  /* input matrix and sum of Schur complements at this node */
253  CHOLMOD (free_sparse) (&(LUnode->A), cm) ;
254  CHOLMOD (free_sparse) (&(LUnode->C), cm) ;
255 
256  CHOLMOD (free) (2*(LUnode->nlost_in), sizeof (Int),
257  LUnode->W2, cm) ;
258 
259  /* free the LUnode itself */
260  CHOLMOD (free) (1, sizeof (paraklete_node), LUnode, cm) ;
261  }
262  }
263  }
264 
265  CHOLMOD (free) (LU->ncomponents, sizeof (paraklete_node *), LU->LUnode, cm) ;
266  *LUHandle = CHOLMOD (free) (1, sizeof (paraklete_numeric), (*LUHandle), cm) ;
267 }
268 
269 
270 /* ========================================================================== */
271 /* === paraklete_permute ==================================================== */
272 /* ========================================================================== */
273 
274 /* E = A(p,q) */
275 
277 (
278  cholmod_sparse *A,
279  paraklete_numeric *LU,
280  paraklete_symbolic *LUsymbolic,
281  paraklete_common *Common
282 )
283 {
284  double *Ax, *Ex ;
285  Int *Ap, *Ai, *Ep2, *Ep, *Ei, *Cperm, *RpermInv, *Rperm ;
286  cholmod_common *cm ;
287  cholmod_sparse *E ;
288  Int n, enz, anz, k, j, p ;
289 
290  cm = &(Common->cm) ;
291  ASSERT (Common->myid == 0) ;
292 
293  Cperm = LUsymbolic->Cperm ;
294  Rperm = LUsymbolic->Rperm ;
295  Rperm = (Rperm == NULL) ? Cperm : Rperm ;
296  RpermInv = LUsymbolic->RpermInv ;
297 
298  n = A->nrow ;
299  ASSERT (n == LUsymbolic->n) ;
300  Ap = A->p ;
301  Ai = A->i ;
302  Ax = A->x ;
303  anz = Ap [n] ;
304  Ep2 = LU->Ep2 ;
305 
306 #ifndef NDEBUG
307  for (k = 0 ; k < n ; k++)
308  {
309  PR3 ((Common->file, "Cperm ("ID") = "ID" ;\n", k+1, Cperm[k]+1));
310  }
311  for (k = 0 ; k < n ; k++)
312  {
313  PR3 ((Common->file, "Rperm ("ID") = "ID" ;\n", k+1, Rperm[k]+1));
314  }
315  for (k = 0 ; k < n ; k++)
316  {
317  PR3 ((Common->file, "RpermInv ("ID") = "ID" ;\n", k+1, RpermInv [k]+1)) ;
318  }
319 #endif
320 
321  E = CHOLMOD (allocate_sparse) (n, n, anz, FALSE, TRUE, 0, TRUE, cm) ;
322 
323  if (cm->status != CHOLMOD_OK)
324  {
325  /* out of memory */
326  PR0 ((Common->file, "failed to allocate E\n")) ;
327  return (NULL) ;
328  }
329 
330  Ep = E->p ;
331  Ei = E->i ;
332  Ex = E->x ;
333  enz = 0 ;
334  for (k = 0 ; k < n ; k++)
335  {
336  /* column j of A becomes column k of E */
337  j = Cperm [k] ;
338  PR1 ((Common->file, "Col "ID" of A becomes col "ID" of E\n", j, k)) ;
339  Ep [k] = enz ;
340  for (p = Ap [j] ; p < Ap [j+1] ; p++)
341  {
342  /* row i = Ai [p] becomes row RpermInv[i] of E */
343  PR2 ((Common->file, " "ID" %g -> "ID"\n",
344  Ai [p], Ax [p], RpermInv [Ai [p]])) ;
345  Ei [enz] = RpermInv [Ai [p]] ;
346  Ex [enz] = Ax [p] ;
347  enz++ ;
348  ASSERT (enz <= anz) ;
349  }
350  }
351  Ep [n] = enz ;
352  DEBUG (CHOLMOD (print_sparse) (E, "E = A(p,p)", cm)) ;
353  CHOLMOD (sort) (E, cm) ;
354 
355  if (cm->status != CHOLMOD_OK)
356  {
357  /* out of memory */
358  PR0 ((Common->file, "out of memory to sort E\n")) ;
359  CHOLMOD (free_sparse) (&E, cm) ;
360  return (NULL) ;
361  }
362 
363  DEBUG (CHOLMOD (print_sparse) (E, "E = A(p,p), sorted", cm)) ;
364  for (k = 0 ; k <= n ; k++)
365  {
366  Ep2 [k] = Ep [k] ;
367  }
368 
369  return (E) ;
370 }
371 
372 
373 /* ========================================================================== */
374 /* === paraklete_factorize ================================================== */
375 /* ========================================================================== */
376 
378 (
379  /* inputs, not modified */
380  cholmod_sparse *A,
381  paraklete_symbolic *LUsymbolic,
382  paraklete_common *Common
383 )
384 {
385  paraklete_numeric *LU ;
386  cholmod_common *cm ;
387  cholmod_sparse *E, *C ;
388  double *Ex, *Cx ;
389  Int *Cperm, *Cn, *Cnz, *Ep, *Ei, *Ep2, *Map, *Cparent, *Rperm,
390  *Cstart, *P, *Q, *Cp, *Ci, *Pc, *Qc, *Pinv, *Qinv, *Sched ;
391  Int cj, i, n, ncomponents, k, p, c, a, cn, k1, k2, kk, cnz,
392  nfound, myid, npiv ;
393  int ok, all_ok ;
394 
395  MPI (MPI_Status ms) ;
396  MPI (MPI_Request req) ;
397 
398  /* TODO: return NULL if any input argument is NULL */
399 
400  Common->status = PK_OK ;
401 
402  /* ---------------------------------------------------------------------- */
403  /* get inputs */
404  /* ---------------------------------------------------------------------- */
405 
406  cm = &(Common->cm) ;
407  cm->status = CHOLMOD_OK ;
408 
409  myid = Common->myid ;
410 
411  PR0 ((Common->file, "proc "ID" start factorize\n", myid)) ;
412 
413  n = LUsymbolic->n ;
414  /*
415  printf (" factorize n "ID" LUsymbolic %p\n", n, (void *) LUsymbolic) ;
416  */
417  ncomponents = LUsymbolic->ncomponents ;
418  Cperm = LUsymbolic->Cperm ;
419  Rperm = LUsymbolic->Rperm ;
420  Rperm = (Rperm == NULL) ? Cperm : Rperm ;
421  Cn = LUsymbolic->Cn ;
422  Cnz = LUsymbolic->Cnz ;
423  Cparent = LUsymbolic->Cparent ;
424  Cstart = LUsymbolic->Cstart ;
425  Sched = LUsymbolic->Sched ;
426 
427  PR0 ((Common->file, "in factor: proc "ID" my_tries "ID"\n", myid, my_tries)) ;
428 
429 #ifndef NDEBUG
430  for (c = 0 ; c < ncomponents ; c++)
431  {
432  PR1 ((Common->file, "proc: "ID" node "ID": "ID" to "ID", parent "ID" Sched "ID"\n",
433  Common->myid, c, Cstart [c], Cstart [c+1]-1, Cparent [c],
434  Sched [c])) ;
435  }
436 #endif
437 
438  /* ---------------------------------------------------------------------- */
439  /* allocate and initialize the global LU factors */
440  /* ---------------------------------------------------------------------- */
441 
442  LU = amesos_paraklete_allocate_numeric (LUsymbolic, Common) ;
443  ok = (LU != NULL) ;
444  PR1 ((Common->file, "factor proc "ID" alloc ok: "ID"\n", myid, ok)) ;
445 
446  /* ---------------------------------------------------------------------- */
447  /* E = A(p,p), with sorted column indices */
448  /* ---------------------------------------------------------------------- */
449 
450  E = NULL ;
451  Ep2 = NULL ;
452  if (ok && myid == 0)
453  {
454  /* only the root constructs the matrix E = A (p,p) */
455  Ep2 = LU->Ep2 ;
456  E = LU->E = paraklete_permute (A, LU, LUsymbolic, Common) ;
457  ok = (E != NULL) ;
458  DEBUG (CHOLMOD (print_sparse) (E, "E = A(p,p) on master node", cm)) ;
459  }
460 
461  /* ---------------------------------------------------------------------- */
462  /* allocate space for submatrices at each node */
463  /* ---------------------------------------------------------------------- */
464 
465  for (c = 0 ; ok && c < ncomponents ; c++)
466  {
467  if (myid == 0 || Sched [c] == myid)
468  {
469  /* Two copies are made, one on the root process and one on the
470  * process that factorizes that submatrix. If the root process
471  * is factorizing the node, then only one copy is made. */
472  C = CHOLMOD (allocate_sparse) (Cn [c], Cn [c], Cnz [c], TRUE, TRUE,
473  0, TRUE, cm) ;
474  LU->LUnode [c]->A = C ;
475  ok = (C != NULL) ;
476  }
477  }
478 
479  /* ---------------------------------------------------------------------- */
480  /* all processes find out if initial allocation fails for any process */
481  /* ---------------------------------------------------------------------- */
482 
483  all_ok = ok ;
484  MPI (MPI_Allreduce (&ok, &all_ok, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD)) ;
485  if (!all_ok)
486  {
487  /* out of memory; inform all processes */
488  PR0 ((Common->file, "proc "ID" all fail factorize\n", Common->myid)) ;
489  Common->status = PK_OUT_OF_MEMORY ;
490  amesos_paraklete_free_numeric (&LU, Common) ;
491  return (NULL) ;
492  }
493 
494  /* ---------------------------------------------------------------------- */
495  /* distribute the permuted matrix to the nodes */
496  /* ---------------------------------------------------------------------- */
497 
498  if (myid == 0)
499  {
500 
501  /* ------------------------------------------------------------------ */
502  /* process 0 partitions the input matrix and sends to all processes */
503  /* ------------------------------------------------------------------ */
504 
505  Ep = E->p ;
506  Ei = E->i ;
507  Ex = E->x ;
508 
509 #ifndef NDEBUG
510  for (k = 0 ; k < n ; k++)
511  {
512  for (p = Ep [k] ; p < Ep [k+1] ; p++)
513  {
514  PR3 ((Common->file, "E ("ID","ID") = %g ;\n", 1+Ei[p], 1+k, Ex[p]));
515  }
516  }
517  for (c = 0 ; c <= ncomponents ; c++)
518  {
519  PR3 ((Common->file, "Cstart ("ID") = "ID" ;\n", 1+c, 1+Cstart [c])) ;
520  }
521 #endif
522 
523  Map = cm->Iwork ;
524 
525  for (c = 0 ; c < ncomponents ; c++)
526  {
527  PR1 ((Common->file, "distribute node c = "ID"\n", c)) ;
528  cn = Cn [c] ;
529 
530  /* find mapping of global rows/columns to node c's local indices */
531  cj = 0 ;
532  for (a = c ; a != EMPTY ; a = Cparent [a])
533  {
534  PR2 ((Common->file, "ancestor "ID", for c "ID", ncomp "ID"\n",
535  a, c, ncomponents)) ;
536  ASSERT (a >= c && a < ncomponents) ;
537  k1 = Cstart [a] ;
538  k2 = Cstart [a+1] ;
539  PR2 ((Common->file, "k1 "ID" k2 "ID"\n", k1, k2)) ;
540  for (k = k1 ; k < k2 ; k++)
541  {
542  /* global index k becomes local index j */
543  PR3 ((Common->file, " global: "ID" local "ID"\n", k, cj)) ;
544  Map [k] = cj++ ;
545  }
546  }
547  ASSERT (cj == cn) ;
548 
549  /* get the local matrix for node c */
550  C = LU->LUnode [c]->A ;
551  Cp = C->p ;
552  Ci = C->i ;
553  Cx = C->x ;
554 
555  cj = 0 ;
556  cnz = 0 ;
557 
558  /* create columns 0:(k2-k1-1) of C, containing candidate pivot
559  * columns for node c */
560  k1 = Cstart [c] ;
561  k2 = Cstart [c+1] ;
562  PR2 ((Common->file, "c: "ID" k1 to k2-1: "ID" "ID"\n", c, k1, k2-1)) ;
563  for (k = k1 ; k < k2 ; k++)
564  {
565  Cp [cj++] = cnz ;
566  for (p = Ep2 [k] ; p < Ep [k+1] ; p++)
567  {
568  i = Ei [p] ;
569  ASSERT (Ei [p] >= k1) ;
570  PR3 ((Common->file,
571  "E(1+"ID",1+"ID") = %g ; ce(1+"ID",1+"ID") = "ID"\n",
572  i,k, Ex[p], i,k,c)) ;
573  Ci [cnz] = Map [i] ;
574  Cx [cnz] = Ex [p] ;
575  cnz++ ;
576  ASSERT (cnz <= Cnz [c]) ;
577  }
578  }
579 
580  /* create columns for the ancestors of C. These will become columns
581  * of U2 and S for node c. */
582  for (a = Cparent [c] ; a != EMPTY ; a = Cparent [a])
583  {
584  PR2 ((Common->file, "ancestor "ID"\n", a)) ;
585  PR2 ((Common->file, "k1 "ID" k2 "ID"\n", Cstart [a], Cstart [a+1]));
586  for (k = Cstart [a] ; k < Cstart [a+1] ; k++)
587  {
588  Cp [cj++] = cnz ;
589  ASSERT (cj <= cn) ;
590  for (p = Ep2 [k] ; p < Ep [k+1] ; p++)
591  {
592  i = Ei [p] ;
593  ASSERT (i >= k1) ;
594  if (i >= k2)
595  {
596  /* only get rows Cstart [c] to Cstart [c+1]-1 */
597  break ;
598  }
599  PR3 ((Common->file,
600  "E(1+"ID",1+"ID") = %g ; ce(1+"ID",1+"ID") = "ID"\n",
601  i,k, Ex[p], i,k,c)) ;
602  Ci [cnz] = Map [i] ;
603  Cx [cnz] = Ex [p] ;
604  cnz++ ;
605  ASSERT (cnz <= Cnz [c]) ;
606  }
607  /* next component will start here */
608  Ep2 [k] = p ;
609  }
610  }
611  ASSERT (cj == cn) ;
612  ASSERT (cnz == Cnz [c]) ;
613  Cp [cn] = cnz ;
614 
615  /* place matrix C in node c of the tree */
616  PR2 ((Common->file, "node: "ID" sched: "ID"\n", c, Sched [c])) ;
617 
618  if (Sched [c] != myid)
619  {
620  /* send this matrix to the process that owns node c.
621  Note that the Isend requests are immediately free'd,
622  because the sends are synchronized with an barrier,
623  later on. */
624 
625  /*
626  if (cnz == 4040)
627  {
628  Int k3 = 1084 ;
629  fprintf (Common->file,
630  "sending "ID": ["ID" %g]\n", k3, Ci [k3], Cx [k3]) ;
631  }
632  */
633 
634  PR2 ((Common->file, "n "ID" nz "ID"\n", cn, cnz)) ;
635  DEBUG (CHOLMOD (print_sparse) (C, "send C to a node", cm)) ;
636 
637  MPI (MPI_Isend (Cp, cn+1, MPI_Int, Sched [c],
638  TAG0, MPI_COMM_WORLD, &req)) ;
639  MPI (MPI_Request_free (&req)) ;
640  MPI (MPI_Isend (Ci, cnz, MPI_Int, Sched [c],
641  TAG0, MPI_COMM_WORLD, &req)) ;
642  MPI (MPI_Request_free (&req)) ;
643  MPI (MPI_Isend (Cx, cnz, MPI_DOUBLE, Sched [c],
644  TAG0, MPI_COMM_WORLD, &req)) ;
645  MPI (MPI_Request_free (&req)) ;
646  }
647  }
648 
649  }
650  else
651  {
652 
653  /* ------------------------------------------------------------------ */
654  /* all other processes acquire their input matrix from process 0 */
655  /* ------------------------------------------------------------------ */
656 
657  for (c = 0 ; c < ncomponents ; c++)
658  {
659  if (Sched [c] == myid)
660  {
661  /*
662  {
663  Int *Mi = LU->LUnode [c]->A->i ;
664  double *Mx = LU->LUnode [c]->A->x ;
665  Mi [1084] = -9999999 ;
666  Mx [1084] = -9999999 ;
667  fprintf (Common->file,
668  "pre ["ID" %g]\n", Mi [1084], Mx [1084]) ;
669  }
670  */
671 
672  MPI (MPI_Recv (LU->LUnode [c]->A->p, Cn [c] +1, MPI_Int, 0,
673  TAG0, MPI_COMM_WORLD, &ms)) ;
674  MPI (MPI_Recv (LU->LUnode [c]->A->i, Cnz [c], MPI_Int, 0,
675  TAG0, MPI_COMM_WORLD, &ms)) ;
676  MPI (MPI_Recv (LU->LUnode [c]->A->x, Cnz [c], MPI_DOUBLE, 0,
677  TAG0, MPI_COMM_WORLD, &ms)) ;
678 
679  /*
680  {
681  Int *Mi = LU->LUnode [c]->A->i ;
682  double *Mx = LU->LUnode [c]->A->x ;
683  char *CC = (char *) (LU->LUnode [c]->A->x) ;
684  Int k3 = 1084 ;
685  fprintf (Common->file,
686  "got "ID" ["ID" %g]\n", k3, Mi [k3], Mx [k3]) ;
687  fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3])) ;
688  fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+1])) ;
689  fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+2])) ;
690  fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+3])) ;
691  fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+4])) ;
692  fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+5])) ;
693  fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+6])) ;
694  fprintf (Common->file, "byte "ID"\n", (Int) (CC [8*k3+7])) ;
695  }
696  */
697 
698  PR1 ((Common->file, "proc "ID" Node "ID" got orig:\n", myid, c));
699  PR2 ((Common->file, "n "ID" nz "ID"\n", Cn [c], Cnz [c])) ;
700  DEBUG (CHOLMOD (print_sparse) (LU->LUnode [c]->A, "got A", cm)) ;
701 #ifndef NDEBUG
702  /* {
703  Int *Mp, *Mi, j ;
704  double *Mx ;
705  Mp = LU->LUnode [c]->A->p ;
706  Mi = LU->LUnode [c]->A->i ;
707  Mx = LU->LUnode [c]->A->x ;
708  for (j = 0 ; j < Cn [c] ; j++)
709  {
710  fprintf (Common->file, "my own col "ID"\n", j) ;
711  for (k = Mp [j] ; k < Mp [j+1] ; k++)
712  {
713  Int ii = Mi [k] ;
714  double x = Mx [k] ;
715  fprintf (Common->file, " is: "ID" %g\n", ii, x) ;
716  }
717  }
718  k = 0 ;
719  } */
720 #endif
721  }
722  }
723  }
724 
725  /* ---------------------------------------------------------------------- */
726  /* free temporary copy of A(p,p) */
727  /* ---------------------------------------------------------------------- */
728 
729  /* only the root needs to do this */
730  if (myid == 0)
731  {
732  LU->Ep2 = CHOLMOD (free) (n+1, sizeof (Int), LU->Ep2, cm) ;
733  CHOLMOD (free_sparse) (&(LU->E), cm) ;
734  }
735 
736  /* process 0 frees LUnode [c]->A, but only when recvd by c */
737  MPI (MPI_Barrier (MPI_COMM_WORLD)) ;
738  PR1 ((Common->file, "proc "ID" everybody OK so far2\n", Common->myid)) ;
739  if (myid == 0)
740  {
741  for (c = 0 ; c < ncomponents ; c++)
742  {
743  if (Sched [c] != 0)
744  {
745  /* root process no longer needs submatrices sent to others */
746  CHOLMOD (free_sparse) (&(LU->LUnode [c]->A), cm) ;
747  }
748  }
749  }
750 
751 #ifndef NDEBUG
752  for (c = 0 ; c < ncomponents ; c++)
753  {
754  if (Sched [c] == myid)
755  {
756  PR1 ((Common->file, "proc "ID" Node "ID" original matrix:\n", myid, c));
757  DEBUG (CHOLMOD (print_sparse) (LU->LUnode [c]->A, "A", cm)) ;
758  }
759  }
760 #endif
761 
762  /* ---------------------------------------------------------------------- */
763  /* factorize each node */
764  /* ---------------------------------------------------------------------- */
765 
766  ok = TRUE ;
767  for (c = 0 ; c < ncomponents ; c++)
768  {
769  if (Sched [c] == myid)
770  {
771  PR1 ((Common->file, "proc "ID" doing node "ID"\n", myid, c)) ;
772  if (!amesos_paraklete_factorize_node (c, LU, LUsymbolic, Common))
773  {
774  ok = FALSE ;
775  }
776  }
777  }
778 
779  /* ---------------------------------------------------------------------- */
780  /* determine global ordering, including Cperm and numerical pivoting */
781  /* ---------------------------------------------------------------------- */
782 
783  P = LU->P ;
784  Q = LU->Q ;
785  kk = 0 ;
786  Common->status = TRUE ;
787  for (c = 0 ; c < ncomponents ; c++)
788  {
789  /* Give all processors nfound and npiv for each node of the tree.
790  * This also allows us to determine if the matrix is singular, or if
791  * any process ran out of memory. */
792  MPI (MPI_Bcast (LU->LUnode [c]->header, PK_HEADER, MPI_Int,
793  Sched [c], MPI_COMM_WORLD)) ;
794  kk += LU->LUnode [c]->PK_NFOUND ;
795  Common->status = MIN (Common->status, LU->LUnode [c]->PK_STATUS) ;
796  }
797 
798  if (kk < n)
799  {
800  PR0 ((Common->file, "proc "ID" Singular: "ID" of "ID"\n", myid, kk, n)) ;
801  /*
802  printf ("proc "ID" Singular: "ID" of "ID"\n", myid, kk, n) ;
803  */
804  Common->status = PK_SINGULAR ;
805  }
806 
807  if (Common->status != PK_OK)
808  {
809  /* out of memory, singular matrix, or unknown error */
810  amesos_paraklete_free_numeric (&LU, Common) ;
811  return (NULL) ;
812  }
813 
814  /* all processes allocate space for the global permutation */
815  for (c = 0 ; c < ncomponents ; c++)
816  {
817  npiv = LU->LUnode [c]->PK_NPIV ;
818  if (LU->LUnode [c]->Pglobal == NULL)
819  {
820  LU->LUnode [c]->Pglobal = CHOLMOD (malloc) (npiv, sizeof (Int), cm) ;
821  LU->LUnode [c]->Qglobal = CHOLMOD (malloc) (npiv, sizeof (Int), cm) ;
822  }
823  }
824 
825  ok = ok && (kk == n) && (cm->status == CHOLMOD_OK) ;
826 
827  /* check to see if all processes had space for P and Q */
828  PR1 ((Common->file, "proc "ID" here in PQ: ok "ID"\n", Common->myid, ok)) ;
829  all_ok = ok ;
830  MPI (MPI_Allreduce (&ok, &all_ok, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD)) ;
831  if (!all_ok)
832  {
833  /* TODO return NULL, broadcast error to all processes, and free LU */
834  PARAKLETE_ERROR (PK_OUT_OF_MEMORY, "out of memory") ;
835  }
836 
837  kk = 0 ;
838  for (c = 0 ; c < ncomponents ; c++)
839  {
840  npiv = LU->LUnode [c]->PK_NPIV ;
841 
842  /* give Pglobal and Qglobal to all processors */
843  MPI (MPI_Bcast (LU->LUnode [c]->Pglobal, npiv, MPI_Int, Sched [c],
844  MPI_COMM_WORLD)) ;
845  MPI (MPI_Bcast (LU->LUnode [c]->Qglobal, npiv, MPI_Int, Sched [c],
846  MPI_COMM_WORLD)) ;
847 
848  Pc = LU->LUnode [c]->Pglobal ;
849  Qc = LU->LUnode [c]->Qglobal ;
850 
851  nfound = LU->LUnode [c]->PK_NFOUND ;
852  for (k = 0 ; k < nfound ; k++)
853  {
854  /* row Pc[k] and column Qc[k] of E are the kth pivot row/column of
855  * node c. They are the kk-th pivot row/column of the global LU
856  * factors. */
857  P [kk] = Rperm [Pc [k]] ;
858  Q [kk] = Cperm [Qc [k]] ;
859  kk++ ;
860  }
861  }
862  ASSERT (kk == n) ;
863 
864  /* compute Pinv and Qinv. TODO: this is not needed */
865  Pinv = LU->Pinv ;
866  Qinv = LU->Qinv ;
867  for (k = 0 ; k < n ; k++)
868  {
869  Pinv [P [k]] = k ;
870  Qinv [Q [k]] = k ;
871  }
872 
873 #ifndef NDEBUG
874 
875  /* ---------------------------------------------------------------------- */
876  /* dump the matrix */
877  /* ---------------------------------------------------------------------- */
878 
879  if (Common->dump > 1)
880  {
881 
882  if (myid == 0)
883  {
884  Int *Ap, *Ai ;
885  double *Ax ;
886  Int j ;
887 
888  Ap = A->p ;
889  Ai = A->i ;
890  Ax = A->x ;
891 
892  for (k = 0 ; k < n ; k++) printf ("P (1+"ID") = "ID" ;\n", k, P [k]) ;
893  for (k = 0 ; k < n ; k++) printf ("Q (1+"ID") = "ID" ;\n", k, Q [k]) ;
894 
895  printf ("P = 1+P ;\n") ;
896  printf ("Q = 1+Q ;\n") ;
897 
898  for (j = 0 ; j < n ; j++)
899  {
900  for (p = Ap [j] ; p < Ap [j+1] ; p++)
901  {
902  printf ("A (1+"ID",1+"ID") = %.16g ;\n", Ai [p], j, Ax [p]) ;
903  }
904  }
905  }
906 
907  for (c = 0 ; c < ncomponents ; c++)
908  {
909  paraklete_node *LUnode ;
910  Int *Lip, *Llen, *Li, *Uip, *Ulen, *Ui ;
911  double *LUix, *Lx, *Ux ;
912  Int llen, ulen, j ;
913 
914  MPI (MPI_Barrier (MPI_COMM_WORLD)) ;
915  if (Sched [c] != myid) continue ;
916  printf ("\n%% ---- node "ID"\n", c) ;
917 
918  /* dump L */
919  LUnode = LU->LUnode [c] ;
920  Lip = LUnode->lp ;
921  Llen = LUnode->llen ;
922  LUix = LUnode->ix ;
923  nfound = LUnode->PK_NFOUND ;
924  for (j = 0 ; j < nfound ; j++)
925  {
926  GET_COLUMN (Lip, Llen, LUix, j, Li, Lx, llen) ;
927  printf ("\nL (1+"ID",1+"ID") = 1 ;\n", j, j) ;
928  for (p = 1 ; p < llen ; p++)
929  {
930  printf ("L (1+"ID",1+"ID") = %.16g ;\n", Li [p], j, Lx [p]) ;
931  }
932  }
933 
934  /* dump U */
935  cn = LUnode->PK_NN ;
936  Uip = LUnode->up ;
937  Ulen = LUnode->ulen ;
938  for (j = cn-1 ; j >= nfound ; j--)
939  {
940  printf ("\n") ;
941  GET_COLUMN (Uip, Ulen, LUix, j, Ui, Ux, ulen) ;
942  for (p = 0 ; p < ulen ; p++)
943  {
944  printf ("U (1+"ID",1+"ID") = %.16g ;\n", Ui [p], j, Ux [p]) ;
945  }
946  }
947  for ( ; j >= 0 ; j--)
948  {
949  GET_COLUMN (Uip, Ulen, LUix, j, Ui, Ux, ulen) ;
950  printf ("\nU (1+"ID",1+"ID") = %.16g ; %% pivot\n",
951  j, j, Ux [ulen-1]) ;
952  for (p = 0 ; p < ulen-1 ; p++)
953  {
954  printf ("U (1+"ID",1+"ID") = %.16g ;\n", Ui [p], j, Ux [p]) ;
955  }
956  }
957  }
958 
959  if (Common->nproc == 1 && Common->dump > 1)
960  {
961  printf ("norm (L*U-A(P,Q))\n") ;
962  }
963  }
964 #endif
965 
966  return (LU) ;
967 }
#define TAG0
#define MPI_Int
#define EMPTY
#define Int
static paraklete_numeric * amesos_paraklete_allocate_numeric(paraklete_symbolic *LUsymbolic, paraklete_common *Common)
#define FALSE
#define PR0(params)
#define PK_HEADER
struct paraklete_numeric_struct paraklete_numeric
#define P(k)
#define PARAKLETE_ERROR(status, message)
#define CHOLMOD(name)
Int amesos_paraklete_factorize_node(Int c, paraklete_numeric *LU, paraklete_symbolic *LUsymbolic, paraklete_common *Common)
#define PK_SINGULAR
int CHOLMOD() print_sparse(cholmod_sparse *A, char *name, cholmod_common *Common)
#define PK_OK
int CHOLMOD() sort(cholmod_sparse *A, cholmod_common *Common)
#define PR2(params)
int CHOLMOD() free_sparse(cholmod_sparse **AHandle, cholmod_common *Common)
#define GET_COLUMN(Ap, Anz, Aix, j, Ai, Ax, len)
void amesos_paraklete_free_numeric(paraklete_numeric **LUHandle, paraklete_common *Common)
#define NULL
#define ASSERT(expression)
#define ID
paraklete_numeric * amesos_paraklete_factorize(cholmod_sparse *A, paraklete_symbolic *LUsymbolic, paraklete_common *Common)
static cholmod_sparse * paraklete_permute(cholmod_sparse *A, paraklete_numeric *LU, paraklete_symbolic *LUsymbolic, paraklete_common *Common)
#define PR1(params)
#define PR3(params)
#define CHOLMOD_OK
#define PK_OUT_OF_MEMORY
#define MPI(statement)
cholmod_sparse *CHOLMOD() allocate_sparse(size_t nrow, size_t ncol, size_t nzmax, int sorted, int packed, int stype, int xtype, cholmod_common *Common)
struct paraklete_node_struct paraklete_node
#define DEBUG(statement)
Int my_tries
#define MIN(a, b)
#define PARAKLETE_MAGIC
int n
#define TRUE
#define PK_UNKNOWN