IFPACK  Development
 All Classes Namespaces Files Functions Variables Enumerations Friends Pages
SubdomainGraph_dh.c
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #include "SubdomainGraph_dh.h"
44 #include "getRow_dh.h"
45 #include "Mem_dh.h"
46 #include "Parser_dh.h"
47 #include "Hash_i_dh.h"
48 #include "mat_dh_private.h"
49 #include "io_dh.h"
50 #include "SortedSet_dh.h"
51 #include "shellSort_dh.h"
52 
53 
54 /* for debugging only! */
55 #include <unistd.h>
56 
57 static void init_seq_private (SubdomainGraph_dh s, int blocks, bool bj,
58  void *A);
59 static void init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj,
60  void *A);
61 /*
62 static void partition_metis_private(SubdomainGraph_dh s, void *A);
63 
64  grep for same below!
65 */
66 static void allocate_storage_private (SubdomainGraph_dh s, int blocks, int m,
67  bool bj);
68 static void form_subdomaingraph_mpi_private (SubdomainGraph_dh s);
69 static void form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m,
70  void *A);
71 static void find_all_neighbors_sym_private (SubdomainGraph_dh s, int m,
72  void *A);
73 static void find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m,
74  void *A);
75 static void find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
76  int *interiorNodes, int *bdryNodes,
77  int *interiorCount, int *bdryCount);
78 static void find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m,
79  void *A, int *interiorNodes,
80  int *bdryNodes, int *interiorCount,
81  int *bdryCount);
82 
83 static void find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A);
84  /* above also forms n2o[] and o2n[] */
85 
86 static void find_ordered_neighbors_private (SubdomainGraph_dh s);
87 static void color_subdomain_graph_private (SubdomainGraph_dh s);
88 static void adjust_matrix_perms_private (SubdomainGraph_dh s, int m);
89 
90 #undef __FUNC__
91 #define __FUNC__ "SubdomainGraph_dhCreate"
92 void
93 SubdomainGraph_dhCreate (SubdomainGraph_dh * s)
94 {
95  START_FUNC_DH
96  struct _subdomain_dh *tmp =
97  (struct _subdomain_dh *) MALLOC_DH (sizeof (struct _subdomain_dh));
98  CHECK_V_ERROR;
99  *s = tmp;
100 
101  tmp->blocks = 1;
102  tmp->ptrs = tmp->adj = NULL;
103  tmp->colors = 1;
104  tmp->colorVec = NULL;
105  tmp->o2n_sub = tmp->n2o_sub = NULL;
106  tmp->beg_row = tmp->beg_rowP = NULL;
107  tmp->bdry_count = tmp->row_count = NULL;
108  tmp->loNabors = tmp->hiNabors = tmp->allNabors = NULL;
109  tmp->loCount = tmp->hiCount = tmp->allCount = 0;
110 
111  tmp->m = 0;
112  tmp->n2o_row = tmp->o2n_col = NULL;
113  tmp->o2n_ext = tmp->n2o_ext = NULL;
114 
115  tmp->doNotColor = Parser_dhHasSwitch (parser_dh, "-doNotColor");
116  tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_SubGraph");
117  {
118  int i;
119  for (i = 0; i < TIMING_BINS_SG; ++i)
120  tmp->timing[i] = 0.0;
121  }
122 END_FUNC_DH}
123 
124 #undef __FUNC__
125 #define __FUNC__ "SubdomainGraph_dhDestroy"
126 void
127 SubdomainGraph_dhDestroy (SubdomainGraph_dh s)
128 {
129  START_FUNC_DH if (s->ptrs != NULL)
130  {
131  FREE_DH (s->ptrs);
132  CHECK_V_ERROR;
133  }
134  if (s->adj != NULL)
135  {
136  FREE_DH (s->adj);
137  CHECK_V_ERROR;
138  }
139  if (s->colorVec != NULL)
140  {
141  FREE_DH (s->colorVec);
142  CHECK_V_ERROR;
143  }
144  if (s->o2n_sub != NULL)
145  {
146  FREE_DH (s->o2n_sub);
147  CHECK_V_ERROR;
148  }
149  if (s->n2o_sub != NULL)
150  {
151  FREE_DH (s->n2o_sub);
152  CHECK_V_ERROR;
153  }
154 
155  if (s->beg_row != NULL)
156  {
157  FREE_DH (s->beg_row);
158  CHECK_V_ERROR;
159  }
160  if (s->beg_rowP != NULL)
161  {
162  FREE_DH (s->beg_rowP);
163  CHECK_V_ERROR;
164  }
165  if (s->row_count != NULL)
166  {
167  FREE_DH (s->row_count);
168  CHECK_V_ERROR;
169  }
170  if (s->bdry_count != NULL)
171  {
172  FREE_DH (s->bdry_count);
173  CHECK_V_ERROR;
174  }
175  if (s->loNabors != NULL)
176  {
177  FREE_DH (s->loNabors);
178  CHECK_V_ERROR;
179  }
180  if (s->hiNabors != NULL)
181  {
182  FREE_DH (s->hiNabors);
183  CHECK_V_ERROR;
184  }
185  if (s->allNabors != NULL)
186  {
187  FREE_DH (s->allNabors);
188  CHECK_V_ERROR;
189  }
190 
191  if (s->n2o_row != NULL)
192  {
193  FREE_DH (s->n2o_row);
194  CHECK_V_ERROR;
195  }
196  if (s->o2n_col != NULL)
197  {
198  FREE_DH (s->o2n_col);
199  CHECK_V_ERROR;
200  }
201  if (s->o2n_ext != NULL)
202  {
203  Hash_i_dhDestroy (s->o2n_ext);
204  CHECK_V_ERROR;
205  }
206  if (s->n2o_ext != NULL)
207  {
208  Hash_i_dhDestroy (s->n2o_ext);
209  CHECK_V_ERROR;
210  }
211  FREE_DH (s);
212  CHECK_V_ERROR;
213 END_FUNC_DH}
214 
215 #undef __FUNC__
216 #define __FUNC__ "SubdomainGraph_dhInit"
217 void
218 SubdomainGraph_dhInit (SubdomainGraph_dh s, int blocks, bool bj, void *A)
219 {
220  START_FUNC_DH double t1 = MPI_Wtime ();
221 
222  if (blocks < 1)
223  blocks = 1;
224 
225  if (np_dh == 1 || blocks > 1)
226  {
227  s->blocks = blocks;
228  init_seq_private (s, blocks, bj, A);
229  CHECK_V_ERROR;
230  }
231  else
232  {
233  s->blocks = np_dh;
234  init_mpi_private (s, np_dh, bj, A);
235  CHECK_V_ERROR;
236  }
237 
238  s->timing[TOTAL_SGT] += (MPI_Wtime () - t1);
239 END_FUNC_DH}
240 
241 
242 #undef __FUNC__
243 #define __FUNC__ "SubdomainGraph_dhFindOwner"
244 int
245 SubdomainGraph_dhFindOwner (SubdomainGraph_dh s, int idx, bool permuted)
246 {
247  START_FUNC_DH int sd;
248  int *beg_row = s->beg_row;
249  int *row_count = s->row_count;
250  int owner = -1, blocks = s->blocks;
251 
252  if (permuted)
253  beg_row = s->beg_rowP;
254 
255  /* determine the subdomain that contains "idx" */
256  for (sd = 0; sd < blocks; ++sd)
257  {
258  if (idx >= beg_row[sd] && idx < beg_row[sd] + row_count[sd])
259  {
260  owner = sd;
261  break;
262  }
263  }
264 
265  if (owner == -1)
266  {
267 
268  fprintf (stderr, "@@@ failed to find owner for idx = %i @@@\n", idx);
269  fprintf (stderr, "blocks= %i\n", blocks);
270 
271  sprintf (msgBuf_dh, "failed to find owner for idx = %i", idx);
272  SET_ERROR (-1, msgBuf_dh);
273  }
274 
275 END_FUNC_VAL (owner)}
276 
277 
278 #undef __FUNC__
279 #define __FUNC__ "SubdomainGraph_dhPrintStatsLong"
280 void
281 SubdomainGraph_dhPrintStatsLong (SubdomainGraph_dh s, FILE * fp)
282 {
283  START_FUNC_DH int i, j, k;
284  double max = 0, min = INT_MAX;
285 
286  fprintf (fp,
287  "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n");
288  fprintf (fp, "colors used = %i\n", s->colors);
289  fprintf (fp, "subdomain count = %i\n", s->blocks);
290 
291 
292  fprintf (fp, "\ninterior/boundary node ratios:\n");
293 
294  for (i = 0; i < s->blocks; ++i)
295  {
296  int inNodes = s->row_count[i] - s->bdry_count[i];
297  int bdNodes = s->bdry_count[i];
298  double ratio;
299 
300  if (bdNodes == 0)
301  {
302  ratio = -1;
303  }
304  else
305  {
306  ratio = (double) inNodes / (double) bdNodes;
307  }
308 
309  max = MAX (max, ratio);
310  min = MIN (min, ratio);
311  fprintf (fp,
312  " P_%i: first= %3i rowCount= %3i interior= %3i bdry= %3i ratio= %0.1f\n",
313  i, 1 + s->beg_row[i], s->row_count[i], inNodes, bdNodes,
314  ratio);
315  }
316 
317 
318  fprintf (fp, "\nmax interior/bdry ratio = %.1f\n", max);
319  fprintf (fp, "min interior/bdry ratio = %.1f\n", min);
320 
321 
322  /*-----------------------------------------
323  * subdomain graph
324  *-----------------------------------------*/
325  if (s->adj != NULL)
326  {
327  fprintf (fp, "\nunpermuted subdomain graph: \n");
328  for (i = 0; i < s->blocks; ++i)
329  {
330  fprintf (fp, "%i :: ", i);
331  for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
332  {
333  fprintf (fp, "%i ", s->adj[j]);
334  }
335  fprintf (fp, "\n");
336  }
337  }
338 
339 
340  /*-----------------------------------------
341  * subdomain permutation
342  *-----------------------------------------*/
343  fprintf (fp, "\no2n subdomain permutation:\n");
344  for (i = 0; i < s->blocks; ++i)
345  {
346  fprintf (fp, " %i %i\n", i, s->o2n_sub[i]);
347  }
348  fprintf (fp, "\n");
349 
350  if (np_dh > 1)
351  {
352 
353  /*-----------------------------------------
354  * local n2o_row permutation
355  *-----------------------------------------*/
356  fprintf (fp, "\nlocal n2o_row permutation:\n ");
357  for (i = 0; i < s->row_count[myid_dh]; ++i)
358  {
359  fprintf (fp, "%i ", s->n2o_row[i]);
360  }
361  fprintf (fp, "\n");
362 
363  /*-----------------------------------------
364  * local n2o permutation
365  *-----------------------------------------*/
366  fprintf (fp, "\nlocal o2n_col permutation:\n ");
367  for (i = 0; i < s->row_count[myid_dh]; ++i)
368  {
369  fprintf (fp, "%i ", s->o2n_col[i]);
370  }
371  fprintf (fp, "\n");
372 
373  }
374  else
375  {
376  /*-----------------------------------------
377  * local n2o_row permutation
378  *-----------------------------------------*/
379  fprintf (fp, "\nlocal n2o_row permutation:\n");
380  fprintf (fp, "--------------------------\n");
381  for (k = 0; k < s->blocks; ++k)
382  {
383  int beg_row = s->beg_row[k];
384  int end_row = beg_row + s->row_count[k];
385 
386  for (i = beg_row; i < end_row; ++i)
387  {
388  fprintf (fp, "%i ", s->n2o_row[i]);
389  }
390  fprintf (fp, "\n");
391  }
392 
393  /*-----------------------------------------
394  * local n2o permutation
395  *-----------------------------------------*/
396  fprintf (fp, "\nlocal o2n_col permutation:\n");
397  fprintf (fp, "--------------------------\n");
398  for (k = 0; k < s->blocks; ++k)
399  {
400  int beg_row = s->beg_row[k];
401  int end_row = beg_row + s->row_count[k];
402 
403  for (i = beg_row; i < end_row; ++i)
404  {
405  fprintf (fp, "%i ", s->o2n_col[i]);
406  }
407  fprintf (fp, "\n");
408  }
409 
410 
411  }
412 
413 END_FUNC_DH}
414 
415 #undef __FUNC__
416 #define __FUNC__ "init_seq_private"
417 void
418 init_seq_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
419 {
420  START_FUNC_DH int m, n, beg_row;
421  double t1;
422 
423  /*-------------------------------------------------------
424  * get number of local rows (m), global rows (n), and
425  * global numbering of first locally owned row
426  * (for sequential, beg_row=0 and m == n
427  *-------------------------------------------------------*/
428  EuclidGetDimensions (A, &beg_row, &m, &n);
429  CHECK_V_ERROR;
430  s->m = n;
431 
432  /*-------------------------------------------------------
433  * allocate storage for all data structures
434  * EXCEPT s->adj and hash tables.
435  * (but note that hash tables aren't used for sequential)
436  *-------------------------------------------------------*/
437  allocate_storage_private (s, blocks, m, bj);
438  CHECK_V_ERROR;
439 
440  /*-------------------------------------------------------------
441  * Fill in: beg_row[]
442  * beg_rowP[]
443  * row_count[]
444  * At this point, beg_rowP[] is a copy of beg_row[])
445  *-------------------------------------------------------------*/
446  {
447  int i;
448  int rpp = m / blocks;
449 
450  if (rpp * blocks < m)
451  ++rpp;
452 
453  s->beg_row[0] = 0;
454  for (i = 1; i < blocks; ++i)
455  s->beg_row[i] = rpp + s->beg_row[i - 1];
456  for (i = 0; i < blocks; ++i)
457  s->row_count[i] = rpp;
458  s->row_count[blocks - 1] = m - rpp * (blocks - 1);
459  }
460  memcpy (s->beg_rowP, s->beg_row, blocks * sizeof (int));
461 
462 
463  /*-----------------------------------------------------------------
464  * Find all neighboring processors in subdomain graph.
465  * This block fills in: allNabors[]
466  *-----------------------------------------------------------------*/
467  /* NA for sequential! */
468 
469 
470  /*-------------------------------------------------------
471  * Count number of interior nodes for each subdomain;
472  * also, form permutation vector to order boundary
473  * nodes last in each subdomain.
474  * This block fills in: bdry_count[]
475  * n2o_col[]
476  * o2n_row[]
477  *-------------------------------------------------------*/
478  t1 = MPI_Wtime ();
479  if (!bj)
480  {
481  find_bdry_nodes_seq_private (s, m, A);
482  CHECK_V_ERROR;
483  }
484  else
485  {
486  int i;
487  for (i = 0; i < m; ++i)
488  {
489  s->n2o_row[i] = i;
490  s->o2n_col[i] = i;
491  }
492  }
493  s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
494 
495  /*-------------------------------------------------------
496  * Form subdomain graph,
497  * then color and reorder subdomain graph.
498  * This block fills in: ptr[]
499  * adj[]
500  * o2n_sub[]
501  * n2o_sub[]
502  * beg_rowP[]
503  *-------------------------------------------------------*/
504  t1 = MPI_Wtime ();
505  if (!bj)
506  {
507  form_subdomaingraph_seq_private (s, m, A);
508  CHECK_V_ERROR;
509  if (s->doNotColor)
510  {
511  int i;
512  printf_dh ("subdomain coloring and reordering is OFF\n");
513  for (i = 0; i < blocks; ++i)
514  {
515  s->o2n_sub[i] = i;
516  s->n2o_sub[i] = i;
517  s->colorVec[i] = 0;
518  }
519  }
520  else
521  {
522  SET_INFO ("subdomain coloring and reordering is ON");
523  color_subdomain_graph_private (s);
524  CHECK_V_ERROR;
525  }
526  }
527 
528  /* bj setup */
529  else
530  {
531  int i;
532  for (i = 0; i < blocks; ++i)
533  {
534  s->o2n_sub[i] = i;
535  s->n2o_sub[i] = i;
536  }
537  }
538  s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
539 
540  /*-------------------------------------------------------
541  * Here's a step we DON'T do for the parallel case:
542  * we need to adjust the matrix row and column perms
543  * to reflect subdomain reordering (for the parallel
544  * case, these permutation vectors are purely local and
545  * zero-based)
546  *-------------------------------------------------------*/
547  if (!bj)
548  {
549  adjust_matrix_perms_private (s, m);
550  CHECK_V_ERROR;
551  }
552 
553  /*-------------------------------------------------------
554  * Build lists of lower and higher ordered neighbors.
555  * This block fills in: loNabors[]
556  * hiNabors[]
557  *-------------------------------------------------------*/
558  /* NA for sequential */
559 
560  /*-------------------------------------------------------
561  * Exchange boundary node permutation information with
562  * neighboring processors in the subdomain graph.
563  * This block fills in: o2n_ext (hash table)
564  * n2o_ext (hash table)
565  *-------------------------------------------------------*/
566  /* NA for sequential */
567 
568 
569 END_FUNC_DH}
570 
571 
572 #if 0
573 #undef __FUNC__
574 #define __FUNC__ "partition_metis_private"
575 void
576 partition_metis_private (SubdomainGraph_dh s, void *A)
577 {
578  START_FUNC_DH if (ignoreMe)
579  SET_V_ERROR ("not implemented");
580 END_FUNC_DH}
581 #endif
582 
583 #undef __FUNC__
584 #define __FUNC__ "allocate_storage_private"
585 void
586 allocate_storage_private (SubdomainGraph_dh s, int blocks, int m, bool bj)
587 {
588  START_FUNC_DH if (!bj)
589  {
590  s->ptrs = (int *) MALLOC_DH ((blocks + 1) * sizeof (int));
591  CHECK_V_ERROR;
592  s->ptrs[0] = 0;
593  s->colorVec = (int *) MALLOC_DH (blocks * sizeof (int));
594  CHECK_V_ERROR;
595  s->loNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
596  CHECK_V_ERROR;
597  s->hiNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
598  CHECK_V_ERROR;
599  s->allNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
600  CHECK_V_ERROR;
601  }
602 
603  s->n2o_row = (int *) MALLOC_DH (m * sizeof (int));
604  CHECK_V_ERROR;
605  s->o2n_col = (int *) MALLOC_DH (m * sizeof (int));
606  CHECK_V_ERROR;
607 
608  /* these are probably only needed for single mpi task -- ?? */
609  /* nope; beg_row and row_ct are needed by ilu_mpi_bj; yuck! */
610  s->beg_row = (int *) MALLOC_DH ((blocks) * sizeof (int));
611  CHECK_V_ERROR;
612  s->beg_rowP = (int *) MALLOC_DH ((blocks) * sizeof (int));
613  CHECK_V_ERROR;
614  s->row_count = (int *) MALLOC_DH (blocks * sizeof (int));
615  CHECK_V_ERROR;
616  s->bdry_count = (int *) MALLOC_DH (blocks * sizeof (int));
617  CHECK_V_ERROR;
618  s->o2n_sub = (int *) MALLOC_DH (blocks * sizeof (int));
619  CHECK_V_ERROR;
620  s->n2o_sub = (int *) MALLOC_DH (blocks * sizeof (int));
621  CHECK_V_ERROR;
622 
623 END_FUNC_DH}
624 
625 /*-----------------------------------------------------------------*/
626 
627 
628 #undef __FUNC__
629 #define __FUNC__ "init_mpi_private"
630 void
631 init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj, void *A)
632 {
633  START_FUNC_DH int m, n, beg_row;
634  bool symmetric;
635  double t1;
636 
637  symmetric = Parser_dhHasSwitch (parser_dh, "-sym");
638  CHECK_V_ERROR;
639  if (Parser_dhHasSwitch (parser_dh, "-makeSymmetric"))
640  {
641  symmetric = true;
642  }
643 
644  /*-------------------------------------------------------
645  * get number of local rows (m), global rows (n), and
646  * global numbering of first locally owned row
647  *-------------------------------------------------------*/
648  EuclidGetDimensions (A, &beg_row, &m, &n);
649  CHECK_V_ERROR;
650  s->m = m;
651 
652 
653  /*-------------------------------------------------------
654  * allocate storage for all data structures
655  * EXCEPT s->adj and hash tables.
656  *-------------------------------------------------------*/
657  allocate_storage_private (s, blocks, m, bj);
658  CHECK_V_ERROR;
659 
660  /*-------------------------------------------------------------
661  * Fill in: beg_row[]
662  * beg_rowP[]
663  * row_count[]
664  * At this point, beg_rowP[] is a copy of beg_row[])
665  *-------------------------------------------------------------*/
666  if (!bj)
667  {
668  MPI_Allgather (&beg_row, 1, MPI_INT, s->beg_row, 1, MPI_INT, comm_dh);
669  MPI_Allgather (&m, 1, MPI_INT, s->row_count, 1, MPI_INT, comm_dh);
670  memcpy (s->beg_rowP, s->beg_row, np_dh * sizeof (int));
671  }
672  else
673  {
674  s->beg_row[myid_dh] = beg_row;
675  s->beg_rowP[myid_dh] = beg_row;
676  s->row_count[myid_dh] = m;
677  }
678 
679  /*-----------------------------------------------------------------
680  * Find all neighboring processors in subdomain graph.
681  * This block fills in: allNabors[]
682  *-----------------------------------------------------------------*/
683  if (!bj)
684  {
685  t1 = MPI_Wtime ();
686  if (symmetric)
687  {
688  find_all_neighbors_sym_private (s, m, A);
689  CHECK_V_ERROR;
690  }
691  else
692  {
693  find_all_neighbors_unsym_private (s, m, A);
694  CHECK_V_ERROR;
695  }
696  s->timing[FIND_NABORS_SGT] += (MPI_Wtime () - t1);
697  }
698 
699 
700  /*-----------------------------------------------------------------
701  * determine which rows are boundary rows, and which are interior
702  * rows; also, form permutation vector to order interior
703  * nodes first within each subdomain
704  * This block fills in: bdry_count[]
705  * n2o_col[]
706  * o2n_row[]
707  *-----------------------------------------------------------------*/
708  t1 = MPI_Wtime ();
709  if (!bj)
710  {
711  int *interiorNodes, *bdryNodes;
712  int interiorCount, bdryCount;
713  int *o2n = s->o2n_col, idx;
714  int i;
715 
716  interiorNodes = (int *) MALLOC_DH (m * sizeof (int));
717  CHECK_V_ERROR;
718  bdryNodes = (int *) MALLOC_DH (m * sizeof (int));
719  CHECK_V_ERROR;
720 
721  /* divide this subdomain's rows into interior and boundary rows;
722  the returned lists are with respect to local numbering.
723  */
724  if (symmetric)
725  {
726  find_bdry_nodes_sym_private (s, m, A,
727  interiorNodes, bdryNodes,
728  &interiorCount, &bdryCount);
729  CHECK_V_ERROR;
730  }
731  else
732  {
733  find_bdry_nodes_unsym_private (s, m, A,
734  interiorNodes, bdryNodes,
735  &interiorCount, &bdryCount);
736  CHECK_V_ERROR;
737  }
738 
739  /* exchange number of boundary rows with all neighbors */
740  MPI_Allgather (&bdryCount, 1, MPI_INT, s->bdry_count, 1, MPI_INT,
741  comm_dh);
742 
743  /* form local permutation */
744  idx = 0;
745  for (i = 0; i < interiorCount; ++i)
746  {
747  o2n[interiorNodes[i]] = idx++;
748  }
749  for (i = 0; i < bdryCount; ++i)
750  {
751  o2n[bdryNodes[i]] = idx++;
752  }
753 
754  /* invert permutation */
755  invert_perm (m, o2n, s->n2o_row);
756  CHECK_V_ERROR;
757 
758  FREE_DH (interiorNodes);
759  CHECK_V_ERROR;
760  FREE_DH (bdryNodes);
761  CHECK_V_ERROR;
762  }
763 
764  /* bj setup */
765  else
766  {
767  int *o2n = s->o2n_col, *n2o = s->n2o_row;
768  int i, m = s->m;
769 
770  for (i = 0; i < m; ++i)
771  {
772  o2n[i] = i;
773  n2o[i] = i;
774  }
775  }
776  s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1);
777 
778  /*-------------------------------------------------------
779  * Form subdomain graph,
780  * then color and reorder subdomain graph.
781  * This block fills in: ptr[]
782  * adj[]
783  * o2n_sub[]
784  * n2o_sub[]
785  * beg_rowP[]
786  *-------------------------------------------------------*/
787  if (!bj)
788  {
789  t1 = MPI_Wtime ();
790  form_subdomaingraph_mpi_private (s);
791  CHECK_V_ERROR;
792  if (s->doNotColor)
793  {
794  int i;
795  printf_dh ("subdomain coloring and reordering is OFF\n");
796  for (i = 0; i < blocks; ++i)
797  {
798  s->o2n_sub[i] = i;
799  s->n2o_sub[i] = i;
800  s->colorVec[i] = 0;
801  }
802  }
803  else
804  {
805  SET_INFO ("subdomain coloring and reordering is ON");
806  color_subdomain_graph_private (s);
807  CHECK_V_ERROR;
808  }
809  s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1);
810  }
811 
812  /*-------------------------------------------------------
813  * Build lists of lower and higher ordered neighbors.
814  * This block fills in: loNabors[]
815  * hiNabors[]
816  *-------------------------------------------------------*/
817  if (!bj)
818  {
819  find_ordered_neighbors_private (s);
820  CHECK_V_ERROR;
821  }
822 
823  /*-------------------------------------------------------
824  * Exchange boundary node permutation information with
825  * neighboring processors in the subdomain graph.
826  * This block fills in: o2n_ext (hash table)
827  * n2o_ext (hash table)
828  *-------------------------------------------------------*/
829  if (!bj)
830  {
831  t1 = MPI_Wtime ();
832  SubdomainGraph_dhExchangePerms (s);
833  CHECK_V_ERROR;
834  s->timing[EXCHANGE_PERMS_SGT] += (MPI_Wtime () - t1);
835  }
836 
837 END_FUNC_DH}
838 
839 
840 
841 #undef __FUNC__
842 #define __FUNC__ "SubdomainGraph_dhExchangePerms"
843 void
844 SubdomainGraph_dhExchangePerms (SubdomainGraph_dh s)
845 {
846  START_FUNC_DH MPI_Request * recv_req = NULL, *send_req = NULL;
847  MPI_Status *status = NULL;
848  int *nabors = s->allNabors, naborCount = s->allCount;
849  int i, j, *sendBuf = NULL, *recvBuf = NULL, *naborIdx = NULL, nz;
850  int m = s->row_count[myid_dh];
851  int beg_row = s->beg_row[myid_dh];
852  int beg_rowP = s->beg_rowP[myid_dh];
853  int *bdryNodeCounts = s->bdry_count;
854  int myBdryCount = s->bdry_count[myid_dh];
855  bool debug = false;
856  int myFirstBdry = m - myBdryCount;
857  int *n2o_row = s->n2o_row;
858  Hash_i_dh n2o_table, o2n_table;
859 
860  if (logFile != NULL && s->debug)
861  debug = true;
862 
863  /* allocate send buffer, and copy permutation info to buffer;
864  each entry is a <old_value, new_value> pair.
865  */
866  sendBuf = (int *) MALLOC_DH (2 * myBdryCount * sizeof (int));
867  CHECK_V_ERROR;
868 
869 
870  if (debug)
871  {
872  fprintf (logFile,
873  "\nSUBG myFirstBdry= %i myBdryCount= %i m= %i beg_rowP= %i\n",
874  1 + myFirstBdry, myBdryCount, m, 1 + beg_rowP);
875  fflush (logFile);
876  }
877 
878  for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
879  {
880  sendBuf[2 * j] = n2o_row[i] + beg_row;
881  sendBuf[2 * j + 1] = i + beg_rowP;
882  }
883 
884  if (debug)
885  {
886  fprintf (logFile, "\nSUBG SEND_BUF:\n");
887  for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j)
888  {
889  fprintf (logFile, "SUBG %i, %i\n", 1 + sendBuf[2 * j],
890  1 + sendBuf[2 * j + 1]);
891  }
892  fflush (logFile);
893  }
894 
895  /* allocate a receive buffer for each nabor in the subdomain graph,
896  and set up index array for locating the beginning of each
897  nabor's buffers.
898  */
899  naborIdx = (int *) MALLOC_DH ((1 + naborCount) * sizeof (int));
900  CHECK_V_ERROR;
901  naborIdx[0] = 0;
902  nz = 0;
903  for (i = 0; i < naborCount; ++i)
904  {
905  nz += (2 * bdryNodeCounts[nabors[i]]);
906  naborIdx[i + 1] = nz;
907  }
908 
909 
910  recvBuf = (int *) MALLOC_DH (nz * sizeof (int));
911  CHECK_V_ERROR;
912 
913 
914 /* for (i=0; i<nz; ++i) recvBuf[i] = -10; */
915 
916  /* perform sends and receives */
917  recv_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
918  CHECK_V_ERROR;
919  send_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request));
920  CHECK_V_ERROR;
921  status = (MPI_Status *) MALLOC_DH (naborCount * sizeof (MPI_Status));
922  CHECK_V_ERROR;
923 
924  for (i = 0; i < naborCount; ++i)
925  {
926  int nabr = nabors[i];
927  int *buf = recvBuf + naborIdx[i];
928  int ct = 2 * bdryNodeCounts[nabr];
929 
930 
931  MPI_Isend (sendBuf, 2 * myBdryCount, MPI_INT, nabr, 444, comm_dh,
932  &(send_req[i]));
933 
934  if (debug)
935  {
936  fprintf (logFile, "SUBG sending %i elts to %i\n", 2 * myBdryCount,
937  nabr);
938  fflush (logFile);
939  }
940 
941  MPI_Irecv (buf, ct, MPI_INT, nabr, 444, comm_dh, &(recv_req[i]));
942 
943  if (debug)
944  {
945  fprintf (logFile, "SUBG receiving %i elts from %i\n", ct, nabr);
946  fflush (logFile);
947  }
948  }
949 
950  MPI_Waitall (naborCount, send_req, status);
951  MPI_Waitall (naborCount, recv_req, status);
952 
953  Hash_i_dhCreate (&n2o_table, nz / 2);
954  CHECK_V_ERROR;
955  Hash_i_dhCreate (&o2n_table, nz / 2);
956  CHECK_V_ERROR;
957  s->n2o_ext = n2o_table;
958  s->o2n_ext = o2n_table;
959 
960  /* insert non-local boundary node permutations in lookup tables */
961  for (i = 0; i < nz; i += 2)
962  {
963  int old = recvBuf[i];
964  int new = recvBuf[i + 1];
965 
966  if (debug)
967  {
968  fprintf (logFile, "SUBG i= %i old= %i new= %i\n", i, old + 1,
969  new + 1);
970  fflush (logFile);
971  }
972 
973  Hash_i_dhInsert (o2n_table, old, new);
974  CHECK_V_ERROR;
975  Hash_i_dhInsert (n2o_table, new, old);
976  CHECK_V_ERROR;
977  }
978 
979 
980  if (recvBuf != NULL)
981  {
982  FREE_DH (recvBuf);
983  CHECK_V_ERROR;
984  }
985  if (naborIdx != NULL)
986  {
987  FREE_DH (naborIdx);
988  CHECK_V_ERROR;
989  }
990  if (sendBuf != NULL)
991  {
992  FREE_DH (sendBuf);
993  CHECK_V_ERROR;
994  }
995  if (recv_req != NULL)
996  {
997  FREE_DH (recv_req);
998  CHECK_V_ERROR;
999  }
1000  if (send_req != NULL)
1001  {
1002  FREE_DH (send_req);
1003  CHECK_V_ERROR;
1004  }
1005  if (status != NULL)
1006  {
1007  FREE_DH (status);
1008  CHECK_V_ERROR;
1009  }
1010 
1011 END_FUNC_DH}
1012 
1013 
1014 
1015 #undef __FUNC__
1016 #define __FUNC__ "form_subdomaingraph_mpi_private"
1017 void
1018 form_subdomaingraph_mpi_private (SubdomainGraph_dh s)
1019 {
1020  START_FUNC_DH int *nabors = s->allNabors, nct = s->allCount;
1021  int *idxAll = NULL;
1022  int i, j, nz, *adj, *ptrs = s->ptrs;
1023  MPI_Request *recvReqs = NULL, sendReq;
1024  MPI_Status *statuses = NULL, status;
1025 
1026  /* all processors tell root how many nabors they have */
1027  if (myid_dh == 0)
1028  {
1029  idxAll = (int *) MALLOC_DH (np_dh * sizeof (int));
1030  CHECK_V_ERROR;
1031  }
1032  MPI_Gather (&nct, 1, MPI_INT, idxAll, 1, MPI_INT, 0, comm_dh);
1033 
1034  /* root counts edges in graph, and broacasts to all */
1035  if (myid_dh == 0)
1036  {
1037  nz = 0;
1038  for (i = 0; i < np_dh; ++i)
1039  nz += idxAll[i];
1040  }
1041  MPI_Bcast (&nz, 1, MPI_INT, 0, comm_dh);
1042 
1043  /* allocate space for adjacency lists (memory for the
1044  pointer array was previously allocated)
1045  */
1046  adj = s->adj = (int *) MALLOC_DH (nz * sizeof (int));
1047  CHECK_V_ERROR;
1048 
1049  /* root receives adjacency lists from all processors */
1050  if (myid_dh == 0)
1051  {
1052  recvReqs = (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request));
1053  CHECK_V_ERROR;
1054  statuses = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status));
1055  CHECK_V_ERROR;
1056 
1057  /* first, set up row pointer array */
1058  ptrs[0] = 0;
1059  for (j = 0; j < np_dh; ++j)
1060  ptrs[j + 1] = ptrs[j] + idxAll[j];
1061 
1062  /* second, start the receives */
1063  for (j = 0; j < np_dh; ++j)
1064  {
1065  int ct = idxAll[j];
1066 
1067  MPI_Irecv (adj + ptrs[j], ct, MPI_INT, j, 42, comm_dh,
1068  recvReqs + j);
1069  }
1070  }
1071 
1072  /* all processors send root their adjacency list */
1073  MPI_Isend (nabors, nct, MPI_INT, 0, 42, comm_dh, &sendReq);
1074 
1075  /* wait for comms to go through */
1076  if (myid_dh == 0)
1077  {
1078  MPI_Waitall (np_dh, recvReqs, statuses);
1079  }
1080  MPI_Wait (&sendReq, &status);
1081 
1082  /* root broadcasts assembled subdomain graph to all processors */
1083  MPI_Bcast (ptrs, 1 + np_dh, MPI_INT, 0, comm_dh);
1084  MPI_Bcast (adj, nz, MPI_INT, 0, comm_dh);
1085 
1086  if (idxAll != NULL)
1087  {
1088  FREE_DH (idxAll);
1089  CHECK_V_ERROR;
1090  }
1091  if (recvReqs != NULL)
1092  {
1093  FREE_DH (recvReqs);
1094  CHECK_V_ERROR;
1095  }
1096  if (statuses != NULL)
1097  {
1098  FREE_DH (statuses);
1099  CHECK_V_ERROR;
1100  }
1101 
1102 END_FUNC_DH}
1103 
1104 /* this is ugly and inefficient; but seq mode is primarily
1105  for debugging and testing, so there.
1106 */
1107 #undef __FUNC__
1108 #define __FUNC__ "form_subdomaingraph_seq_private"
1109 void
1110 form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m, void *A)
1111 {
1112  START_FUNC_DH int *dense, i, j, row, blocks = s->blocks;
1113  int *cval, len, *adj;
1114  int idx = 0, *ptrs = s->ptrs;
1115 
1116  /* allocate storage for adj[]; since this function is intended
1117  for debugging/testing, and the number of blocks should be
1118  relatively small, we'll punt and allocate the maximum
1119  possibly needed.
1120  */
1121  adj = s->adj = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
1122  CHECK_V_ERROR;
1123 
1124  dense = (int *) MALLOC_DH (blocks * blocks * sizeof (int));
1125  CHECK_V_ERROR;
1126  for (i = 0; i < blocks * blocks; ++i)
1127  dense[i] = 0;
1128 
1129  /* loop over each block's rows to identify all boundary nodes */
1130  for (i = 0; i < blocks; ++i)
1131  {
1132  int beg_row = s->beg_row[i];
1133  int end_row = beg_row + s->row_count[i];
1134 
1135  for (row = beg_row; row < end_row; ++row)
1136  {
1137  EuclidGetRow (A, row, &len, &cval, NULL);
1138  CHECK_V_ERROR;
1139  for (j = 0; j < len; ++j)
1140  {
1141  int col = cval[j];
1142  if (col < beg_row || col >= end_row)
1143  {
1144  int owner = SubdomainGraph_dhFindOwner (s, col, false);
1145  CHECK_V_ERROR;
1146  dense[i * blocks + owner] = 1;
1147  dense[owner * blocks + i] = 1;
1148  }
1149  }
1150  EuclidRestoreRow (A, row, &len, &cval, NULL);
1151  CHECK_V_ERROR;
1152  }
1153  }
1154 
1155  /* form sparse csr representation of subdomain graph
1156  from dense representation
1157  */
1158  ptrs[0] = 0;
1159  for (i = 0; i < blocks; ++i)
1160  {
1161  for (j = 0; j < blocks; ++j)
1162  {
1163  if (dense[i * blocks + j])
1164  {
1165  adj[idx++] = j;
1166  }
1167  }
1168  ptrs[i + 1] = idx;
1169  }
1170 
1171  FREE_DH (dense);
1172  CHECK_V_ERROR;
1173 END_FUNC_DH}
1174 
1175 
1176 #undef __FUNC__
1177 #define __FUNC__ "find_all_neighbors_sym_private"
1178 void
1179 find_all_neighbors_sym_private (SubdomainGraph_dh s, int m, void *A)
1180 {
1181  START_FUNC_DH int *marker, i, j, beg_row, end_row;
1182  int row, len, *cval, ct = 0;
1183  int *nabors = s->allNabors;
1184 
1185  marker = (int *) MALLOC_DH (m * sizeof (int));
1186  CHECK_V_ERROR;
1187  for (i = 0; i < m; ++i)
1188  marker[i] = 0;
1189 
1190  SET_INFO
1191  ("finding nabors in subdomain graph for structurally symmetric matrix");
1192  SET_INFO ("(if this isn't what you want, use '-sym 0' switch)");
1193 
1194  beg_row = s->beg_row[myid_dh];
1195  end_row = beg_row + s->row_count[myid_dh];
1196 
1197  for (row = beg_row; row < end_row; ++row)
1198  {
1199  EuclidGetRow (A, row, &len, &cval, NULL);
1200  CHECK_V_ERROR;
1201  for (j = 0; j < len; ++j)
1202  {
1203  int col = cval[j];
1204  if (col < beg_row || col >= end_row)
1205  {
1206  int owner = SubdomainGraph_dhFindOwner (s, col, false);
1207  CHECK_V_ERROR;
1208  if (!marker[owner])
1209  {
1210  marker[owner] = 1;
1211  nabors[ct++] = owner;
1212  }
1213  }
1214  }
1215  EuclidRestoreRow (A, row, &len, &cval, NULL);
1216  CHECK_V_ERROR;
1217  }
1218  s->allCount = ct;
1219 
1220 /* fprintf(logFile, "@@@@@ allCount= %i\n", ct); */
1221 
1222  if (marker != NULL)
1223  {
1224  FREE_DH (marker);
1225  CHECK_V_ERROR;
1226  }
1227 END_FUNC_DH}
1228 
1229 #undef __FUNC__
1230 #define __FUNC__ "find_all_neighbors_unsym_private"
1231 void
1232 find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m, void *A)
1233 {
1234  START_FUNC_DH int i, j, row, beg_row, end_row;
1235  int *marker;
1236  int *cval, len, idx = 0;
1237  int nz, *nabors = s->allNabors, *myNabors;
1238 
1239  myNabors = (int *) MALLOC_DH (np_dh * sizeof (int));
1240  CHECK_V_ERROR;
1241  marker = (int *) MALLOC_DH (np_dh * sizeof (int));
1242  CHECK_V_ERROR;
1243  for (i = 0; i < np_dh; ++i)
1244  marker[i] = 0;
1245 
1246  SET_INFO
1247  ("finding nabors in subdomain graph for structurally unsymmetric matrix");
1248 
1249  /* loop over this block's boundary rows, finding all nabors in
1250  subdomain graph
1251  */
1252  beg_row = s->beg_row[myid_dh];
1253  end_row = beg_row + s->row_count[myid_dh];
1254 
1255 
1256 
1257  /*for each locally owned row ... */
1258  for (row = beg_row; row < end_row; ++row)
1259  {
1260  EuclidGetRow (A, row, &len, &cval, NULL);
1261  CHECK_V_ERROR;
1262  for (j = 0; j < len; ++j)
1263  {
1264  int col = cval[j];
1265  /*for each column that corresponds to a non-locally owned row ... */
1266  if (col < beg_row || col >= end_row)
1267  {
1268  int owner = SubdomainGraph_dhFindOwner (s, col, false);
1269  CHECK_V_ERROR;
1270  /*if I've not yet done so ... */
1271  if (!marker[owner])
1272  {
1273  marker[owner] = 1;
1274  /*append the non-local row's owner in to the list of my nabors
1275  in the subdomain graph */
1276  myNabors[idx++] = owner;
1277  }
1278  }
1279  }
1280  EuclidRestoreRow (A, row, &len, &cval, NULL);
1281  CHECK_V_ERROR;
1282  }
1283 
1284  /*
1285  at this point, idx = the number of my neighbors in the subdomain
1286  graph; equivalently, idx is the number of meaningfull slots in
1287  the myNabors array. -dah 1/31/06
1288  */
1289 
1290  /*
1291  at this point: marker[j] = 0 indicates that processor j is NOT my nabor
1292  marker[j] = 1 indicates that processor j IS my nabor
1293  however, there may be some nabors that can't be discovered in the above loop
1294  "//for each locally owned row;" this can happen if the matrix is
1295  structurally unsymmetric.
1296  -dah 1/31/06
1297  */
1298 
1299 /* fprintf(stderr, "[%i] marker: ", myid_dh);
1300 for (j=0; j<np_dh; j++) {
1301  fprintf(stderr, "[%i] (j=%d) %d\n", myid_dh, j, marker[j]);
1302 }
1303 fprintf(stderr, "\n");
1304 */
1305 
1306  /* find out who my neighbors are that I cannot discern locally */
1307  MPI_Alltoall (marker, 1, MPI_INT, nabors, 1, MPI_INT, comm_dh);
1308  CHECK_V_ERROR;
1309 
1310  /* add in neighbors that I know about from scanning my adjacency lists */
1311  for (i = 0; i < idx; ++i)
1312  nabors[myNabors[i]] = 1;
1313 
1314  /* remove self from the adjacency list */
1315  nabors[myid_dh] = 0;
1316 
1317  /*
1318  at this point: marker[j] = 0 indicates that processor j is NOT my nabor
1319  marker[j] = 1 indicates that processor j IS my nabor
1320  and this is guaranteed to be complete.
1321  */
1322 
1323  /* form final list of neighboring processors */
1324  nz = 0;
1325  for (i = 0; i < np_dh; ++i)
1326  {
1327  if (nabors[i])
1328  myNabors[nz++] = i;
1329  }
1330  s->allCount = nz;
1331  memcpy (nabors, myNabors, nz * sizeof (int));
1332 
1333  if (marker != NULL)
1334  {
1335  FREE_DH (marker);
1336  CHECK_V_ERROR;
1337  }
1338  if (myNabors != NULL)
1339  {
1340  FREE_DH (myNabors);
1341  CHECK_V_ERROR;
1342  }
1343 END_FUNC_DH}
1344 
1345 /*================================================================*/
1346 
1347 #undef __FUNC__
1348 #define __FUNC__ "find_bdry_nodes_sym_private"
1349 void
1350 find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A,
1351  int *interiorNodes, int *bdryNodes,
1352  int *interiorCount, int *bdryCount)
1353 {
1354  START_FUNC_DH int beg_row = s->beg_row[myid_dh];
1355  int end_row = beg_row + s->row_count[myid_dh];
1356  int row, inCt = 0, bdCt = 0;
1357 
1358  int j;
1359  int *cval;
1360 
1361  /* determine if the row is a boundary row */
1362  for (row = beg_row; row < end_row; ++row)
1363  { /* for each row in the subdomain */
1364  bool isBdry = false;
1365  int len;
1366  EuclidGetRow (A, row, &len, &cval, NULL);
1367  CHECK_V_ERROR;
1368 
1369  for (j = 0; j < len; ++j)
1370  { /* for each column in the row */
1371  int col = cval[j];
1372  if (col < beg_row || col >= end_row)
1373  {
1374  isBdry = true;
1375  break;
1376  }
1377  }
1378  EuclidRestoreRow (A, row, &len, &cval, NULL);
1379  CHECK_V_ERROR;
1380 
1381  if (isBdry)
1382  {
1383  bdryNodes[bdCt++] = row - beg_row;
1384  }
1385  else
1386  {
1387  interiorNodes[inCt++] = row - beg_row;
1388  }
1389  }
1390 
1391  *interiorCount = inCt;
1392  *bdryCount = bdCt;
1393 
1394 END_FUNC_DH}
1395 
1396 #define BDRY_NODE_TAG 42
1397 
1398 #undef __FUNC__
1399 #define __FUNC__ "find_bdry_nodes_unsym_private"
1400 void
1401 find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m, void *A,
1402  int *interiorNodes, int *boundaryNodes,
1403  int *interiorCount, int *bdryCount)
1404 {
1405  START_FUNC_DH int beg_row = s->beg_row[myid_dh];
1406  int end_row = beg_row + s->row_count[myid_dh];
1407  int i, j, row, max;
1408  int *cval;
1409  int *list, count;
1410  int *rpIN = NULL, *rpOUT = NULL;
1411  int *sendBuf, *recvBuf;
1412  int *marker, inCt, bdCt;
1413  int *bdryNodes, nz;
1414  int sendCt, recvCt;
1415  MPI_Request *sendReq, *recvReq;
1416  MPI_Status *status;
1417  SortedSet_dh ss;
1418 
1419  SortedSet_dhCreate (&ss, m);
1420  CHECK_V_ERROR;
1421 
1422  /*-----------------------------------------------------
1423  * identify all boundary nodes possible using locally
1424  * owned adjacency lists
1425  *-----------------------------------------------------*/
1426  for (row = beg_row; row < end_row; ++row)
1427  {
1428  bool isBdry = false;
1429  int len;
1430  EuclidGetRow (A, row, &len, &cval, NULL);
1431  CHECK_V_ERROR;
1432 
1433  for (j = 0; j < len; ++j)
1434  {
1435  int col = cval[j];
1436  if (col < beg_row || col >= end_row)
1437  {
1438  isBdry = true; /* this row is a boundary node */
1439  SortedSet_dhInsert (ss, col);
1440  CHECK_V_ERROR;
1441  /* the row "col" is also a boundary node */
1442  }
1443  }
1444  EuclidRestoreRow (A, row, &len, &cval, NULL);
1445  CHECK_V_ERROR;
1446 
1447  if (isBdry)
1448  {
1449  SortedSet_dhInsert (ss, row);
1450  CHECK_V_ERROR;
1451  }
1452  }
1453 
1454  /*-----------------------------------------------------
1455  * scan the sorted list to determine what boundary
1456  * node information to send to whom
1457  *-----------------------------------------------------*/
1458  sendBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
1459  CHECK_V_ERROR;
1460  recvBuf = (int *) MALLOC_DH (np_dh * sizeof (int));
1461  CHECK_V_ERROR;
1462  rpOUT = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
1463  CHECK_V_ERROR;
1464  rpOUT[0] = 0;
1465  for (i = 0; i < np_dh; ++i)
1466  sendBuf[i] = 0;
1467 
1468  sendCt = 0; /* total number of processor to whom we will send lists */
1469  SortedSet_dhGetList (ss, &list, &count);
1470  CHECK_V_ERROR;
1471 
1472  for (i = 0; i < count; /* i is set below */ )
1473  {
1474  int node = list[i];
1475  int owner;
1476  int last;
1477 
1478  owner = SubdomainGraph_dhFindOwner (s, node, false);
1479  CHECK_V_ERROR;
1480  last = s->beg_row[owner] + s->row_count[owner];
1481 
1482  /* determine the other boundary nodes that belong to owner */
1483  while ((i < count) && (list[i] < last))
1484  ++i;
1485  ++sendCt;
1486  rpOUT[sendCt] = i;
1487  sendBuf[owner] = rpOUT[sendCt] - rpOUT[sendCt - 1];
1488 
1489  }
1490 
1491  /*-----------------------------------------------------
1492  * processors tell each other how much information
1493  * each will send to whom
1494  *-----------------------------------------------------*/
1495  MPI_Alltoall (sendBuf, 1, MPI_INT, recvBuf, 1, MPI_INT, comm_dh);
1496  CHECK_V_ERROR;
1497 
1498  /*-----------------------------------------------------
1499  * exchange boundary node information
1500  * (note that we also exchange information with ourself!)
1501  *-----------------------------------------------------*/
1502 
1503  /* first, set up data structures to hold incoming information */
1504  rpIN = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int));
1505  CHECK_V_ERROR;
1506  rpIN[0] = 0;
1507  nz = 0;
1508  recvCt = 0;
1509  for (i = 0; i < np_dh; ++i)
1510  {
1511  if (recvBuf[i])
1512  {
1513  ++recvCt;
1514  nz += recvBuf[i];
1515  rpIN[recvCt] = nz;
1516  }
1517  }
1518  bdryNodes = (int *) MALLOC_DH (nz * sizeof (int));
1519  CHECK_V_ERROR;
1520  sendReq = (MPI_Request *) MALLOC_DH (sendCt * sizeof (MPI_Request));
1521  CHECK_V_ERROR;
1522  recvReq = (MPI_Request *) MALLOC_DH (recvCt * sizeof (MPI_Request));
1523  CHECK_V_ERROR;
1524  max = MAX (sendCt, recvCt);
1525  status = (MPI_Status *) MALLOC_DH (max * sizeof (MPI_Status));
1526  CHECK_V_ERROR;
1527 
1528  /* second, start receives for incoming data */
1529  j = 0;
1530  for (i = 0; i < np_dh; ++i)
1531  {
1532  if (recvBuf[i])
1533  {
1534  MPI_Irecv (bdryNodes + rpIN[j], recvBuf[i], MPI_INT,
1535  i, BDRY_NODE_TAG, comm_dh, recvReq + j);
1536  ++j;
1537  }
1538  }
1539 
1540  /* third, start sends for outgoing data */
1541  j = 0;
1542  for (i = 0; i < np_dh; ++i)
1543  {
1544  if (sendBuf[i])
1545  {
1546  MPI_Isend (list + rpOUT[j], sendBuf[i], MPI_INT,
1547  i, BDRY_NODE_TAG, comm_dh, sendReq + j);
1548  ++j;
1549  }
1550  }
1551 
1552  /* fourth, wait for all comms to finish */
1553  MPI_Waitall (sendCt, sendReq, status);
1554  MPI_Waitall (recvCt, recvReq, status);
1555 
1556  /* fifth, convert from global to local indices */
1557  for (i = 0; i < nz; ++i)
1558  bdryNodes[i] -= beg_row;
1559 
1560  /*-----------------------------------------------------
1561  * consolidate information from all processors to
1562  * identify all local boundary nodes
1563  *-----------------------------------------------------*/
1564  marker = (int *) MALLOC_DH (m * sizeof (int));
1565  CHECK_V_ERROR;
1566  for (i = 0; i < m; ++i)
1567  marker[i] = 0;
1568  for (i = 0; i < nz; ++i)
1569  marker[bdryNodes[i]] = 1;
1570 
1571  inCt = bdCt = 0;
1572  for (i = 0; i < m; ++i)
1573  {
1574  if (marker[i])
1575  {
1576  boundaryNodes[bdCt++] = i;
1577  }
1578  else
1579  {
1580  interiorNodes[inCt++] = i;
1581  }
1582  }
1583  *interiorCount = inCt;
1584  *bdryCount = bdCt;
1585 
1586  /*-----------------------------------------------------
1587  * clean up
1588  *-----------------------------------------------------*/
1589  SortedSet_dhDestroy (ss);
1590  CHECK_V_ERROR;
1591  if (rpIN != NULL)
1592  {
1593  FREE_DH (rpIN);
1594  CHECK_V_ERROR;
1595  }
1596  if (rpOUT != NULL)
1597  {
1598  FREE_DH (rpOUT);
1599  CHECK_V_ERROR;
1600  }
1601  if (sendBuf != NULL)
1602  {
1603  FREE_DH (sendBuf);
1604  CHECK_V_ERROR;
1605  }
1606  if (recvBuf != NULL)
1607  {
1608  FREE_DH (recvBuf);
1609  CHECK_V_ERROR;
1610  }
1611  if (bdryNodes != NULL)
1612  {
1613  FREE_DH (bdryNodes);
1614  CHECK_V_ERROR;
1615  }
1616  if (marker != NULL)
1617  {
1618  FREE_DH (marker);
1619  CHECK_V_ERROR;
1620  }
1621  if (sendReq != NULL)
1622  {
1623  FREE_DH (sendReq);
1624  CHECK_V_ERROR;
1625  }
1626  if (recvReq != NULL)
1627  {
1628  FREE_DH (recvReq);
1629  CHECK_V_ERROR;
1630  }
1631  if (status != NULL)
1632  {
1633  FREE_DH (status);
1634  CHECK_V_ERROR;
1635  }
1636 END_FUNC_DH}
1637 
1638 
1639 #undef __FUNC__
1640 #define __FUNC__ "find_ordered_neighbors_private"
1641 void
1642 find_ordered_neighbors_private (SubdomainGraph_dh s)
1643 {
1644  START_FUNC_DH int *loNabors = s->loNabors;
1645  int *hiNabors = s->hiNabors;
1646  int *allNabors = s->allNabors, allCount = s->allCount;
1647  int loCt = 0, hiCt = 0;
1648  int *o2n = s->o2n_sub;
1649  int i, myNewId = o2n[myid_dh];
1650 
1651  for (i = 0; i < allCount; ++i)
1652  {
1653  int nabor = allNabors[i];
1654  if (o2n[nabor] < myNewId)
1655  {
1656  loNabors[loCt++] = nabor;
1657  }
1658  else
1659  {
1660  hiNabors[hiCt++] = nabor;
1661  }
1662  }
1663 
1664  s->loCount = loCt;
1665  s->hiCount = hiCt;
1666 END_FUNC_DH}
1667 
1668 
1669 #undef __FUNC__
1670 #define __FUNC__ "color_subdomain_graph_private"
1671 void
1672 color_subdomain_graph_private (SubdomainGraph_dh s)
1673 {
1674  START_FUNC_DH int i, n = np_dh;
1675  int *rp = s->ptrs, *cval = s->adj;
1676  int j, *marker, thisNodesColor, *colorCounter;
1677  int *o2n = s->o2n_sub;
1678  int *color = s->colorVec;
1679 
1680  if (np_dh == 1)
1681  n = s->blocks;
1682 
1683  marker = (int *) MALLOC_DH ((n + 1) * sizeof (int));
1684  colorCounter = (int *) MALLOC_DH ((n + 1) * sizeof (int));
1685  for (i = 0; i <= n; ++i)
1686  {
1687  marker[i] = -1;
1688  colorCounter[i] = 0;
1689  }
1690 
1691  /*------------------------------------------------------------------
1692  * color the nodes
1693  *------------------------------------------------------------------*/
1694  for (i = 0; i < n; ++i)
1695  { /* color node "i" */
1696  /* mark colors of "i"s nabors as unavailable;
1697  only need to mark nabors that are (per the input ordering)
1698  numbered less than "i."
1699  */
1700  for (j = rp[i]; j < rp[i + 1]; ++j)
1701  {
1702  int nabor = cval[j];
1703  if (nabor < i)
1704  {
1705  int naborsColor = color[nabor];
1706  marker[naborsColor] = i;
1707  }
1708  }
1709 
1710  /* assign vertex i the "smallest" possible color */
1711  thisNodesColor = -1;
1712  for (j = 0; j < n; ++j)
1713  {
1714  if (marker[j] != i)
1715  {
1716  thisNodesColor = j;
1717  break;
1718  }
1719  }
1720  color[i] = thisNodesColor;
1721  colorCounter[1 + thisNodesColor] += 1;
1722  }
1723 
1724  /*------------------------------------------------------------------
1725  * build ordering vector; if two nodes are similarly colored,
1726  * they will have the same relative ordering as before.
1727  *------------------------------------------------------------------*/
1728  /* prefix-sum to find lowest-numbered node for each color */
1729  for (i = 1; i < n; ++i)
1730  {
1731  if (colorCounter[i] == 0)
1732  break;
1733  colorCounter[i] += colorCounter[i - 1];
1734  }
1735 
1736  for (i = 0; i < n; ++i)
1737  {
1738  o2n[i] = colorCounter[color[i]];
1739  colorCounter[color[i]] += 1;
1740  }
1741 
1742  /* invert permutation */
1743  invert_perm (n, s->o2n_sub, s->n2o_sub);
1744  CHECK_V_ERROR;
1745 
1746 
1747  /*------------------------------------------------------------------
1748  * count the number of colors used
1749  *------------------------------------------------------------------*/
1750  {
1751  int ct = 0;
1752  for (j = 0; j < n; ++j)
1753  {
1754  if (marker[j] == -1)
1755  break;
1756  ++ct;
1757  }
1758  s->colors = ct;
1759  }
1760 
1761 
1762  /*------------------------------------------------------------------
1763  * (re)build the beg_rowP array
1764  *------------------------------------------------------------------*/
1765  {
1766  int sum = 0;
1767  for (i = 0; i < n; ++i)
1768  {
1769  int old = s->n2o_sub[i];
1770  s->beg_rowP[old] = sum;
1771  sum += s->row_count[old];
1772  }
1773  }
1774 
1775  FREE_DH (marker);
1776  CHECK_V_ERROR;
1777  FREE_DH (colorCounter);
1778  CHECK_V_ERROR;
1779 END_FUNC_DH}
1780 
1781 
1782 #undef __FUNC__
1783 #define __FUNC__ "SubdomainGraph_dhDump"
1784 void
1785 SubdomainGraph_dhDump (SubdomainGraph_dh s, char *filename)
1786 {
1787  START_FUNC_DH int i;
1788  int sCt = np_dh;
1789  FILE *fp;
1790 
1791  if (np_dh == 1)
1792  sCt = s->blocks;
1793 
1794 
1795  /* ---------------------------------------------------------
1796  * for seq and par runs, 1st processor prints information
1797  * that is common to all processors
1798  * ---------------------------------------------------------*/
1799  fp = openFile_dh (filename, "w");
1800  CHECK_V_ERROR;
1801 
1802  /* write subdomain ordering permutations */
1803  fprintf (fp, "----- colors used\n");
1804  fprintf (fp, "%i\n", s->colors);
1805  if (s->colorVec == NULL)
1806  {
1807  fprintf (fp, "s->colorVec == NULL\n");
1808  }
1809  else
1810  {
1811  fprintf (fp, "----- colorVec\n");
1812  for (i = 0; i < sCt; ++i)
1813  {
1814  fprintf (fp, "%i ", s->colorVec[i]);
1815  }
1816  fprintf (fp, "\n");
1817  }
1818 
1819  if (s->o2n_sub == NULL || s->o2n_sub == NULL)
1820  {
1821  fprintf (fp, "s->o2n_sub == NULL || s->o2n_sub == NULL\n");
1822  }
1823  else
1824  {
1825  fprintf (fp, "----- o2n_sub\n");
1826  for (i = 0; i < sCt; ++i)
1827  {
1828  fprintf (fp, "%i ", s->o2n_sub[i]);
1829  }
1830  fprintf (fp, "\n");
1831  fprintf (fp, "----- n2o_sub\n");
1832  for (i = 0; i < sCt; ++i)
1833  {
1834  fprintf (fp, "%i ", s->n2o_sub[i]);
1835  }
1836  fprintf (fp, "\n");
1837  }
1838 
1839  /* write begin row arrays */
1840  if (s->beg_row == NULL || s->beg_rowP == NULL)
1841  {
1842  fprintf (fp, "s->beg_row == NULL || s->beg_rowP == NULL\n");
1843  }
1844  else
1845  {
1846  fprintf (fp, "----- beg_row\n");
1847  for (i = 0; i < sCt; ++i)
1848  {
1849  fprintf (fp, "%i ", 1 + s->beg_row[i]);
1850  }
1851  fprintf (fp, "\n");
1852  fprintf (fp, "----- beg_rowP\n");
1853  for (i = 0; i < sCt; ++i)
1854  {
1855  fprintf (fp, "%i ", 1 + s->beg_rowP[i]);
1856  }
1857  fprintf (fp, "\n");
1858  }
1859 
1860  /* write row count and bdry count arrays */
1861  if (s->row_count == NULL || s->bdry_count == NULL)
1862  {
1863  fprintf (fp, "s->row_count == NULL || s->bdry_count == NULL\n");
1864  }
1865  else
1866  {
1867  fprintf (fp, "----- row_count\n");
1868  for (i = 0; i < sCt; ++i)
1869  {
1870  fprintf (fp, "%i ", s->row_count[i]);
1871  }
1872  fprintf (fp, "\n");
1873  fprintf (fp, "----- bdry_count\n");
1874  for (i = 0; i < sCt; ++i)
1875  {
1876  fprintf (fp, "%i ", s->bdry_count[i]);
1877  }
1878  fprintf (fp, "\n");
1879 
1880  }
1881 
1882  /* write subdomain graph */
1883  if (s->ptrs == NULL || s->adj == NULL)
1884  {
1885  fprintf (fp, "s->ptrs == NULL || s->adj == NULL\n");
1886  }
1887  else
1888  {
1889  int j;
1890  int ct;
1891  fprintf (fp, "----- subdomain graph\n");
1892  for (i = 0; i < sCt; ++i)
1893  {
1894  fprintf (fp, "%i :: ", i);
1895  ct = s->ptrs[i + 1] - s->ptrs[i];
1896  if (ct)
1897  {
1898  shellSort_int (ct, s->adj + s->ptrs[i]);
1899  CHECK_V_ERROR;
1900  }
1901  for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
1902  {
1903  fprintf (fp, "%i ", s->adj[j]);
1904  }
1905  fprintf (fp, "\n");
1906  }
1907  }
1908  closeFile_dh (fp);
1909  CHECK_V_ERROR;
1910 
1911  /* ---------------------------------------------------------
1912  * next print info that differs across processors for par
1913  * trials. deal with this as two cases: seq and par
1914  * ---------------------------------------------------------*/
1915  if (s->beg_rowP == NULL)
1916  {
1917  SET_V_ERROR ("s->beg_rowP == NULL; can't continue");
1918  }
1919  if (s->row_count == NULL)
1920  {
1921  SET_V_ERROR ("s->row_count == NULL; can't continue");
1922  }
1923  if (s->o2n_sub == NULL)
1924  {
1925  SET_V_ERROR ("s->o2n_sub == NULL; can't continue");
1926  }
1927 
1928 
1929  if (np_dh == 1)
1930  {
1931  fp = openFile_dh (filename, "a");
1932  CHECK_V_ERROR;
1933 
1934  /* write n2o_row and o2n_col */
1935  if (s->n2o_row == NULL || s->o2n_col == NULL)
1936  {
1937  fprintf (fp, "s->n2o_row == NULL|| s->o2n_col == NULL\n");
1938  }
1939  else
1940  {
1941  fprintf (fp, "----- n2o_row\n");
1942  for (i = 0; i < s->m; ++i)
1943  {
1944  fprintf (fp, "%i ", 1 + s->n2o_row[i]);
1945  }
1946  fprintf (fp, "\n");
1947 
1948 #if 0
1949 /*
1950 note: this won't match the parallel case, since
1951  parallel permutation vecs are zero-based and purely
1952  local
1953 */
1954 
1955  fprintf (fp, "----- o2n_col\n");
1956  for (i = 0; i < sCt; ++i)
1957  {
1958  int br = s->beg_row[i];
1959  int er = br + s->row_count[i];
1960 
1961  for (j = br; j < er; ++j)
1962  {
1963  fprintf (fp, "%i ", 1 + s->o2n_col[j]);
1964  }
1965  fprintf (fp, "\n");
1966  }
1967  fprintf (fp, "\n");
1968 
1969 #endif
1970 
1971  }
1972  closeFile_dh (fp);
1973  CHECK_V_ERROR;
1974  }
1975 
1976  /* parallel case */
1977  else
1978  {
1979  int id = s->n2o_sub[myid_dh];
1980  int m = s->m;
1981  int pe;
1982  int beg_row = 0;
1983  if (s->beg_row != 0)
1984  beg_row = s->beg_row[myid_dh];
1985 
1986  /* write n2o_row */
1987  for (pe = 0; pe < np_dh; ++pe)
1988  {
1989  MPI_Barrier (comm_dh);
1990  if (id == pe)
1991  {
1992  fp = openFile_dh (filename, "a");
1993  CHECK_V_ERROR;
1994  if (id == 0)
1995  fprintf (fp, "----- n2o_row\n");
1996 
1997  for (i = 0; i < m; ++i)
1998  {
1999  fprintf (fp, "%i ", 1 + s->n2o_row[i] + beg_row);
2000  }
2001  if (id == np_dh - 1)
2002  fprintf (fp, "\n");
2003  closeFile_dh (fp);
2004  CHECK_V_ERROR;
2005  }
2006  }
2007 
2008 #if 0
2009 
2010  /* write o2n_col */
2011  for (pe = 0; pe < np_dh; ++pe)
2012  {
2013  MPI_Barrier (comm_dh);
2014  if (myid_dh == pe)
2015  {
2016  fp = openFile_dh (filename, "a");
2017  CHECK_V_ERROR;
2018  if (myid_dh == 0)
2019  fprintf (fp, "----- o2n_col\n");
2020 
2021  for (i = 0; i < m; ++i)
2022  {
2023  fprintf (fp, "%i ", 1 + s->o2n_col[i] + beg_row);
2024  }
2025  fprintf (fp, "\n");
2026 
2027  if (myid_dh == np_dh - 1)
2028  fprintf (fp, "\n");
2029 
2030  closeFile_dh (fp);
2031  CHECK_V_ERROR;
2032  }
2033  }
2034 
2035 #endif
2036 
2037  }
2038 
2039 END_FUNC_DH}
2040 
2041 
2042 
2043 #undef __FUNC__
2044 #define __FUNC__ "find_bdry_nodes_seq_private"
2045 void
2046 find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A)
2047 {
2048  START_FUNC_DH int i, j, row, blocks = s->blocks;
2049  int *cval, *tmp;
2050 
2051  tmp = (int *) MALLOC_DH (m * sizeof (int));
2052  CHECK_V_ERROR;
2053  for (i = 0; i < m; ++i)
2054  tmp[i] = 0;
2055 
2056  /*------------------------------------------
2057  * mark all boundary nodes
2058  *------------------------------------------ */
2059  for (i = 0; i < blocks; ++i)
2060  {
2061  int beg_row = s->beg_row[i];
2062  int end_row = beg_row + s->row_count[i];
2063 
2064  for (row = beg_row; row < end_row; ++row)
2065  {
2066  bool isBdry = false;
2067  int len;
2068  EuclidGetRow (A, row, &len, &cval, NULL);
2069  CHECK_V_ERROR;
2070 
2071  for (j = 0; j < len; ++j)
2072  { /* for each column in the row */
2073  int col = cval[j];
2074 
2075  if (col < beg_row || col >= end_row)
2076  {
2077  tmp[col] = 1;
2078  isBdry = true;
2079  }
2080  }
2081  if (isBdry)
2082  tmp[row] = 1;
2083  EuclidRestoreRow (A, row, &len, &cval, NULL);
2084  CHECK_V_ERROR;
2085  }
2086  }
2087 
2088  /*------------------------------------------
2089  * fill in the bdry_count[] array
2090  *------------------------------------------ */
2091  for (i = 0; i < blocks; ++i)
2092  {
2093  int beg_row = s->beg_row[i];
2094  int end_row = beg_row + s->row_count[i];
2095  int ct = 0;
2096  for (row = beg_row; row < end_row; ++row)
2097  {
2098  if (tmp[row])
2099  ++ct;
2100  }
2101  s->bdry_count[i] = ct;
2102  }
2103 
2104  /*------------------------------------------
2105  * form the o2n_col[] permutation
2106  *------------------------------------------ */
2107  for (i = 0; i < blocks; ++i)
2108  {
2109  int beg_row = s->beg_row[i];
2110  int end_row = beg_row + s->row_count[i];
2111  int interiorIDX = beg_row;
2112  int bdryIDX = end_row - s->bdry_count[i];
2113 
2114  for (row = beg_row; row < end_row; ++row)
2115  {
2116  if (tmp[row])
2117  {
2118  s->o2n_col[row] = bdryIDX++;
2119  }
2120  else
2121  {
2122  s->o2n_col[row] = interiorIDX++;
2123  }
2124  }
2125  }
2126 
2127  /* invert permutation */
2128  invert_perm (m, s->o2n_col, s->n2o_row);
2129  CHECK_V_ERROR;
2130  FREE_DH (tmp);
2131  CHECK_V_ERROR;
2132 
2133 END_FUNC_DH}
2134 
2135 #undef __FUNC__
2136 #define __FUNC__ "SubdomainGraph_dhPrintSubdomainGraph"
2137 void
2138 SubdomainGraph_dhPrintSubdomainGraph (SubdomainGraph_dh s, FILE * fp)
2139 {
2140  START_FUNC_DH if (myid_dh == 0)
2141  {
2142  int i, j;
2143 
2144  fprintf (fp,
2145  "\n-----------------------------------------------------\n");
2146  fprintf (fp, "SubdomainGraph, and coloring and ordering information\n");
2147  fprintf (fp, "-----------------------------------------------------\n");
2148  fprintf (fp, "colors used: %i\n", s->colors);
2149 
2150  fprintf (fp, "o2n ordering vector: ");
2151  for (i = 0; i < s->blocks; ++i)
2152  fprintf (fp, "%i ", s->o2n_sub[i]);
2153 
2154  fprintf (fp, "\ncoloring vector (node, color): \n");
2155  for (i = 0; i < s->blocks; ++i)
2156  fprintf (fp, " %i, %i\n", i, s->colorVec[i]);
2157 
2158  fprintf (fp, "\n");
2159  fprintf (fp, "Adjacency lists:\n");
2160 
2161  for (i = 0; i < s->blocks; ++i)
2162  {
2163  fprintf (fp, " P_%i :: ", i);
2164  for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j)
2165  {
2166  fprintf (fp, "%i ", s->adj[j]);
2167  }
2168  fprintf (fp, "\n");
2169  }
2170  fprintf (fp, "-----------------------------------------------------\n");
2171  }
2172 END_FUNC_DH}
2173 
2174 
2175 #undef __FUNC__
2176 #define __FUNC__ "adjust_matrix_perms_private"
2177 void
2178 adjust_matrix_perms_private (SubdomainGraph_dh s, int m)
2179 {
2180  START_FUNC_DH int i, j, blocks = s->blocks;
2181  int *o2n = s->o2n_col;
2182 
2183  for (i = 0; i < blocks; ++i)
2184  {
2185  int beg_row = s->beg_row[i];
2186  int end_row = beg_row + s->row_count[i];
2187  int adjust = s->beg_rowP[i] - s->beg_row[i];
2188  for (j = beg_row; j < end_row; ++j)
2189  o2n[j] += adjust;
2190  }
2191 
2192  invert_perm (m, s->o2n_col, s->n2o_row);
2193  CHECK_V_ERROR;
2194 END_FUNC_DH}
2195 
2196 #undef __FUNC__
2197 #define __FUNC__ "SubdomainGraph_dhPrintRatios"
2198 void
2199 SubdomainGraph_dhPrintRatios (SubdomainGraph_dh s, FILE * fp)
2200 {
2201  START_FUNC_DH int i;
2202  int blocks = np_dh;
2203  double ratio[25];
2204 
2205  if (myid_dh == 0)
2206  {
2207  if (np_dh == 1)
2208  blocks = s->blocks;
2209  if (blocks > 25)
2210  blocks = 25;
2211 
2212  fprintf (fp, "\n");
2213  fprintf (fp, "Subdomain interior/boundary node ratios\n");
2214  fprintf (fp, "---------------------------------------\n");
2215 
2216  /* compute ratios */
2217  for (i = 0; i < blocks; ++i)
2218  {
2219  if (s->bdry_count[i] == 0)
2220  {
2221  ratio[i] = -1;
2222  }
2223  else
2224  {
2225  ratio[i] =
2226  (double) (s->row_count[i] -
2227  s->bdry_count[i]) / (double) s->bdry_count[i];
2228  }
2229  }
2230 
2231  /* sort ratios */
2232  shellSort_float (blocks, ratio);
2233 
2234  /* print ratios */
2235  if (blocks <= 20)
2236  { /* print all ratios */
2237  int j = 0;
2238  for (i = 0; i < blocks; ++i)
2239  {
2240  fprintf (fp, "%0.2g ", ratio[i]);
2241  ++j;
2242  if (j == 10)
2243  {
2244  fprintf (fp, "\n");
2245  }
2246  }
2247  fprintf (fp, "\n");
2248  }
2249  else
2250  { /* print 10 largest and 10 smallest ratios */
2251  fprintf (fp, "10 smallest ratios: ");
2252  for (i = 0; i < 10; ++i)
2253  {
2254  fprintf (fp, "%0.2g ", ratio[i]);
2255  }
2256  fprintf (fp, "\n");
2257  fprintf (fp, "10 largest ratios: ");
2258  {
2259  int start = blocks - 6, stop = blocks - 1;
2260  for (i = start; i < stop; ++i)
2261  {
2262  fprintf (fp, "%0.2g ", ratio[i]);
2263  }
2264  fprintf (fp, "\n");
2265  }
2266  }
2267  }
2268 
2269 END_FUNC_DH}
2270 
2271 
2272 #undef __FUNC__
2273 #define __FUNC__ "SubdomainGraph_dhPrintStats"
2274 void
2275 SubdomainGraph_dhPrintStats (SubdomainGraph_dh sg, FILE * fp)
2276 {
2277  START_FUNC_DH double *timing = sg->timing;
2278 
2279  fprintf_dh (fp, "\nSubdomainGraph timing report\n");
2280  fprintf_dh (fp, "-----------------------------\n");
2281  fprintf_dh (fp, "total setup time: %0.2f\n", timing[TOTAL_SGT]);
2282  fprintf_dh (fp, " find neighbors in subdomain graph: %0.2f\n",
2283  timing[FIND_NABORS_SGT]);
2284  fprintf_dh (fp, " locally order interiors and bdry: %0.2f\n",
2285  timing[ORDER_BDRY_SGT]);
2286  fprintf_dh (fp, " form and color subdomain graph: %0.2f\n",
2287  timing[FORM_GRAPH_SGT]);
2288  fprintf_dh (fp, " exchange bdry permutations: %0.2f\n",
2289  timing[EXCHANGE_PERMS_SGT]);
2290  fprintf_dh (fp, " everything else (should be small): %0.2f\n",
2291  timing[TOTAL_SGT] - (timing[FIND_NABORS_SGT] +
2292  timing[ORDER_BDRY_SGT] +
2293  timing[FORM_GRAPH_SGT] +
2294  timing[EXCHANGE_PERMS_SGT]));
2295 END_FUNC_DH}