Ifpack_OverlappingRowMatrix.cpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #include "Ifpack_ConfigDefs.h"
44 #include "Ifpack_OverlappingRowMatrix.h"
45 #include "Epetra_RowMatrix.h"
46 #include "Epetra_Map.h"
47 #include "Epetra_CrsMatrix.h"
48 #include "Epetra_Comm.h"
49 #include "Epetra_MultiVector.h"
50 
51 #ifdef HAVE_IFPACK_PARALLEL_SUBDOMAIN_SOLVERS
52 #include "Epetra_IntVector.h"
53 #include "Epetra_MpiComm.h"
54 #include "Teuchos_Hashtable.hpp"
55 #include "Teuchos_Array.hpp"
56 #include "EpetraExt_OperatorOut.h"
57 #include "EpetraExt_BlockMapOut.h"
58 #else
59 # ifdef IFPACK_NODE_AWARE_CODE
60 # include "Epetra_IntVector.h"
61 # include "Epetra_MpiComm.h"
62 # include "Teuchos_Hashtable.hpp"
63 # include "Teuchos_Array.hpp"
64 # include "EpetraExt_OperatorOut.h"
65 # include "EpetraExt_BlockMapOut.h"
66  extern int ML_NODE_ID;
67  int IFPACK_NODE_ID;
68 # endif
69 #endif
70 
71 using namespace Teuchos;
72 
73 // ======================================================================
74 
75 template<typename int_type>
76 void Ifpack_OverlappingRowMatrix::BuildMap(int OverlapLevel_in)
77 {
78  RCP<Epetra_Map> TmpMap;
79  RCP<Epetra_CrsMatrix> TmpMatrix;
80  RCP<Epetra_Import> TmpImporter;
81 
82  // importing rows corresponding to elements that are
83  // in ColMap, but not in RowMap
84  const Epetra_Map *RowMap;
85  const Epetra_Map *ColMap;
86 
87  std::vector<int_type> ExtElements;
88 
89  for (int overlap = 0 ; overlap < OverlapLevel_in ; ++overlap) {
90  if (TmpMatrix != Teuchos::null) {
91  RowMap = &(TmpMatrix->RowMatrixRowMap());
92  ColMap = &(TmpMatrix->RowMatrixColMap());
93  }
94  else {
95  RowMap = &(A().RowMatrixRowMap());
96  ColMap = &(A().RowMatrixColMap());
97  }
98 
99  int size = ColMap->NumMyElements() - RowMap->NumMyElements();
100  std::vector<int_type> list(size);
101 
102  int count = 0;
103 
104  // define the set of rows that are in ColMap but not in RowMap
105  for (int i = 0 ; i < ColMap->NumMyElements() ; ++i) {
106  int_type GID = (int_type) ColMap->GID64(i);
107  if (A().RowMatrixRowMap().LID(GID) == -1) {
108  typename std::vector<int_type>::iterator pos
109  = find(ExtElements.begin(),ExtElements.end(),GID);
110  if (pos == ExtElements.end()) {
111  ExtElements.push_back(GID);
112  list[count] = GID;
113  ++count;
114  }
115  }
116  }
117 
118  const int_type *listptr = NULL;
119  if ( ! list.empty() ) listptr = &list[0];
120  TmpMap = rcp( new Epetra_Map(-1,count, listptr,0,Comm()) );
121 
122  TmpMatrix = rcp( new Epetra_CrsMatrix(Copy,*TmpMap,0) );
123 
124  TmpImporter = rcp( new Epetra_Import(*TmpMap,A().RowMatrixRowMap()) );
125 
126  TmpMatrix->Import(A(),*TmpImporter,Insert);
127  TmpMatrix->FillComplete(A().OperatorDomainMap(),*TmpMap);
128 
129  }
130 
131  // build the map containing all the nodes (original
132  // matrix + extended matrix)
133  std::vector<int_type> list(NumMyRowsA_ + ExtElements.size());
134  for (int i = 0 ; i < NumMyRowsA_ ; ++i)
135  list[i] = (int_type) A().RowMatrixRowMap().GID64(i);
136  for (int i = 0 ; i < (int)ExtElements.size() ; ++i)
137  list[i + NumMyRowsA_] = ExtElements[i];
138 
139  const int_type *listptr = NULL;
140  if ( ! list.empty() ) listptr = &list[0];
141  {
142  Map_ = rcp( new Epetra_Map((int_type) -1, NumMyRowsA_ + ExtElements.size(),
143  listptr, 0, Comm()) );
144  }
145 #ifdef HAVE_IFPACK_PARALLEL_SUBDOMAIN_SOLVERS
146  colMap_ = &*Map_;
147 #else
148 # ifdef IFPACK_NODE_AWARE_CODE
149  colMap_ = &*Map_;
150 # endif
151 #endif
152  // now build the map corresponding to all the external nodes
153  // (with respect to A().RowMatrixRowMap()).
154  {
155  const int_type * extelsptr = NULL;
156  if ( ! ExtElements.empty() ) extelsptr = &ExtElements[0];
157  ExtMap_ = rcp( new Epetra_Map((int_type) -1,ExtElements.size(),
158  extelsptr,0,A().Comm()) );
159  }
160 }
161 
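// ----------------------------------------------------------------------
// Illustrative sketch: one round of the loop in BuildMap() above simply
// gathers the column-map GIDs that the calling process does not own as
// rows. Assuming A is any distributed Epetra_RowMatrix, the level-1 ghost
// set could be collected along these lines:
//
//   std::vector<int> Ghosts;                       // level-1 overlap GIDs
//   const Epetra_Map &Rows = A.RowMatrixRowMap();
//   const Epetra_Map &Cols = A.RowMatrixColMap();
//   for (int i = 0; i < Cols.NumMyElements(); ++i) {
//     int GID = Cols.GID(i);
//     if (Rows.LID(GID) == -1)                     // not a locally owned row
//       Ghosts.push_back(GID);
//   }
//
// Higher overlap levels repeat the same test on the column map of the
// matrix imported in the previous round, exactly as the loop above does.
// ----------------------------------------------------------------------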
162 // ======================================================================
163 // Constructor for the case of one core per subdomain
164 Ifpack_OverlappingRowMatrix::
165 Ifpack_OverlappingRowMatrix(const RCP<const Epetra_RowMatrix>& Matrix_in,
166  int OverlapLevel_in) :
167  Matrix_(Matrix_in),
168  OverlapLevel_(OverlapLevel_in)
169 {
170  // should not be here if no overlap
171  if (OverlapLevel_in == 0)
172  IFPACK_CHK_ERRV(-1);
173 
174  // nothing to do as well with one process
175  if (Comm().NumProc() == 1)
176  IFPACK_CHK_ERRV(-1);
177 
178  NumMyRowsA_ = A().NumMyRows();
179 
180  // construct the external matrix
181 
182 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
183  if(A().RowMatrixRowMap().GlobalIndicesInt()) {
184  BuildMap<int>(OverlapLevel_in);
185  }
186  else
187 #endif
188 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
189  if(A().RowMatrixRowMap().GlobalIndicesLongLong()) {
190  BuildMap<long long>(OverlapLevel_in);
191  }
192  else
193 #endif
194  throw "Ifpack_OverlappingRowMatrix::Ifpack_OverlappingRowMatrix: GlobalIndices type unknown";
195 
196  ExtMatrix_ = rcp( new Epetra_CrsMatrix(Copy,*ExtMap_,*Map_,0) );
197 
198  ExtImporter_ = rcp( new Epetra_Import(*ExtMap_,A().RowMatrixRowMap()) );
199  ExtMatrix_->Import(A(),*ExtImporter_,Insert);
200  ExtMatrix_->FillComplete(A().OperatorDomainMap(),*Map_);
201 
202  Importer_ = rcp( new Epetra_Import(*Map_,A().RowMatrixRowMap()) );
203 
204  // fix indices for overlapping matrix
205  NumMyRowsB_ = B().NumMyRows();
206  NumMyRows_ = NumMyRowsA_ + NumMyRowsB_;
207  NumMyCols_ = NumMyRows_;
208 
209  NumMyDiagonals_ = A().NumMyDiagonals() + B().NumMyDiagonals();
210 
211  NumMyNonzeros_ = A().NumMyNonzeros() + B().NumMyNonzeros();
212  long long NumMyNonzeros_tmp = NumMyNonzeros_;
213  Comm().SumAll(&NumMyNonzeros_tmp,&NumGlobalNonzeros_,1);
214  MaxNumEntries_ = A().MaxNumEntries();
215 
216  if (MaxNumEntries_ < B().MaxNumEntries())
217  MaxNumEntries_ = B().MaxNumEntries();
218 
219 }
220 
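// ----------------------------------------------------------------------
// Usage sketch (hypothetical caller): the constructor above is the
// standard one-core-per-subdomain path, e.g. when an additive Schwarz
// preconditioner needs an overlapped view of A.
//
//   Teuchos::RCP<const Epetra_RowMatrix> A = /* distributed matrix */;
//   int OverlapLevel = 2;                          // must be > 0
//   Ifpack_OverlappingRowMatrix OvA(A, OverlapLevel);
//   // OvA.NumMyRows() == A->NumMyRows() + number of imported ghost rows
//
// The constructor errors out if OverlapLevel == 0 or if only one process
// is in use, since there is nothing to overlap in either case.
// ----------------------------------------------------------------------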
221 #ifdef HAVE_IFPACK_PARALLEL_SUBDOMAIN_SOLVERS
222 
223 // ======================================================================
224 // Constructor for the case of two or more cores per subdomain
225 Ifpack_OverlappingRowMatrix::
226 Ifpack_OverlappingRowMatrix(const RCP<const Epetra_RowMatrix>& Matrix_in,
227  int OverlapLevel_in, int subdomainID) :
228  Matrix_(Matrix_in),
229  OverlapLevel_(OverlapLevel_in)
230 {
231 
232  //FIXME timing
233 #ifdef IFPACK_OVA_TIME_BUILD
234  double t0 = MPI_Wtime();
235  double t1, true_t0=t0;
236 #endif
237  //FIXME timing
238 
239  const int votesMax = INT_MAX;
240 
241  // should not be here if no overlap
242  if (OverlapLevel_in == 0)
243  IFPACK_CHK_ERRV(-1);
244 
245  // nothing to do as well with one process
246  if (Comm().NumProc() == 1)
247  IFPACK_CHK_ERRV(-1);
248 
249  // subdomainID is the node (aka socket) number, and so is system dependent
250  // nodeComm is the communicator for all the processes on a particular node
251  // these processes will have the same subdomainID.
252 # ifdef HAVE_MPI
253  const Epetra_MpiComm *pComm = dynamic_cast<const Epetra_MpiComm*>( &Comm() );
254  assert(pComm != NULL);
255  MPI_Comm nodeMPIComm;
256  MPI_Comm_split(pComm->Comm(),subdomainID,pComm->MyPID(),&nodeMPIComm);
257  Epetra_MpiComm *nodeComm = new Epetra_MpiComm(nodeMPIComm);
258 # else
259  Epetra_SerialComm *nodeComm = dynamic_cast<Epetra_SerialComm*>( const_cast<Epetra_Comm*>( &(Matrix_in->RowMatrixRowMap().Comm()) ) );
260 # endif
261 
262  NumMyRowsA_ = A().NumMyRows();
263 
264  RCP<Epetra_Map> TmpMap;
265  RCP<Epetra_CrsMatrix> TmpMatrix;
266  RCP<Epetra_Import> TmpImporter;
267 
268  // importing rows corresponding to elements that are
269  // in ColMap, but not in RowMap
270  const Epetra_Map *RowMap;
271  const Epetra_Map *ColMap;
272  const Epetra_Map *DomainMap;
273 
274  // TODO Count #connections from nodes I own to each ghost node
275 
276 
277  //FIXME timing
278 #ifdef IFPACK_OVA_TIME_BUILD
279  t1 = MPI_Wtime();
280  fprintf(stderr,"[node %d]: time for initialization %2.3e\n", subdomainID, t1-t0);
281  t0=t1;
282 #endif
283  //FIXME timing
284 
285 #ifdef IFPACK_OVA_TIME_BUILD
286  t1 = MPI_Wtime();
287  fprintf(stderr,"[node %d]: overlap hash table allocs %2.3e\n", subdomainID, t1-t0);
288  t0=t1;
289 #endif
290 
291  Teuchos::Hashtable<int,int> colMapTable(3 * A().RowMatrixColMap().NumMyElements() );
292 
293  // ghostTable holds off-node GIDs that are connected to on-node rows and can potentially be this PID's overlap
294  // TODO hopefully 3 times the # column entries is a reasonable table size
295  Teuchos::Hashtable<int,int> ghostTable(3 * A().RowMatrixColMap().NumMyElements() );
296 
297  /* ** ************************************************************************** ** */
298  /* ** ********************** start of main overlap loop ************************ ** */
299  /* ** ************************************************************************** ** */
300  for (int overlap = 0 ; overlap < OverlapLevel_in ; ++overlap)
301  {
302  // colMapTable (filled below) holds node buddy GIDs that are connected to the current PID's rows, i.e., GIDs that
303  // should be in the overlapped matrix's column map.
304 
305  if (TmpMatrix != Teuchos::null) {
306  RowMap = &(TmpMatrix->RowMatrixRowMap());
307  ColMap = &(TmpMatrix->RowMatrixColMap());
308  DomainMap = &(TmpMatrix->OperatorDomainMap());
309  }
310  else {
311  RowMap = &(A().RowMatrixRowMap());
312  ColMap = &(A().RowMatrixColMap());
313  DomainMap = &(A().OperatorDomainMap());
314  }
315 
316 #ifdef IFPACK_OVA_TIME_BUILD
317  t1 = MPI_Wtime();
318  fprintf(stderr,"[node %d]: overlap pointer pulls %2.3e\n", subdomainID, t1-t0);
319  t0=t1;
320 #endif
321 
322  // For each column ID, determine the owning node (as opposed to core)
323  // ID of the corresponding row.
324  Epetra_IntVector colIdList( *ColMap );
325  Epetra_IntVector rowIdList(*DomainMap);
326  rowIdList.PutValue(subdomainID);
327  Teuchos::RCP<Epetra_Import> nodeIdImporter = rcp(new Epetra_Import( *ColMap, *DomainMap ));
328 
329 #ifdef IFPACK_OVA_TIME_BUILD
330  t1 = MPI_Wtime();
331  fprintf(stderr,"[node %d]: overlap intvector/importer allocs %2.3e\n", subdomainID, t1-t0);
332  t0=t1;
333 #endif
334 
335  colIdList.Import(rowIdList,*nodeIdImporter,Insert);
336 
337  int size = ColMap->NumMyElements() - RowMap->NumMyElements();
338  int count = 0;
339 
340 #ifdef IFPACK_OVA_TIME_BUILD
341  t1 = MPI_Wtime();
342  fprintf(stderr,"[node %d]: overlap col/row ID imports %2.3e\n", subdomainID, t1-t0);
343  t0=t1;
344 #endif
345 
346 
347  // define the set of off-node rows that are in ColMap but not in RowMap
348  // This naturally does not include off-core rows that are on the same node as me, i.e., node buddy rows.
349  for (int i = 0 ; i < ColMap->NumMyElements() ; ++i) {
350  int GID = ColMap->GID(i);
351  int myvotes=0;
352  if (ghostTable.containsKey(GID)) myvotes = ghostTable.get(GID);
353  if ( colIdList[i] != subdomainID && myvotes < votesMax) // don't include anybody found in a previous round
354  {
355  int votes;
356  if (ghostTable.containsKey(GID)) {
357  votes = ghostTable.get(GID);
358  votes++;
359  ghostTable.put(GID,votes);
360  } else {
361  ghostTable.put(GID,1);
362  }
363  }
364  } //for (int i = 0 ; i < ColMap->NumMyElements() ; ++i)
365 
366  Teuchos::Array<int> gidsarray,votesarray;
367  ghostTable.arrayify(gidsarray,votesarray);
368  int *gids = gidsarray.getRawPtr();
369  int *votes = votesarray.getRawPtr();
370 
371  /*
372  This next bit of code decides which node buddy (NB) gets which ghost points. Everyone sends their
373  list of ghost points to pid 0 of the local subcommunicator. Pid 0 decides who gets what:
374 
375  - if a ghost point is touched by only one NB, that NB gets the ghost point
376  - if two or more NBs share a ghost point, whichever NB has the most connections to the ghost
377  point gets it.
378  */
379 
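// Worked example of the rule above (made-up numbers): suppose node pid 0
// gathers, from itself and its two node buddies,
//
//     ghost GID :  17  17  42  42  42  90
//     votes     :   3   1   2   2   5   4
//     owning pid:   0   1   0   1   2   2
//
// after sorting by GID. GID 17 goes to pid 0 (3 votes beats 1), GID 42
// goes to pid 2 (5 votes), and GID 90 is uncontested, so pid 2 keeps it.
// Each losing pid gets that GID back in its cull list and drops it from
// its ghostTable, so exactly one node buddy ends up importing each row.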
380 # ifdef HAVE_MPI //FIXME What if we build in serial?! This file will likely not compile.
381  int *cullList;
382  int ncull;
383  //int mypid = nodeComm->MyPID();
384 
385  //FIXME timing
386 #ifdef IFPACK_OVA_TIME_BUILD
387  t1 = MPI_Wtime();
388  fprintf(stderr,"[node %d]: overlap before culling %2.3e\n", subdomainID, t1-t0);
389  t0=t1;
390 #endif
391  //FIXME timing
392 
393 
394  if (nodeComm->MyPID() == 0)
395  {
396  // Figure out how much pid 0 is to receive
397  MPI_Status status;
398  int *lengths = new int[nodeComm->NumProc()+1];
399  lengths[0] = 0;
400  lengths[1] = ghostTable.size();
401  for (int i=1; i<nodeComm->NumProc(); i++) {
402  int leng;
403  MPI_Recv( &leng, 1, MPI_INT, i, MPI_ANY_TAG, nodeComm->Comm(), &status);
404  lengths[i+1] = lengths[i] + leng;
405  }
406  int total = lengths[nodeComm->NumProc()];
407 
408  int* ghosts = new int[total];
409  for (int i=0; i<total; i++) ghosts[i] = -9;
410  int *round = new int[total];
411  int *owningpid = new int[total];
412 
413  for (int i=1; i<nodeComm->NumProc(); i++) {
414  int count = lengths[i+1] - lengths[i];
415  MPI_Recv( ghosts+lengths[i], count, MPI_INT, i, MPI_ANY_TAG, nodeComm->Comm(), &status);
416  MPI_Recv( round+lengths[i], count, MPI_INT, i, MPI_ANY_TAG, nodeComm->Comm(), &status);
417  }
418 
419  // put in pid 0's info
420  for (int i=0; i<lengths[1]; i++) {
421  ghosts[i] = gids[i];
422  round[i] = votes[i];
423  owningpid[i] = 0;
424  }
425 
426  // put in the pid associated with each ghost
427  for (int j=1; j<nodeComm->NumProc(); j++) {
428  for (int i=lengths[j]; i<lengths[j+1]; i++) {
429  owningpid[i] = j;
430  }
431  }
432 
433  // sort everything based on the ghost gids
434  int* roundpid[2];
435  roundpid[0] = round; roundpid[1] = owningpid;
436  Epetra_Util epetraUtil;
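// The Sort call below orders the ghost GIDs ascending in place; the
// trailing arguments (0, 0, 2, roundpid) say there are no double
// companion arrays and two int companion arrays (round and owningpid),
// which are permuted along with the keys so that each ghost GID keeps
// its vote count and its owning pid.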
437  epetraUtil.Sort(true,total,ghosts,0,0,2,roundpid);
438 
439  //set up arrays that get sent back to node buddies and that tell them what ghosts to cull
440  int *nlosers = new int[nodeComm->NumProc()];
441  int** losers = new int*[nodeComm->NumProc()];
442  for (int i=0; i<nodeComm->NumProc(); i++) {
443  nlosers[i] = 0;
444  losers[i] = new int[ lengths[i+1]-lengths[i] ];
445  }
446 
447  // Walk through the ghosts array and, for each sequence of duplicate ghost GIDs, choose just one NB to keep it.
448  // The logic is pretty simple. The ghost list is sorted, so all duplicate GIDs are together.
449  // The list is traversed. As duplicates are found, node pid 0 keeps track of the current "winning"
450  // pid. When a pid is determined to have "lost" (fewer votes/connections to the current GID), the
451  // GID is added to that pid's list of GIDs to be culled. At the end of the repeated sequence, we have
452  // a winner, and the other NBs know whether they need to delete it from their import lists.
453  int max=0; //for duplicated ghosts, index of the pid with the most votes, which hence keeps the ghost.
454  // TODO: break ties randomly
455 
456  for (int i=1; i<total; i++) {
457  if (ghosts[i] == ghosts[i-1]) {
458  int idx = i; // pid associated with idx is current "loser"
459  if (round[i] > round[max]) {
460  idx = max;
461  max=i;
462  }
463  int j = owningpid[idx];
464  losers[j][nlosers[j]++] = ghosts[idx];
465  } else {
466  max=i;
467  }
468  } //for (int i=1; i<total; i++)
469 
470  delete [] round;
471  delete [] ghosts;
472  delete [] owningpid;
473 
474  // send the arrays of ghost GIDs to be culled back to the respective node buddies
475  for (int i=1; i<nodeComm->NumProc(); i++) {
476  MPI_Send( nlosers+i, 1, MPI_INT, i, 8675, nodeComm->Comm());
477  MPI_Send( losers[i], nlosers[i], MPI_INT, i, 8675, nodeComm->Comm());
478  }
479 
480  //FIXME Unnecessary memory allocation and copying, but makes culling code cleaner
481  //Could we stick this info into gids and votes, since neither is being used anymore?
482  //TODO Instead of using "losers" arrays, just use "cullList" as in the else clause
483  ncull = nlosers[0];
484  cullList = new int[ncull+1];
485  for (int i=0; i<nlosers[0]; i++)
486  cullList[i] = losers[0][i];
487 
488  for (int i=0; i<nodeComm->NumProc(); i++)
489  delete [] losers[i];
490 
491  delete [] lengths;
492  delete [] losers;
493  delete [] nlosers;
494 
495  } else { //everyone but pid 0
496 
497  // send to node pid 0 all ghosts that this pid could potentially import
498  int hashsize = ghostTable.size();
499  MPI_Send( &hashsize, 1, MPI_INT, 0, 8675, nodeComm->Comm());
500  MPI_Send( gids, hashsize, MPI_INT, 0, 8675, nodeComm->Comm());
501  MPI_Send( votes, hashsize, MPI_INT, 0, 8675, nodeComm->Comm());
502 
503  // receive the ghost GIDs that should not be imported (subset of the list sent off just above)
504  MPI_Status status;
505  MPI_Recv( &ncull, 1, MPI_INT, 0, 8675, nodeComm->Comm(), &status);
506  cullList = new int[ncull+1];
507  MPI_Recv( cullList, ncull, MPI_INT, 0, 8675, nodeComm->Comm(), &status);
508  }
509 
510 
511  //TODO clean out hash table after each time through overlap loop. 4/1/07 JJH: done; moved both hash tables to local scope.
512 
513  // Remove from my row map all off-node ghosts that will be imported by a node buddy.
514  for (int i=0; i<ncull; i++) {
515  try{ghostTable.remove(cullList[i]);}
516 
517  catch(...) {
518  printf("pid %d: In OverlappingRowMatrix ctr, problem removing ghost elt %d from ghostTable\n",
519  Comm().MyPID(),cullList[i]);
520  fflush(stdout);
521  }
522  }//for
523 
524  delete [] cullList;
525 
526  // Save off the remaining ghost GIDs from the current overlap round.
527  // These are off-node GIDs (rows) that I will import.
528  gidsarray.clear(); votesarray.clear();
529  ghostTable.arrayify(gidsarray,votesarray);
530 
531  std::vector<int> list(size);
532  count=0;
533  for (int i=0; i<ghostTable.size(); i++) {
534  // if votesarray[i] == votesMax, then this GID was found during a previous overlap round
535  if (votesarray[i] < votesMax) {
536  list[count] = gidsarray[i];
537  ghostTable.put(gidsarray[i],votesMax); //set this GID's vote to the maximum so that this PID always wins in any subsequent overlap tiebreaking.
538  count++;
539  }
540  }
541 
542  //FIXME timing
543 #ifdef IFPACK_OVA_TIME_BUILD
544  t1 = MPI_Wtime();
545  fprintf(stderr,"[node %d]: overlap duplicate removal %2.3e\n", subdomainID, t1-t0);
546  t0=t1;
547 #endif
548  //FIXME timing
549 
550 # endif //ifdef HAVE_MPI
551 
552  TmpMap = rcp( new Epetra_Map(-1,count, &list[0],0,Comm()) );
553 
554  TmpMatrix = rcp( new Epetra_CrsMatrix(Copy,*TmpMap,0) );
555 
556  TmpImporter = rcp( new Epetra_Import(*TmpMap,A().RowMatrixRowMap()) );
557 
558  TmpMatrix->Import(A(),*TmpImporter,Insert);
559  TmpMatrix->FillComplete(A().OperatorDomainMap(),*TmpMap);
560 
561  //FIXME timing
562 #ifdef IFPACK_OVA_TIME_BUILD
563  t1 = MPI_Wtime();
564  fprintf(stderr,"[node %d]: overlap TmpMatrix fillcomplete %2.3e\n", subdomainID, t1-t0);
565  t0=t1;
566 #endif
567  //FIXME timing
568 
569 
570  // These next two imports get the GIDs that need to go into the column map of the overlapped matrix.
571 
572  // For each column ID in the overlap, determine the owning node (as opposed to core)
573  // ID of the corresponding row. Save those column IDs whose owning node is the current one.
574  // This should get all the imported ghost GIDs.
575  Epetra_IntVector ov_colIdList( TmpMatrix->ColMap() );
576  ov_colIdList.PutValue(-1);
577  Epetra_IntVector ov_rowIdList( TmpMatrix->RowMap() );
578  ov_rowIdList.PutValue(subdomainID);
579  Teuchos::RCP<Epetra_Import> ov_nodeIdImporter = rcp(new Epetra_Import( TmpMatrix->ColMap(), TmpMatrix->RowMap()));
580  ov_colIdList.Import(ov_rowIdList,*ov_nodeIdImporter,Insert);
581 
582  for (int i=0 ; i<ov_colIdList.MyLength(); i++) {
583  if (ov_colIdList[i] == subdomainID) {
584  int GID = (ov_colIdList.Map()).GID(i);
585  colMapTable.put(GID,1);
586  }
587  }
588 
589  // Do a second import of the owning node ID from A's rowmap to TmpMat's column map. This ensures that
590  // all GIDs that belong to a node buddy and are in a ghost row's sparsity pattern will be in the final
591  // overlapped matrix's column map.
592  ov_colIdList.PutValue(-1);
593  Epetra_IntVector ArowIdList( A().RowMatrixRowMap() );
594  ArowIdList.PutValue(subdomainID);
595  nodeIdImporter = rcp(new Epetra_Import( TmpMatrix->ColMap(), A().RowMatrixRowMap() ));
596  ov_colIdList.Import(ArowIdList,*nodeIdImporter,Insert);
597 
598  for (int i=0 ; i<ov_colIdList.MyLength(); i++) {
599  if (ov_colIdList[i] == subdomainID) {
600  int GID = (ov_colIdList.Map()).GID(i);
601  colMapTable.put(GID,1);
602  }
603  }
604 
605  //FIXME timing
606 #ifdef IFPACK_OVA_TIME_BUILD
607  t1 = MPI_Wtime();
608  fprintf(stderr,"[node %d]: overlap 2 imports to fix up colmap %2.3e\n", subdomainID, t1-t0);
609  t0=t1;
610 #endif
611  //FIXME timing
612 
613  } //for (int overlap = 0 ; overlap < OverlapLevel_in ; ++overlap)
614 
615  /* ** ************************************************************************ ** */
616  /* ** ********************** end of main overlap loop ************************ ** */
617  /* ** ************************************************************************ ** */
618 
619  // off-node GIDs that will be in the overlap
620  std::vector<int> ghostElements;
621 
622  Teuchos::Array<int> gidsarray,votesarray;
623  ghostTable.arrayify(gidsarray,votesarray);
624  for (int i=0; i<ghostTable.size(); i++) {
625  ghostElements.push_back(gidsarray[i]);
626  }
627 
628  for (int i = 0 ; i < A().RowMatrixColMap().NumMyElements() ; ++i) {
629  int GID = A().RowMatrixColMap().GID(i);
630  // Remove any entries that are in A's original column map
631  if (colMapTable.containsKey(GID)) {
632  try{colMapTable.remove(GID);}
633  catch(...) {
634  printf("pid %d: In OverlappingRowMatrix ctr, problem removing entry %d from colMapTable\n", Comm().MyPID(),GID);
635  fflush(stdout);
636  }
637  }
638  }
639 
640  // GIDs that will be in the overlapped matrix's column map
641  std::vector<int> colMapElements;
642 
643  gidsarray.clear(); votesarray.clear();
644  colMapTable.arrayify(gidsarray,votesarray);
645  for (int i=0; i<colMapTable.size(); i++)
646  colMapElements.push_back(gidsarray[i]);
647 
648 /*
649  We need two maps here. The first is the row map, which we've got by using my original rows
650  plus whatever I've picked up in ghostElements.
651 
652  The second is a column map. This map should include my rows, plus any columns that could come from node buddies.
653  These GIDs come from the std::vector colMapElements, which in turn comes from colMapTable.
654  This map should *not* omit ghosts that have been imported by my node buddies, i.e., for any row that I own,
655  the stencil should include all column GIDs (including imported ghosts) that are on the node.
656 */
657 
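// Small illustration of the two maps (made-up GIDs, two node buddies p0
// and p1, one level of overlap). On p0 the lists built below might be
//
//   rowList : { 0 1 2 3 | 8 9 }           local rows of A, then ghostElements
//   colList : { 0 1 2 3 4 5 | 8 9 12 }    A's column map, then colMapElements
//
// Ghost rows 8 and 9 were won by p0 in the vote above and become rows of
// the overlapped matrix, while a GID such as 12 appears only in colList:
// it sits in the stencil of a ghost row (or is owned by node buddy p1),
// so it is needed as a column on p0 but not as a row.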
658  // build the row map containing all the nodes (original matrix + off-node matrix)
659  std::vector<int> rowList(NumMyRowsA_ + ghostElements.size());
660  for (int i = 0 ; i < NumMyRowsA_ ; ++i)
661  rowList[i] = A().RowMatrixRowMap().GID(i);
662  for (int i = 0 ; i < (int)ghostElements.size() ; ++i)
663  rowList[i + NumMyRowsA_] = ghostElements[i];
664 
665  // row map for the overlapped matrix (local + overlap)
666  Map_ = rcp( new Epetra_Map(-1, NumMyRowsA_ + ghostElements.size(), &rowList[0], 0, Comm()) );
667 
668  // build the column map for the overlapping matrix
669  //vector<int> colList(colMapElements.size());
670  // column map for the overlapped matrix (local + overlap)
671  //colMap_ = rcp( new Epetra_Map(-1, colMapElements.size(), &colList[0], 0, Comm()) );
672  //for (int i = 0 ; i < (int)colMapElements.size() ; i++)
673  // colList[i] = colMapElements[i];
674  std::vector<int> colList(A().RowMatrixColMap().NumMyElements() + colMapElements.size());
675  int nc = A().RowMatrixColMap().NumMyElements();
676  for (int i = 0 ; i < nc; i++)
677  colList[i] = A().RowMatrixColMap().GID(i);
678  for (int i = 0 ; i < (int)colMapElements.size() ; i++)
679  colList[nc+i] = colMapElements[i];
680 
681  // column map for the overlapped matrix (local + overlap)
682  //colMap_ = rcp( new Epetra_Map(-1, A().RowMatrixColMap().NumMyElements() + colMapElements.size(), &colList[0], 0, Comm()) );
683  colMap_ = new Epetra_Map(-1, A().RowMatrixColMap().NumMyElements() + colMapElements.size(), &colList[0], 0, Comm());
684 
685 
686  //FIXME timing
687 #ifdef IFPACK_OVA_TIME_BUILD
688  t1 = MPI_Wtime();
689  fprintf(stderr,"[node %d]: time to init B() col/row maps %2.3e\n", subdomainID, t1-t0);
690  t0=t1;
691 #endif
692  //FIXME timing
693 
694  //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
695  //++++ start of sort
696  //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
697  // build the column map, but don't use a copy constructor, b/c local communicator SubComm_ is
698  // different from that of Matrix.
699  try {
700  // build row map based on the local communicator. We need this temporarily to build the column map.
701  Epetra_Map* nodeMap = new Epetra_Map(-1,NumMyRowsA_ + ghostElements.size(),&rowList[0],0,*nodeComm);
702  int numMyElts = colMap_->NumMyElements();
703  assert(numMyElts!=0);
704 
705  // The column map *must* be sorted: first locals, then ghosts. The ghosts
706  // must be further sorted so that they are contiguous by owning processor.
707 
708  // For each GID in column map, determine owning PID in local communicator.
709  int* myGlobalElts = new int[numMyElts];
710  colMap_->MyGlobalElements(myGlobalElts);
711  int* pidList = new int[numMyElts];
712  nodeMap->RemoteIDList(numMyElts, myGlobalElts, pidList, 0);
713 
714  // First sort column map GIDs based on the owning PID in local communicator.
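// (In the Sort call below, pidList supplies the keys and myGlobalElts is
// passed, via tt, as a single int companion array, so after the call the
// column map GIDs are grouped by the node-local PID that owns them.)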
715  Epetra_Util Util;
716  int *tt[1];
717  tt[0] = myGlobalElts;
718  Util.Sort(true, numMyElts, pidList, 0, (double**)0, 1, tt);
719 
720  // For each remote PID, sort the column map GIDs in ascending order.
721  // Don't sort the local PID's entries.
722  int localStart=0;
723  while (localStart<numMyElts) {
724  int currPID = (pidList)[localStart];
725  int i=localStart;
726  while (i<numMyElts && (pidList)[i] == currPID) i++;
727  if (currPID != nodeComm->MyPID())
728  Util.Sort(true, i-localStart, (myGlobalElts)+localStart, 0, 0, 0, 0);
729  localStart = i;
730  }
731 
732  // now move the local entries to the front of the list
733  localStart=0;
734  while (localStart<numMyElts && (pidList)[localStart] != nodeComm->MyPID()) localStart++;
735  assert(localStart != numMyElts);
736  int localEnd=localStart;
737  while (localEnd<numMyElts && (pidList)[localEnd] == nodeComm->MyPID()) localEnd++;
738  int* mySortedGlobalElts = new int[numMyElts];
739  int leng = localEnd - localStart;
740  /* This is a little gotcha. It appears that the ordering of the column map's local entries
741  must be the same as that of the domain map's local entries. See the comment in method
742  MakeColMap() in Epetra_CrsGraph.cpp, line 1072. */
743  int *rowGlobalElts = nodeMap->MyGlobalElements();
744 
745  /*TODO TODO TODO NTS rows 68 and 83 show up as local GIDs in rowGlobalElts for both pids 0 & 1 on node 0.
746  This seems to imply that there is something wrong with rowList!!
747  It is ok for the overlapped matrix's row map to have repeated entries (it's overlapped, after all).
748  But ... on a node, there should be no repeats.
749  Ok, here's the underlying cause. On node 0, gpid 1 finds 83 on overlap round 0. gpid 0 finds 83
750  on overlap round 1. This shouldn't happen: once a node buddy "owns" a row, no other node buddy
751  should ever import ("own") that row.
752 
753  Here's a possible fix. Extend tie breaking across overlap rounds. This means sending to lpid 0
754  a list of *all* rows you've imported (this round plus previous rounds) for tie-breaking.
755  If a nb won a ghost row in a previous round, his votes for that ghost row should be really high, i.e.,
756  he should probably win that ghost row forever.
757  7/17/09
758 
759  7/28/09 JJH I believe I've fixed this, but I thought it might be helpful to have this comment in here, in case I missed something.
760  */
761 
762  //move locals to front of list
763  for (int i=0; i<leng; i++) {
764  /* //original code */
765  (mySortedGlobalElts)[i] = rowGlobalElts[i];
766  //(&*mySortedGlobalElts)[i] = (&*myGlobalElts)[localStart+i];
767  //(&*mySortedPidList)[i] = (&*pidList)[localStart+i];
768  }
769  for (int i=leng; i< localEnd; i++) {
770  (mySortedGlobalElts)[i] = (myGlobalElts)[i-leng];
771  }
772  for (int i=localEnd; i<numMyElts; i++) {
773  (mySortedGlobalElts)[i] = (myGlobalElts)[i];
774  }
775 
776  //FIXME timing
777 #ifdef IFPACK_OVA_TIME_BUILD
778  t1 = MPI_Wtime();
779  fprintf(stderr,"[node %d]: time to sort col map arrays (cp=1) %2.3e\n", subdomainID, t1-t0);
780  t0=t1;
781 #endif
782  //FIXME timing
783 
784  int indexBase = colMap_->IndexBase();
785  if (colMap_) delete colMap_;
786  colMap_ = new Epetra_Map(-1,numMyElts,mySortedGlobalElts,indexBase,Comm());
787 
788  delete nodeMap;
789  delete [] myGlobalElts;
790  delete [] pidList;
791  delete [] mySortedGlobalElts;
792 
793  } //try
794  catch(...) {
795  printf("** * Ifpack_OverlappingRowMatrix ctor: problem creating column map * **\n\n");
796  }
797 
798  //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
799  //++++ end of sort
800  //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
801 
802  //FIXME timing
803 #ifdef IFPACK_OVA_TIME_BUILD
804  t1 = MPI_Wtime();
805  fprintf(stderr,"[node %d]: time to sort col map arrays (cp=2) %2.3e\n", subdomainID, t1-t0);
806  t0=t1;
807 #endif
808  //FIXME timing
809 
810 
811 /*
812 
813  FIXME
814  Does the column map need to be sorted for the overlapping matrix?
815 
816  The column map *must* be sorted:
817 
818  first locals
819  then ghosts
820 
821  The ghosts must be further sorted so that they are contiguous by owning processor
822 
823  int* RemoteSizeList = 0
824  int* RemoteColIndices = ColIndices.Values() + NumLocalColGIDs; // Points to back end of ColIndices
825 
826  EPETRA_CHK_ERR(DomainMap.RemoteIDList(NumRemoteColGIDs, RemoteColIndices, PIDList.Values(), 0, RemoteSizeList));
827  Epetra_Util epetraUtil;
828  SortLists[0] = RemoteColIndices;
829  SortLists[1] = RemoteSizeList;
830  epetraUtil.Sort(true, NumRemoteColGIDs, PIDList.Values(), 0, 0, NLists, SortLists);
831 */
832 
833  // now build the map corresponding to all the external nodes
834  // (with respect to A().RowMatrixRowMap()).
835  ExtMap_ = rcp( new Epetra_Map(-1,ghostElements.size(), &ghostElements[0],0,Comm()) );
836  ExtMatrix_ = rcp( new Epetra_CrsMatrix(Copy,*ExtMap_,*colMap_,0) );
837 
838  ExtImporter_ = rcp( new Epetra_Import(*ExtMap_,A().RowMatrixRowMap()) );
839  ExtMatrix_->Import(A(),*ExtImporter_,Insert);
840 
841  //FIXME timing
842 #ifdef IFPACK_OVA_TIME_BUILD
843  t1 = MPI_Wtime();
844  fprintf(stderr,"[node %d]: time to import and setup ExtMap_ %2.3e\n", subdomainID, t1-t0);
845  t0=t1;
846 #endif
847  //FIXME timing
848 
849 
850 /*
851  Notes to self: In FillComplete, the range map does not have to be 1-1 as long as
852  (row map == range map). Ditto for the domain map not being 1-1
853  if (col map == domain map).
854 
855 */
856 
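// In the FillComplete call below the first argument is the domain map and
// the second the range map, so B() gets the overlapped column map as its
// domain and the overlapped row map Map_ as its range. Map_ is neither
// one-to-one across node buddies nor equal to B()'s row map (that is
// ExtMap_), which is presumably why the call is still flagged FIXME.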
857  ExtMatrix_->FillComplete( *colMap_ , *Map_ ); //FIXME wrong
858 
859  //FIXME timing
860 #ifdef IFPACK_OVA_TIME_BUILD
861  t1 = MPI_Wtime();
862  fprintf(stderr,"[node %d]: time to FillComplete B() %2.3e\n", subdomainID, t1-t0);
863  t0=t1;
864 #endif
865  //FIXME timing
866 
867 
868  // Note: B() = *ExtMatrix_ .
869 
870  Importer_ = rcp( new Epetra_Import(*Map_,A().RowMatrixRowMap()) ); //FIXME is this right?!
871 
872  // fix indices for overlapping matrix
873  NumMyRowsB_ = B().NumMyRows();
874  NumMyRows_ = NumMyRowsA_ + NumMyRowsB_; //TODO is this wrong for a subdomain on >1 processor? // should be ok
875  //NumMyCols_ = NumMyRows_; //TODO is this wrong for a subdomain on >1 processor? // YES!!!
876  //NumMyCols_ = A().NumMyCols() + B().NumMyCols();
877  NumMyCols_ = B().NumMyCols();
878 
879  /*FIXME*/ //somehow B's NumMyCols is the entire subdomain (local + overlap)
880 
881  NumMyDiagonals_ = A().NumMyDiagonals() + B().NumMyDiagonals();
882 
883  NumMyNonzeros_ = A().NumMyNonzeros() + B().NumMyNonzeros();
884  long long NumMyNonZerosTemp = NumMyNonzeros_;
885  Comm().SumAll(&NumMyNonZerosTemp,&NumGlobalNonzeros_,1);
886  MaxNumEntries_ = A().MaxNumEntries();
887 
888  if (MaxNumEntries_ < B().MaxNumEntries())
889  MaxNumEntries_ = B().MaxNumEntries();
890 
891 # ifdef HAVE_MPI
892  delete nodeComm;
893 # endif
894 
895  //FIXME timing
896 #ifdef IFPACK_OVA_TIME_BUILD
897  t1 = MPI_Wtime();
898  fprintf(stderr,"[node %d]: time for final calcs %2.3e\n", subdomainID, t1-t0);
899  fprintf(stderr,"[node %d]: total IORM ctor time %2.3e\n", subdomainID, t1-true_t0);
900 #endif
901  //FIXME timing
902 
903 
904 } //Ifpack_OverlappingRowMatrix() ctor for more than one core
905 
906 #else
907 # ifdef IFPACK_NODE_AWARE_CODE
908 
909 // ======================================================================
910 // Constructor for the case of two or more cores per subdomain
911 Ifpack_OverlappingRowMatrix::
912 Ifpack_OverlappingRowMatrix(const RCP<const Epetra_RowMatrix>& Matrix_in,
913  int OverlapLevel_in, int nodeID) :
914  Matrix_(Matrix_in),
915  OverlapLevel_(OverlapLevel_in)
916 {
917 
918  //FIXME timing
919 #ifdef IFPACK_OVA_TIME_BUILD
920  double t0 = MPI_Wtime();
921  double t1, true_t0=t0;
922 #endif
923  //FIXME timing
924 
925  const int votesMax = INT_MAX;
926 
927  // should not be here if no overlap
928  if (OverlapLevel_in == 0)
929  IFPACK_CHK_ERRV(-1);
930 
931  // nothing to do as well with one process
932  if (Comm().NumProc() == 1)
933  IFPACK_CHK_ERRV(-1);
934 
935  // nodeID is the node (aka socket) number, and so is system dependent
936  // nodeComm is the communicator for all the processes on a particular node
937  // these processes will have the same nodeID.
938 # ifdef HAVE_MPI
939  const Epetra_MpiComm *pComm = dynamic_cast<const Epetra_MpiComm*>( &Comm() );
940  assert(pComm != NULL);
941  MPI_Comm nodeMPIComm;
942  MPI_Comm_split(pComm->Comm(),nodeID,pComm->MyPID(),&nodeMPIComm);
943  Epetra_MpiComm *nodeComm = new Epetra_MpiComm(nodeMPIComm);
944 # else
945  Epetra_SerialComm *nodeComm = dynamic_cast<Epetra_SerialComm*>( const_cast<Epetra_Comm*>( &(Matrix_in->RowMatrixRowMap().Comm()) ) );
946 # endif
947 
948  NumMyRowsA_ = A().NumMyRows();
949 
950  RCP<Epetra_Map> TmpMap;
951  RCP<Epetra_CrsMatrix> TmpMatrix;
952  RCP<Epetra_Import> TmpImporter;
953 
954  // importing rows corresponding to elements that are
955  // in ColMap, but not in RowMap
956  const Epetra_Map *RowMap;
957  const Epetra_Map *ColMap;
958  const Epetra_Map *DomainMap;
959 
960  // TODO Count #connections from nodes I own to each ghost node
961 
962 
963  //FIXME timing
964 #ifdef IFPACK_OVA_TIME_BUILD
965  t1 = MPI_Wtime();
966  fprintf(stderr,"[node %d]: time for initialization %2.3e\n", nodeID, t1-t0);
967  t0=t1;
968 #endif
969  //FIXME timing
970 
971 #ifdef IFPACK_OVA_TIME_BUILD
972  t1 = MPI_Wtime();
973  fprintf(stderr,"[node %d]: overlap hash table allocs %2.3e\n", nodeID, t1-t0);
974  t0=t1;
975 #endif
976 
977  Teuchos::Hashtable<int,int> colMapTable(3 * A().RowMatrixColMap().NumMyElements() );
978 
979  // ghostTable holds off-node GIDs that are connected to on-node rows and can potentially be this PID's overlap
980  // TODO hopefully 3 times the # column entries is a reasonable table size
981  Teuchos::Hashtable<int,int> ghostTable(3 * A().RowMatrixColMap().NumMyElements() );
982 
983  /* ** ************************************************************************** ** */
984  /* ** ********************** start of main overlap loop ************************ ** */
985  /* ** ************************************************************************** ** */
986  for (int overlap = 0 ; overlap < OverlapLevel_in ; ++overlap)
987  {
988  // colMapTable (filled below) holds node buddy GIDs that are connected to the current PID's rows, i.e., GIDs that
989  // should be in the overlapped matrix's column map.
990 
991  if (TmpMatrix != Teuchos::null) {
992  RowMap = &(TmpMatrix->RowMatrixRowMap());
993  ColMap = &(TmpMatrix->RowMatrixColMap());
994  DomainMap = &(TmpMatrix->OperatorDomainMap());
995  }
996  else {
997  RowMap = &(A().RowMatrixRowMap());
998  ColMap = &(A().RowMatrixColMap());
999  DomainMap = &(A().OperatorDomainMap());
1000  }
1001 
1002 #ifdef IFPACK_OVA_TIME_BUILD
1003  t1 = MPI_Wtime();
1004  fprintf(stderr,"[node %d]: overlap pointer pulls %2.3e\n", nodeID, t1-t0);
1005  t0=t1;
1006 #endif
1007 
1008  // For each column ID, determine the owning node (as opposed to core)
1009  // ID of the corresponding row.
1010  Epetra_IntVector colIdList( *ColMap );
1011  Epetra_IntVector rowIdList(*DomainMap);
1012  rowIdList.PutValue(nodeID);
1013  Teuchos::RCP<Epetra_Import> nodeIdImporter = rcp(new Epetra_Import( *ColMap, *DomainMap ));
1014 
1015 #ifdef IFPACK_OVA_TIME_BUILD
1016  t1 = MPI_Wtime();
1017  fprintf(stderr,"[node %d]: overlap intvector/importer allocs %2.3e\n", nodeID, t1-t0);
1018  t0=t1;
1019 #endif
1020 
1021  colIdList.Import(rowIdList,*nodeIdImporter,Insert);
1022 
1023  int size = ColMap->NumMyElements() - RowMap->NumMyElements();
1024  int count = 0;
1025 
1026 #ifdef IFPACK_OVA_TIME_BUILD
1027  t1 = MPI_Wtime();
1028  fprintf(stderr,"[node %d]: overlap col/row ID imports %2.3e\n", nodeID, t1-t0);
1029  t0=t1;
1030 #endif
1031 
1032 
1033  // define the set of off-node rows that are in ColMap but not in RowMap
1034  // This naturally does not include off-core rows that are on the same node as me, i.e., node buddy rows.
1035  for (int i = 0 ; i < ColMap->NumMyElements() ; ++i) {
1036  int GID = ColMap->GID(i);
1037  int myvotes=0;
1038  if (ghostTable.containsKey(GID)) myvotes = ghostTable.get(GID);
1039  if ( colIdList[i] != nodeID && myvotes < votesMax) // don't include anybody found in a previous round
1040  {
1041  int votes;
1042  if (ghostTable.containsKey(GID)) {
1043  votes = ghostTable.get(GID);
1044  votes++;
1045  ghostTable.put(GID,votes);
1046  } else {
1047  ghostTable.put(GID,1);
1048  }
1049  }
1050  } //for (int i = 0 ; i < ColMap->NumMyElements() ; ++i)
1051 
1052  Teuchos::Array<int> gidsarray,votesarray;
1053  ghostTable.arrayify(gidsarray,votesarray);
1054  int *gids = gidsarray.getRawPtr();
1055  int *votes = votesarray.getRawPtr();
1056 
1057  /*
1058  This next bit of code decides which node buddy (NB) gets which ghost points. Everyone sends their
1059  list of ghost points to pid 0 of the local subcommunicator. Pid 0 decides who gets what:
1060 
1061  - if a ghost point is touched by only one NB, that NB gets the ghost point
1062  - if two or more NBs share a ghost point, whichever NB has the most connections to the ghost
1063  point gets it.
1064  */
1065 
1066 # ifdef HAVE_MPI //FIXME What if we build in serial?! This file will likely not compile.
1067  int *cullList;
1068  int ncull;
1069  //int mypid = nodeComm->MyPID();
1070 
1071  //FIXME timing
1072 #ifdef IFPACK_OVA_TIME_BUILD
1073  t1 = MPI_Wtime();
1074  fprintf(stderr,"[node %d]: overlap before culling %2.3e\n", nodeID, t1-t0);
1075  t0=t1;
1076 #endif
1077  //FIXME timing
1078 
1079 
1080  if (nodeComm->MyPID() == 0)
1081  {
1082  // Figure out how much pid 0 is to receive
1083  MPI_Status status;
1084  int *lengths = new int[nodeComm->NumProc()+1];
1085  lengths[0] = 0;
1086  lengths[1] = ghostTable.size();
1087  for (int i=1; i<nodeComm->NumProc(); i++) {
1088  int leng;
1089  MPI_Recv( &leng, 1, MPI_INT, i, MPI_ANY_TAG, nodeComm->Comm(), &status);
1090  lengths[i+1] = lengths[i] + leng;
1091  }
1092  int total = lengths[nodeComm->NumProc()];
1093 
1094  int* ghosts = new int[total];
1095  for (int i=0; i<total; i++) ghosts[i] = -9;
1096  int *round = new int[total];
1097  int *owningpid = new int[total];
1098 
1099  for (int i=1; i<nodeComm->NumProc(); i++) {
1100  int count = lengths[i+1] - lengths[i];
1101  MPI_Recv( ghosts+lengths[i], count, MPI_INT, i, MPI_ANY_TAG, nodeComm->Comm(), &status);
1102  MPI_Recv( round+lengths[i], count, MPI_INT, i, MPI_ANY_TAG, nodeComm->Comm(), &status);
1103  }
1104 
1105  // put in pid 0's info
1106  for (int i=0; i<lengths[1]; i++) {
1107  ghosts[i] = gids[i];
1108  round[i] = votes[i];
1109  owningpid[i] = 0;
1110  }
1111 
1112  // put in the pid associated with each ghost
1113  for (int j=1; j<nodeComm->NumProc(); j++) {
1114  for (int i=lengths[j]; i<lengths[j+1]; i++) {
1115  owningpid[i] = j;
1116  }
1117  }
1118 
1119  // sort everything based on the ghost gids
1120  int* roundpid[2];
1121  roundpid[0] = round; roundpid[1] = owningpid;
1122  Epetra_Util epetraUtil;
1123  epetraUtil.Sort(true,total,ghosts,0,0,2,roundpid);
1124 
1125  //set up arrays that get sent back to node buddies and that tell them what ghosts to cull
1126  int *nlosers = new int[nodeComm->NumProc()];
1127  int** losers = new int*[nodeComm->NumProc()];
1128  for (int i=0; i<nodeComm->NumProc(); i++) {
1129  nlosers[i] = 0;
1130  losers[i] = new int[ lengths[i+1]-lengths[i] ];
1131  }
1132 
1133  // Walk through the ghosts array and, for each sequence of duplicate ghost GIDs, choose just one NB to keep it.
1134  // The logic is pretty simple. The ghost list is sorted, so all duplicate GIDs are together.
1135  // The list is traversed. As duplicates are found, node pid 0 keeps track of the current "winning"
1136  // pid. When a pid is determined to have "lost" (fewer votes/connections to the current GID), the
1137  // GID is added to that pid's list of GIDs to be culled. At the end of the repeated sequence, we have
1138  // a winner, and the other NBs know whether they need to delete it from their import lists.
1139  int max=0; //for duplicated ghosts, index of the pid with the most votes, which hence keeps the ghost.
1140  // TODO: break ties randomly
1141 
1142  for (int i=1; i<total; i++) {
1143  if (ghosts[i] == ghosts[i-1]) {
1144  int idx = i; // pid associated with idx is current "loser"
1145  if (round[i] > round[max]) {
1146  idx = max;
1147  max=i;
1148  }
1149  int j = owningpid[idx];
1150  losers[j][nlosers[j]++] = ghosts[idx];
1151  } else {
1152  max=i;
1153  }
1154  } //for (int i=1; i<total; i++)
1155 
1156  delete [] round;
1157  delete [] ghosts;
1158  delete [] owningpid;
1159 
1160  // send the arrays of ghost GIDs to be culled back to the respective node buddies
1161  for (int i=1; i<nodeComm->NumProc(); i++) {
1162  MPI_Send( nlosers+i, 1, MPI_INT, i, 8675, nodeComm->Comm());
1163  MPI_Send( losers[i], nlosers[i], MPI_INT, i, 8675, nodeComm->Comm());
1164  }
1165 
1166  //FIXME Unnecessary memory allocation and copying, but makes culling code cleaner
1167  //Could we stick this info into gids and votes, since neither is being used anymore?
1168  //TODO Instead of using "losers" arrays, just use "cullList" as in the else clause
1169  ncull = nlosers[0];
1170  cullList = new int[ncull+1];
1171  for (int i=0; i<nlosers[0]; i++)
1172  cullList[i] = losers[0][i];
1173 
1174  for (int i=0; i<nodeComm->NumProc(); i++)
1175  delete [] losers[i];
1176 
1177  delete [] lengths;
1178  delete [] losers;
1179  delete [] nlosers;
1180 
1181  } else { //everyone but pid 0
1182 
1183  // send to node pid 0 all ghosts that this pid could potentially import
1184  int hashsize = ghostTable.size();
1185  MPI_Send( &hashsize, 1, MPI_INT, 0, 8675, nodeComm->Comm());
1186  MPI_Send( gids, hashsize, MPI_INT, 0, 8675, nodeComm->Comm());
1187  MPI_Send( votes, hashsize, MPI_INT, 0, 8675, nodeComm->Comm());
1188 
1189  // receive the ghost GIDs that should not be imported (subset of the list sent off just above)
1190  MPI_Status status;
1191  MPI_Recv( &ncull, 1, MPI_INT, 0, 8675, nodeComm->Comm(), &status);
1192  cullList = new int[ncull+1];
1193  MPI_Recv( cullList, ncull, MPI_INT, 0, 8675, nodeComm->Comm(), &status);
1194  }
1195 
1196 
1197  //TODO clean out hash table after each time through overlap loop. 4/1/07 JJH: done; moved both hash tables to local scope.
1198 
1199  // Remove from my row map all off-node ghosts that will be imported by a node buddy.
1200  for (int i=0; i<ncull; i++) {
1201  try{ghostTable.remove(cullList[i]);}
1202 
1203  catch(...) {
1204  printf("pid %d: In OverlappingRowMatrix ctr, problem removing ghost elt %d from ghostTable\n",
1205  Comm().MyPID(),cullList[i]);
1206  fflush(stdout);
1207  }
1208  }//for
1209 
1210  delete [] cullList;
1211 
1212  // Save off the remaining ghost GIDs from the current overlap round.
1213  // These are off-node GIDs (rows) that I will import.
1214  gidsarray.clear(); votesarray.clear();
1215  ghostTable.arrayify(gidsarray,votesarray);
1216 
1217  std::vector<int> list(size);
1218  count=0;
1219  for (int i=0; i<ghostTable.size(); i++) {
1220  // if votesarray[i] == votesMax, then this GID was found during a previous overlap round
1221  if (votesarray[i] < votesMax) {
1222  list[count] = gidsarray[i];
1223  ghostTable.put(gidsarray[i],votesMax); //set this GID's vote to the maximum so that this PID always wins in any subsequent overlap tiebreaking.
1224  count++;
1225  }
1226  }
1227 
1228  //FIXME timing
1229 #ifdef IFPACK_OVA_TIME_BUILD
1230  t1 = MPI_Wtime();
1231  fprintf(stderr,"[node %d]: overlap duplicate removal %2.3e\n", nodeID, t1-t0);
1232  t0=t1;
1233 #endif
1234  //FIXME timing
1235 
1236 # endif //ifdef HAVE_MPI
1237 
1238  TmpMap = rcp( new Epetra_Map(-1,count, &list[0],0,Comm()) );
1239 
1240  TmpMatrix = rcp( new Epetra_CrsMatrix(Copy,*TmpMap,0) );
1241 
1242  TmpImporter = rcp( new Epetra_Import(*TmpMap,A().RowMatrixRowMap()) );
1243 
1244  TmpMatrix->Import(A(),*TmpImporter,Insert);
1245  TmpMatrix->FillComplete(A().OperatorDomainMap(),*TmpMap);
1246 
1247  //FIXME timing
1248 #ifdef IFPACK_OVA_TIME_BUILD
1249  t1 = MPI_Wtime();
1250  fprintf(stderr,"[node %d]: overlap TmpMatrix fillcomplete %2.3e\n", nodeID, t1-t0);
1251  t0=t1;
1252 #endif
1253  //FIXME timing
1254 
1255 
1256  // These next two imports get the GIDs that need to go into the column map of the overlapped matrix.
1257 
1258  // For each column ID in the overlap, determine the owning node (as opposed to core)
1259  // ID of the corresponding row. Save those column IDs whose owning node is the current one.
1260  // This should get all the imported ghost GIDs.
1261  Epetra_IntVector ov_colIdList( TmpMatrix->ColMap() );
1262  ov_colIdList.PutValue(-1);
1263  Epetra_IntVector ov_rowIdList( TmpMatrix->RowMap() );
1264  ov_rowIdList.PutValue(nodeID);
1265  Teuchos::RCP<Epetra_Import> ov_nodeIdImporter = rcp(new Epetra_Import( TmpMatrix->ColMap(), TmpMatrix->RowMap()));
1266  ov_colIdList.Import(ov_rowIdList,*ov_nodeIdImporter,Insert);
1267 
1268  for (int i=0 ; i<ov_colIdList.MyLength(); i++) {
1269  if (ov_colIdList[i] == nodeID) {
1270  int GID = (ov_colIdList.Map()).GID(i);
1271  colMapTable.put(GID,1);
1272  }
1273  }
1274 
1275  // Do a second import of the owning node ID from A's rowmap to TmpMat's column map. This ensures that
1276  // all GIDs that belong to a node buddy and are in a ghost row's sparsity pattern will be in the final
1277  // overlapped matrix's column map.
1278  ov_colIdList.PutValue(-1);
1279  Epetra_IntVector ArowIdList( A().RowMatrixRowMap() );
1280  ArowIdList.PutValue(nodeID);
1281  nodeIdImporter = rcp(new Epetra_Import( TmpMatrix->ColMap(), A().RowMatrixRowMap() ));
1282  ov_colIdList.Import(ArowIdList,*nodeIdImporter,Insert);
1283 
1284  for (int i=0 ; i<ov_colIdList.MyLength(); i++) {
1285  if (ov_colIdList[i] == nodeID) {
1286  int GID = (ov_colIdList.Map()).GID(i);
1287  colMapTable.put(GID,1);
1288  }
1289  }
1290 
1291  //FIXME timing
1292 #ifdef IFPACK_OVA_TIME_BUILD
1293  t1 = MPI_Wtime();
1294  fprintf(stderr,"[node %d]: overlap 2 imports to fix up colmap %2.3e\n", nodeID, t1-t0);
1295  t0=t1;
1296 #endif
1297  //FIXME timing
1298 
1299  } //for (int overlap = 0 ; overlap < OverlapLevel_in ; ++overlap)
1300 
1301  /* ** ************************************************************************ ** */
1302  /* ** ********************** end of main overlap loop ************************ ** */
1303  /* ** ************************************************************************ ** */
1304 
1305  // off-node GIDs that will be in the overlap
1306  std::vector<int> ghostElements;
1307 
1308  Teuchos::Array<int> gidsarray,votesarray;
1309  ghostTable.arrayify(gidsarray,votesarray);
1310  for (int i=0; i<ghostTable.size(); i++) {
1311  ghostElements.push_back(gidsarray[i]);
1312  }
1313 
1314  for (int i = 0 ; i < A().RowMatrixColMap().NumMyElements() ; ++i) {
1315  int GID = A().RowMatrixColMap().GID(i);
1316  // Remove any entries that are in A's original column map
1317  if (colMapTable.containsKey(GID)) {
1318  try{colMapTable.remove(GID);}
1319  catch(...) {
1320  printf("pid %d: In OverlappingRowMatrix ctr, problem removing entry %d from colMapTable\n", Comm().MyPID(),GID);
1321  fflush(stdout);
1322  }
1323  }
1324  }
1325 
1326  // GIDs that will be in the overlapped matrix's column map
1327  std::vector<int> colMapElements;
1328 
1329  gidsarray.clear(); votesarray.clear();
1330  colMapTable.arrayify(gidsarray,votesarray);
1331  for (int i=0; i<colMapTable.size(); i++)
1332  colMapElements.push_back(gidsarray[i]);
1333 
1334 /*
1335  We need two maps here. The first is the row map, which we've got by using my original rows
1336  plus whatever I've picked up in ghostElements.
1337 
1338  The second is a column map. This map should include my rows, plus any columns that could come from node buddies.
1339  These GIDs come from the std::vector colMapElements, which in turn comes from colMapTable.
1340  This map should *not* omit ghosts that have been imported by my node buddies, i.e., for any row that I own,
1341  the stencil should include all column GIDs (including imported ghosts) that are on the node.
1342 */
1343 
1344  // build the row map containing all the nodes (original matrix + off-node matrix)
1345  std::vector<int> rowList(NumMyRowsA_ + ghostElements.size());
1346  for (int i = 0 ; i < NumMyRowsA_ ; ++i)
1347  rowList[i] = A().RowMatrixRowMap().GID(i);
1348  for (int i = 0 ; i < (int)ghostElements.size() ; ++i)
1349  rowList[i + NumMyRowsA_] = ghostElements[i];
1350 
1351  // row map for the overlapped matrix (local + overlap)
1352  Map_ = rcp( new Epetra_Map(-1, NumMyRowsA_ + ghostElements.size(), &rowList[0], 0, Comm()) );
1353 
1354  // build the column map for the overlapping matrix
1355  //vector<int> colList(colMapElements.size());
1356  // column map for the overlapped matrix (local + overlap)
1357  //colMap_ = rcp( new Epetra_Map(-1, colMapElements.size(), &colList[0], 0, Comm()) );
1358  //for (int i = 0 ; i < (int)colMapElements.size() ; i++)
1359  // colList[i] = colMapElements[i];
1360  std::vector<int> colList(A().RowMatrixColMap().NumMyElements() + colMapElements.size());
1361  int nc = A().RowMatrixColMap().NumMyElements();
1362  for (int i = 0 ; i < nc; i++)
1363  colList[i] = A().RowMatrixColMap().GID(i);
1364  for (int i = 0 ; i < (int)colMapElements.size() ; i++)
1365  colList[nc+i] = colMapElements[i];
1366 
1367  // column map for the overlapped matrix (local + overlap)
1368  //colMap_ = rcp( new Epetra_Map(-1, A().RowMatrixColMap().NumMyElements() + colMapElements.size(), &colList[0], 0, Comm()) );
1369  colMap_ = new Epetra_Map(-1, A().RowMatrixColMap().NumMyElements() + colMapElements.size(), &colList[0], 0, Comm());
1370 
1371 
1372  //FIXME timing
1373 #ifdef IFPACK_OVA_TIME_BUILD
1374  t1 = MPI_Wtime();
1375  fprintf(stderr,"[node %d]: time to init B() col/row maps %2.3e\n", nodeID, t1-t0);
1376  t0=t1;
1377 #endif
1378  //FIXME timing
1379 
1380  //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1381  //++++ start of sort
1382  //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1383  // build the column map, but don't use a copy constructor, b/c local communicator SubComm_ is
1384  // different from that of Matrix.
1385  try {
1386  // build row map based on the local communicator. We need this temporarily to build the column map.
1387  Epetra_Map* nodeMap = new Epetra_Map(-1,NumMyRowsA_ + ghostElements.size(),&rowList[0],0,*nodeComm);
1388  int numMyElts = colMap_->NumMyElements();
1389  assert(numMyElts!=0);
1390 
1391  // The column map *must* be sorted: first locals, then ghosts. The ghosts
1392  // must be further sorted so that they are contiguous by owning processor.
1393 
1394  // For each GID in column map, determine owning PID in local communicator.
1395  int* myGlobalElts = new int[numMyElts];
1396  colMap_->MyGlobalElements(myGlobalElts);
1397  int* pidList = new int[numMyElts];
1398  nodeMap->RemoteIDList(numMyElts, myGlobalElts, pidList, 0);
1399 
1400  // First sort column map GIDs based on the owning PID in local communicator.
1401  Epetra_Util Util;
1402  int *tt[1];
1403  tt[0] = myGlobalElts;
1404  Util.Sort(true, numMyElts, pidList, 0, (double**)0, 1, tt);
1405 
1406  // For each remote PID, sort the column map GIDs in ascending order.
1407  // Don't sort the local PID's entries.
1408  int localStart=0;
1409  while (localStart<numMyElts) {
1410  int currPID = (pidList)[localStart];
1411  int i=localStart;
1412  while (i<numMyElts && (pidList)[i] == currPID) i++;
1413  if (currPID != nodeComm->MyPID())
1414  Util.Sort(true, i-localStart, (myGlobalElts)+localStart, 0, 0, 0, 0);
1415  localStart = i;
1416  }
1417 
1418  // now move the local entries to the front of the list
1419  localStart=0;
1420  while (localStart<numMyElts && (pidList)[localStart] != nodeComm->MyPID()) localStart++;
1421  assert(localStart != numMyElts);
1422  int localEnd=localStart;
1423  while (localEnd<numMyElts && (pidList)[localEnd] == nodeComm->MyPID()) localEnd++;
1424  int* mySortedGlobalElts = new int[numMyElts];
1425  int leng = localEnd - localStart;
1426  /* This is a little gotcha. It appears that the ordering of the column map's local entries
1427  must be the same as that of the domain map's local entries. See the comment in method
1428  MakeColMap() in Epetra_CrsGraph.cpp, line 1072. */
1429  int *rowGlobalElts = nodeMap->MyGlobalElements();
1430 
1431  /*TODO TODO TODO NTS rows 68 and 83 show up as local GIDs in rowGlobalElts for both pids 0 & 1 on node 0.
1432  This seems to imply that there is something wrong with rowList!!
1433  It is ok for the overlapped matrix's row map to have repeated entries (it's overlapped, after all).
1434  But ... on a node, there should be no repeats.
1435  Ok, here's the underlying cause. On node 0, gpid 1 finds 83 on overlap round 0. gpid 0 finds 83
1436  on overlap round 1. This shouldn't happen: once a node buddy "owns" a row, no other node buddy
1437  should ever import ("own") that row.
1438 
1439  Here's a possible fix. Extend tie breaking across overlap rounds. This means sending to lpid 0
1440  a list of *all* rows you've imported (this round plus previous rounds) for tie-breaking.
1441  If a nb won a ghost row in a previous round, his votes for that ghost row should be really high, i.e.,
1442  he should probably win that ghost row forever.
1443  7/17/09
1444 
1445  7/28/09 JJH I believe I've fixed this, but I thought it might be helpful to have this comment in here, in case I missed something.
1446  */
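 /* Rough sketch of the cross-round tie breaking proposed above (all names are hypothetical):
    each node buddy keeps the set of ghost GIDs it has already won and gives them a dominating
    vote in every later round, e.g.

       std::set<int> wonGhosts;                       // ghost GIDs won in earlier rounds
       for (int gid : candidateGhosts)
         votes[gid] = wonGhosts.count(gid) ? INT_MAX  // previously won: keep it forever
                                           : myVote(gid);
       // lpid 0 then resolves ties on the combined vote lists exactly as in a single round. */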
1447 
1448  //move locals to front of list
1449  for (int i=0; i<leng; i++) {
1450  /* //original code */
1451  (mySortedGlobalElts)[i] = rowGlobalElts[i];
1452  //(&*mySortedGlobalElts)[i] = (&*myGlobalElts)[localStart+i];
1453  //(&*mySortedPidList)[i] = (&*pidList)[localStart+i];
1454  }
1455  for (int i=leng; i< localEnd; i++) {
1456  (mySortedGlobalElts)[i] = (myGlobalElts)[i-leng];
1457  }
1458  for (int i=localEnd; i<numMyElts; i++) {
1459  (mySortedGlobalElts)[i] = (myGlobalElts)[i];
1460  }
1461 
1462  //FIXME timing
1463 #ifdef IFPACK_OVA_TIME_BUILD
1464  t1 = MPI_Wtime();
1465  fprintf(stderr,"[node %d]: time to sort col map arrays (cp=1) %2.3e\n", nodeID, t1-t0);
1466  t0=t1;
1467 #endif
1468  //FIXME timing
1469 
1470  int indexBase = colMap_->IndexBase();
1471  if (colMap_) delete colMap_;
1472  colMap_ = new Epetra_Map(-1,numMyElts,mySortedGlobalElts,indexBase,Comm());
1473 
1474  delete nodeMap;
1475  delete [] myGlobalElts;
1476  delete [] pidList;
1477  delete [] mySortedGlobalElts;
1478 
1479  } //try
1480  catch(...) {
1481  printf("** * Ifpack_OverlappingRowMatrix ctor: problem creating column map * **\n\n");
1482  }
1483 
1484  //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1485  //++++ end of sort
1486  //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1487 
1488  //FIXME timing
1489 #ifdef IFPACK_OVA_TIME_BUILD
1490  t1 = MPI_Wtime();
1491  fprintf(stderr,"[node %d]: time to sort col map arrays (cp=2) %2.3e\n", nodeID, t1-t0);
1492  t0=t1;
1493 #endif
1494  //FIXME timing
1495 
1496 
1497 /*
1498 
1499  FIXME
1500  Does the column map need to be sorted for the overlapping matrix?
1501 
1502  The column map *must* be sorted:
1503 
1504  first locals
1505  then ghosts
1506 
1507  The ghosts must be further sorted so that they are contiguous by owning processor
1508 
1509  int* RemoteSizeList = 0
1510  int* RemoteColIndices = ColIndices.Values() + NumLocalColGIDs; // Points to back end of ColIndices
1511 
1512  EPETRA_CHK_ERR(DomainMap.RemoteIDList(NumRemoteColGIDs, RemoteColIndices, PIDList.Values(), 0, RemoteSizeList));
1513  Epetra_Util epetraUtil;
1514  SortLists[0] = RemoteColIndices;
1515  SortLists[1] = RemoteSizeList;
1516  epetraUtil.Sort(true, NumRemoteColGIDs, PIDList.Values(), 0, 0, NLists, SortLists);
1517 */
1518 
1519  // now build the map corresponding to all the external nodes
1520  // (with respect to A().RowMatrixRowMap()).
1521  ExtMap_ = rcp( new Epetra_Map(-1,ghostElements.size(), &ghostElements[0],0,Comm()) );
1522  ExtMatrix_ = rcp( new Epetra_CrsMatrix(Copy,*ExtMap_,*colMap_,0) );
1523 
1524  ExtImporter_ = rcp( new Epetra_Import(*ExtMap_,A().RowMatrixRowMap()) );
1525  ExtMatrix_->Import(A(),*ExtImporter_,Insert);
1526 
1527  //FIXME timing
1528 #ifdef IFPACK_OVA_TIME_BUILD
1529  t1 = MPI_Wtime();
1530  fprintf(stderr,"[node %d]: time to import and setup ExtMap_ %2.3e\n", nodeID, t1-t0);
1531  t0=t1;
1532 #endif
1533  //FIXME timing
1534 
1535 
1536 /*
1537  Notes to self: In FillComplete, the range map does not have to be 1-1 as long as
1538  (row map == range map). Ditto for the domain map not being 1-1
1539  if (col map == domain map).
1540 
1541 */
1542 
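 // For reference: Epetra_CrsMatrix::FillComplete(DomainMap, RangeMap) takes the domain map as its
 // first argument and the range map as its second, so the call below uses *colMap_ as the domain
 // map and *Map_ as the range map of B().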
1543  ExtMatrix_->FillComplete( *colMap_ , *Map_ ); //FIXME wrong
1544 
1545  //FIXME timing
1546 #ifdef IFPACK_OVA_TIME_BUILD
1547  t1 = MPI_Wtime();
1548  fprintf(stderr,"[node %d]: time to FillComplete B() %2.3e\n", nodeID, t1-t0);
1549  t0=t1;
1550 #endif
1551  //FIXME timing
1552 
1553 
1554  // Note: B() = *ExtMatrix_ .
1555 
1556  Importer_ = rcp( new Epetra_Import(*Map_,A().RowMatrixRowMap()) ); //FIXME is this right?!
1557 
1558  // fix indices for overlapping matrix
1559  NumMyRowsB_ = B().NumMyRows();
1560  NumMyRows_ = NumMyRowsA_ + NumMyRowsB_; //TODO is this wrong for a subdomain on >1 processor? // should be ok
1561  //NumMyCols_ = NumMyRows_; //TODO is this wrong for a subdomain on >1 processor? // YES!!!
1562  //NumMyCols_ = A().NumMyCols() + B().NumMyCols();
1563  NumMyCols_ = B().NumMyCols();
1564 
1565  /*FIXME*/ //somehow B's NumMyCols is the entire subdomain (local + overlap)
1566 
1567  NumMyDiagonals_ = A().NumMyDiagonals() + B().NumMyDiagonals();
1568 
1569  NumMyNonzeros_ = A().NumMyNonzeros() + B().NumMyNonzeros();
1570  Comm().SumAll(&NumMyNonzeros_,&NumGlobalNonzeros_,1);
1571  MaxNumEntries_ = A().MaxNumEntries();
1572 
1573  if (MaxNumEntries_ < B().MaxNumEntries())
1574  MaxNumEntries_ = B().MaxNumEntries();
1575 
1576 # ifdef HAVE_MPI
1577  delete nodeComm;
1578 # endif
1579 
1580  //FIXME timing
1581 #ifdef IFPACK_OVA_TIME_BUILD
1582  t1 = MPI_Wtime();
1583  fprintf(stderr,"[node %d]: time for final calcs %2.3e\n", nodeID, t1-t0);
1584  fprintf(stderr,"[node %d]: total IORM ctor time %2.3e\n", nodeID, t1-true_t0);
1585 #endif
1586  //FIXME timing
1587 
1588 
1589 } //Ifpack_OverlappingRowMatrix() ctor for more than one core
1590 
1591 // Destructor
1592 Ifpack_OverlappingRowMatrix::~Ifpack_OverlappingRowMatrix() {
1593  delete colMap_;
1594 }
1595 
1596 # endif //ifdef IFPACK_NODE_AWARE_CODE
1597 #endif // ifdef HAVE_IFPACK_PARALLEL_SUBDOMAIN_SOLVERS
1598 
1599 
1600 // ======================================================================
1601 int Ifpack_OverlappingRowMatrix::
1602 NumMyRowEntries(int MyRow, int & NumEntries) const
1603 {
1604  if (MyRow < NumMyRowsA_)
1605  return(A().NumMyRowEntries(MyRow,NumEntries));
1606  else
1607  return(B().NumMyRowEntries(MyRow - NumMyRowsA_, NumEntries));
1608 }
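 // Local row numbering used here and in the extraction methods below: rows [0, NumMyRowsA_) of
 // the overlapping matrix are the rows of A(), and rows [NumMyRowsA_, NumMyRows_) are the rows of
 // B() (= *ExtMatrix_), shifted by NumMyRowsA_ before the call is forwarded. For example
 // (hypothetical sizes): with NumMyRowsA_ = 100, local row 103 of this matrix is local row 3 of B().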
1609 
1610 #ifdef HAVE_IFPACK_PARALLEL_SUBDOMAIN_SOLVERS
1611 // ======================================================================
1612 int Ifpack_OverlappingRowMatrix::
1613 ExtractMyRowCopy(int LocRow, int Length, int & NumEntries, double *Values,
1614  int * Indices) const
1615 {
1616  //assert(1==0);
1617  int ierr;
1618  const Epetra_Map *Themap;
1619  if (LocRow < NumMyRowsA_) {
1620  ierr = A().ExtractMyRowCopy(LocRow,Length,NumEntries,Values,Indices);
1621  Themap=&A().RowMatrixColMap();
1622  }
1623  else {
1624  ierr = B().ExtractMyRowCopy(LocRow-NumMyRowsA_,Length,NumEntries,Values,Indices);
1625  Themap=&B().RowMatrixColMap();
1626  }
1627 
1628  IFPACK_RETURN(ierr);
1629 }
1630 
1631 // ======================================================================
1632 int Ifpack_OverlappingRowMatrix::
1633 ExtractGlobalRowCopy(int GlobRow, int Length, int & NumEntries, double *Values,
1634  int * Indices) const
1635 {
1636  int ierr;
1637  const Epetra_Map *Themap;
1638  int LocRow = A().RowMatrixRowMap().LID(GlobRow);
1639  if (LocRow < NumMyRowsA_ && LocRow != -1) { //TODO don't need to check less than nummyrows
1640  ierr = A().ExtractMyRowCopy(LocRow,Length,NumEntries,Values,Indices);
1641  Themap=&A().RowMatrixColMap();
1642  }
1643  else {
1644  LocRow = B().RowMatrixRowMap().LID(GlobRow);
1645  assert(LocRow!=-1);
1646  //ierr = B().ExtractMyRowCopy(LocRow-NumMyRowsA_,Length,NumEntries,Values,Indices);
1647  ierr = B().ExtractMyRowCopy(LocRow,Length,NumEntries,Values,Indices);
1648  Themap=&B().RowMatrixColMap();
1649  }
1650 
1651  for (int i=0; i<NumEntries; i++) {
1652  Indices[i]=Themap->GID(Indices[i]);
1653  assert(Indices[i] != -1);
1654  }
1655 
1656  IFPACK_RETURN(ierr);
1657 }
1658 #else
1659 # ifdef IFPACK_NODE_AWARE_CODE
1660 // ======================================================================
1661 int Ifpack_OverlappingRowMatrix::
1662 ExtractMyRowCopy(int LocRow, int Length, int & NumEntries, double *Values,
1663  int * Indices) const
1664 {
1665  assert(1==0);
1666  int ierr;
1667  const Epetra_Map *Themap;
1668  if (LocRow < NumMyRowsA_) {
1669  ierr = A().ExtractMyRowCopy(LocRow,Length,NumEntries,Values,Indices);
1670  Themap=&A().RowMatrixColMap();
1671  }
1672  else {
1673  ierr = B().ExtractMyRowCopy(LocRow-NumMyRowsA_,Length,NumEntries,Values,Indices);
1674  Themap=&B().RowMatrixColMap();
1675  }
1676 
1677  IFPACK_RETURN(ierr);
1678 }
1679 
1680 // ======================================================================
1681 int Ifpack_OverlappingRowMatrix::
1682 ExtractGlobalRowCopy(int GlobRow, int Length, int & NumEntries, double *Values,
1683  int * Indices) const
1684 {
1685  int ierr;
1686  const Epetra_Map *Themap;
1687  int LocRow = A().RowMatrixRowMap().LID(GlobRow);
1688  if (LocRow < NumMyRowsA_ && LocRow != -1) { //TODO don't need to check less than nummyrows
1689  ierr = A().ExtractMyRowCopy(LocRow,Length,NumEntries,Values,Indices);
1690  Themap=&A().RowMatrixColMap();
1691  }
1692  else {
1693  LocRow = B().RowMatrixRowMap().LID(GlobRow);
1694  assert(LocRow!=-1);
1695  //ierr = B().ExtractMyRowCopy(LocRow-NumMyRowsA_,Length,NumEntries,Values,Indices);
1696  ierr = B().ExtractMyRowCopy(LocRow,Length,NumEntries,Values,Indices);
1697  Themap=&B().RowMatrixColMap();
1698  }
1699 
1700  for (int i=0; i<NumEntries; i++) {
1701  Indices[i]=Themap->GID(Indices[i]);
1702  assert(Indices[i] != -1);
1703  }
1704 
1705  IFPACK_RETURN(ierr);
1706 }
1707 # else
1708 
1709 // ======================================================================
1710 int Ifpack_OverlappingRowMatrix::
1711 ExtractMyRowCopy(int MyRow, int Length, int & NumEntries, double *Values,
1712  int * Indices) const
1713 {
1714  int ierr;
1715  if (MyRow < NumMyRowsA_)
1716  ierr = A().ExtractMyRowCopy(MyRow,Length,NumEntries,Values,Indices);
1717  else
1718  ierr = B().ExtractMyRowCopy(MyRow - NumMyRowsA_,Length,NumEntries,
1719  Values,Indices);
1720 
1721  IFPACK_RETURN(ierr);
1722 }
1723 # endif // ifdef IFPACK_NODE_AWARE_CODE
1724 #endif // ifdef HAVE_IFPACK_PARALLEL_SUBDOMAIN_SOLVERS
1725 
1726 // ======================================================================
1727 int Ifpack_OverlappingRowMatrix::
1728 ExtractDiagonalCopy(Epetra_Vector & /* Diagonal */) const
1729 {
1730  IFPACK_CHK_ERR(-1);
1731 }
1732 
1733 
1734 // ======================================================================
1735 int Ifpack_OverlappingRowMatrix::
1736 Multiply(bool /* TransA */, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
1737 {
1738  int NumVectors = X.NumVectors();
1739  std::vector<int> Ind(MaxNumEntries_);
1740  std::vector<double> Val(MaxNumEntries_);
1741 
1742  Y.PutScalar(0.0);
1743 
1744  // matvec with A (local rows)
1745  for (int i = 0 ; i < NumMyRowsA_ ; ++i) {
1746  for (int k = 0 ; k < NumVectors ; ++k) {
1747  int Nnz;
1748  IFPACK_CHK_ERR(A().ExtractMyRowCopy(i,MaxNumEntries_,Nnz,
1749  &Val[0], &Ind[0]));
1750  for (int j = 0 ; j < Nnz ; ++j) {
1751  Y[k][i] += Val[j] * X[k][Ind[j]];
1752  }
1753  }
1754  }
1755 
1756  // matvec with B (overlapping rows)
1757  for (int i = 0 ; i < NumMyRowsB_ ; ++i) {
1758  for (int k = 0 ; k < NumVectors ; ++k) {
1759  int Nnz;
1760  IFPACK_CHK_ERR(B().ExtractMyRowCopy(i,MaxNumEntries_,Nnz,
1761  &Val[0], &Ind[0]));
1762  for (int j = 0 ; j < Nnz ; ++j) {
1763  Y[k][i + NumMyRowsA_] += Val[j] * X[k][Ind[j]];
1764  }
1765  }
1766  }
1767  return(0);
1768 }
1769 
1770 // ======================================================================
1771 int Ifpack_OverlappingRowMatrix::
1772 Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
1773 {
1774  IFPACK_CHK_ERR(Multiply(UseTranspose(),X,Y));
1775  return(0);
1776 }
1777 
1778 // ======================================================================
1779 int Ifpack_OverlappingRowMatrix::
1780 ApplyInverse(const Epetra_MultiVector& /* X */, Epetra_MultiVector& /* Y */) const
1781 {
1782  IFPACK_CHK_ERR(-1);
1783 }
1784 
1785 // ======================================================================
1786 #ifndef HAVE_IFPACK_PARALLEL_SUBDOMAIN_SOLVERS
1787 # ifndef IFPACK_NODE_AWARE_CODE
1788 Epetra_RowMatrix& Ifpack_OverlappingRowMatrix::B() const
1789 {
1790  return(*ExtMatrix_);
1791 }
1792 # endif
1793 #endif
1794 // ======================================================================
1795 const Epetra_BlockMap& Ifpack_OverlappingRowMatrix::Map() const
1796 {
1797  return(*Map_);
1798 }
1799 
1800 // ======================================================================
1801 int Ifpack_OverlappingRowMatrix::
1802 ImportMultiVector(const Epetra_MultiVector& X, Epetra_MultiVector& OvX,
1803  Epetra_CombineMode CM)
1804 {
1805  OvX.Import(X,*Importer_,CM);
1806  return(0);
1807 }
1808 
1809 // ======================================================================
1810 int Ifpack_OverlappingRowMatrix::
1811 ExportMultiVector(const Epetra_MultiVector& OvX, Epetra_MultiVector& X,
1812  Epetra_CombineMode CM)
1813 {
1814  X.Export(OvX,*Importer_,CM);
1815  return(0);
1816 }
1817 
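 /* Minimal usage sketch of the import/export pair above, in the additive Schwarz style
    (A, X, Y, OverlapLevel, and the local subdomain solve are hypothetical placeholders):

       Ifpack_OverlappingRowMatrix OvA(rcp(&A, false), OverlapLevel);
       Epetra_MultiVector OvX(OvA.RowMatrixRowMap(), X.NumVectors());
       Epetra_MultiVector OvY(OvA.RowMatrixRowMap(), Y.NumVectors());

       OvA.ImportMultiVector(X, OvX, Insert);  // scatter owned rows (plus ghosts) to each subdomain
       // ... apply the local subdomain solve/smoother to OvX, producing OvY ...
       OvA.ExportMultiVector(OvY, Y, Add);     // fold the overlapped result back onto the owned rows
 */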