Amesos Package Browser (Single Doxygen Collection)  Development
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Superludist2_OO.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Amesos: Direct Sparse Solver Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // This library is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as
12 // published by the Free Software Foundation; either version 2.1 of the
13 // License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 // USA
24 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
25 //
26 // ***********************************************************************
27 // @HEADER
28 
29 #define KLOPTER
30 #include "Superludist2_OO.h"
31 #ifdef EPETRA_MPI
32 #include "Epetra_MpiComm.h"
33 #else
34  This code cannot be compiled without mpi.h.
35 #include "Epetra_Comm.h"
36 #endif
37 #include "Epetra_Map.h"
38 #include "Epetra_LocalMap.h"
39 #include "Epetra_Vector.h"
40 #include "Epetra_RowMatrix.h"
41 #include "Epetra_Operator.h"
42 #include "Epetra_Import.h"
43 #include "Epetra_Export.h"
44 #include "Epetra_CrsMatrix.h"
45 #include "CrsMatrixTranspose.h"
46 #include <vector>
47 
48 #ifdef DEBUG
49 #include "Comm_assert_equal.h"
50  // #include "CrsMatricesAreIdentical.h"
51 #endif
52 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
53 #include "ExtractCrsFromRowMatrix.h"
54 #endif
55 /*
56  Returns the largest number of rows that allows NumProcs to be used within a
57  rectangular grid with no more rows than columns.
58 
59  i.e. max i such that there exists j > i such that i*j = NumProcs
60 */
61 int SLU_NumProcRows( int NumProcs ) {
62 #ifdef TFLOP
63  // Else, parameter 6 of DTRSV CTNLU is incorrect
64  return 1;
65 #else
66  int i;
67  int NumProcRows ;
68  for ( i = 1; i*i <= NumProcs; i++ )
69  ;
70  bool done = false ;
71  for ( NumProcRows = i-1 ; done == false ; ) {
72  int NumCols = NumProcs / NumProcRows ;
73  if ( NumProcRows * NumCols == NumProcs )
74  done = true;
75  else
76  NumProcRows-- ;
77  }
78  return NumProcRows;
79 #endif
80 }
81 
82 //=============================================================================
84  // AllocAzArrays();
85 
86  Problem_ = &prob ;
87  Transpose_ = false;
88  A_and_LU_built = false ;
89  Factored_ = false ;
90  FirstCallToSolve_ = true ;
91  //
92  // The following are initialized just on general principle
93  //
94  numprocs = -13 ;
95  nprow = -13 ;
96  npcol = -13 ;
97  numrows = -13 ;
98 
99 }
100 
101 //=============================================================================
103  // DeleteMemory();
104  // DeleteAzArrays();
105 
106  if ( A_and_LU_built ) {
107  // Destroy_CompRowLoc_Matrix_dist(&A);
108  SUPERLU_FREE( A.Store );
109  ScalePermstructFree(&ScalePermstruct);
110  Destroy_LU(numrows, &grid, &LUstruct);
111  LUstructFree(&LUstruct);
112  if ( options.SolveInitialized ) {
113  dSolveFinalize(&options, &SOLVEstruct ) ;
114  }
115  superlu_gridexit(&grid);
116  }
117 
118 }
119 
120 //=============================================================================
121 
122 //
123 // Solve() uses several intermediate matrices to convert the input matrix
124 // to one that we can pass to the Sparse Direct Solver
125 //
126 // Epetra_RowMatrix *RowMatrixA - The input matrix
127 // Epetra_CrsMatrix *CastCrsMatrixA - The input matrix casted to a crs matrix
128 // Epetra_CrsMatrix ExtractCrsMatrixA - Converted to a Crs matrix
129 // (Unused if RowMatrix is an Epetra_CrsMatrix)
130 // Epetra_CrsMatrix *Phase2Mat - Guaranteed to be a CrsMatrix
131 // Epetra_CrsMatrix SerialCrsMatrixA - Phase2Mat coalesced to one process
132 // Epetra_CrsMatrix *Phase3Mat - A pseudo-distributed CrsMatrix
133 // (i.e. stored exclusively on one process)
134 // Epetra_CrsMatrix Phase3MatTrans - The transpose of Phase3Mat
135 // Epetra_CrsMatrix *Phase4Mat - A pseudo-serial CrsMatrix with the
136 // proper transposition
137 // Epetra_CrsMatrix Phase5Mat - A replicated CrsMatrix with the
138 // proper transposition
139 //
140 // This is what the code does:
141 // Step 1) Convert the matrix to an Epetra_CrsMatrix
142 // Step 2) Coalesce the matrix onto process 0
143 // Step 3) Transpose the matrix
144 // Step 4) Replicate the matrix
145 // Step 5) Convert vector b to a replicated vector
146 // Step 6) Convert the matrix to Ap, Ai, Aval
147 // Step 7) Call SuperLUdist
148 // Step 8) Convert vector x back to a distributed vector
149 //
150 // Remaining tasks:
151 // 1) I still need to make it possible for Superludist2_OO to accept a
152 // replicated matrix without using any additional memory.
153 // I believe that all that I need is to move the definition
154 // of ExtractCrsMatrixA, SerialCrsMatrixA and Phase3MatTrans up
155 // to the top of the code and add a conditional which tests to
156 // see if RowMatrixA is actually in the exact format that we need,
157 // and if so, skip all the matrix transformations. Also, Phase5Mat
158 // needs to be changed to Phase4Replicated with an added pointer, named
159 // *Phase5Mat.
160 // 2) Can we handle a transposed matrix any cleaner? We build Ap,
161 // Ai and Avalues - can we build that as a transpose more efficiently
162 // than doing a full CrsMatrix to CrsMatrix transpose?
163 //
164 // Memory usage:
165 // ExtractCrsMatrixA - 1 if RowMAtrixA is not a CrsMatrix
166 // SerialCrsMatrixA - 1 if RowMatrixA is not a serial matrix
167 // Phase3MatTrans - 1 if RowMatrixA unless a transpose solve is requested
168 // Phase5Mat - 1
169 // If we need SerialCrsMAttrixA, then ExtractCrsMatrixA will not be a
170 // serial matrix, hence three extra serial copies is the maximum.
171 //
172 
173 #undef EPETRA_CHK_ERR
174 #define EPETRA_CHK_ERR(xxx) assert( (xxx) == 0 )
175 
176 int Superludist2_OO::Solve(bool factor) {
177  //
178  // I am going to put these here until I determine that I need them in
179  // Superludist2_OO.h
180  //
181 
182 
183  bool CheckExtraction = false; // Set to true to force extraction for unit test
184 
185  assert( GetTrans() == false ) ;
186 
187  Epetra_RowMatrix *RowMatrixA =
188  dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
189 
190  EPETRA_CHK_ERR( RowMatrixA == 0 ) ;
191 
192  Epetra_CrsMatrix *CastCrsMatrixA = dynamic_cast<Epetra_CrsMatrix*>(RowMatrixA) ;
193 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
194  Epetra_CrsMatrix *ExtractCrsMatrixA = 0;
195 #endif
196  Epetra_CrsMatrix *Phase2Mat = 0 ;
197  const Epetra_Comm &Comm = RowMatrixA->Comm();
198 
199  int iam = Comm.MyPID() ;
200 #if 0
201  //
202  // The following lines allow us time to attach the debugger
203  //
204  int hatever;
205  if ( iam == 0 ) cin >> hatever ;
206  Comm.Barrier();
207 #endif
208 
209 
210  //
211  // Step 1) Convert the matrix to an Epetra_CrsMatrix
212  //
213  // If RowMatrixA is not a CrsMatrix, i.e. the dynamic cast fails,
214  // extract a CrsMatrix from the RowMatrix.
215  //
216  if ( CastCrsMatrixA != 0 && ! CheckExtraction ) {
217  Phase2Mat = CastCrsMatrixA ;
218  } else {
219 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
220  ExtractCrsMatrixA = new Epetra_CrsMatrix( *RowMatrixA ) ;
221 
222  Phase2Mat = ExtractCrsMatrixA ;
223 #ifdef DEBUG
224  if ( CheckExtraction )
225  assert( CrsMatricesAreIdentical( CastCrsMatrixA, ExtractCrsMatrixA ) ) ;
226 #endif
227 #else
228  assert( false ) ;
229 #endif
230  }
231  assert( Phase2Mat != NULL ) ;
232  const Epetra_Map &Phase2Matmap = Phase2Mat->RowMap() ;
233 
234  //
235  // Old Glossary:
236  // numrows, numcols = m,n, i.e. the size of the full, global, matrix
237  // m_loc = the number of rows owned by this process
238  // nnz_loc = the number of non zeros in the rows owned by this process
239  // Ap, Aval, Ai, = rowptr, nzval, colind, = sparse row storage
240  // MyRowPtr = a redundant computation of Ap (rowptr)
241 
242  //
243  // Compute nnz_loc, m_loc, and MyRowPtr from the map
244  //
245 
246 #if 0
247  cout << " A Comm.NumProc() = " << Comm.NumProc() << endl ;
248 
249  cout << " true = " << true << " LInMap = " << Phase2Matmap.LinearMap() << endl ; // Map must be contiguously divided
250  cout << " Superludist2_OO.cpp:: traceback mode = " << Epetra_Object::GetTracebackMode() << endl ;
251  cerr << " Send this to cerr cerr cerr traceback mode = " << Epetra_Object::GetTracebackMode() << endl ;
252 #endif
253 
254  numrows = Phase2Matmap.NumGlobalElements() ;
255  assert( numrows == Phase2Mat->NumGlobalRows() ) ;
256  int numcols = Phase2Mat->NumGlobalCols() ;
257  assert( numrows == numcols ) ;
258 
259  //
260  // Here I compute what a uniform distribution should look like
261  // so that I can compare what I think MyFirstElement should be
262  // against what it really is.
263  //
264  int m_per_p = numrows / Comm.NumProc() ;
265  int remainder = numrows - ( m_per_p * Comm.NumProc() );
266  int MyFirstElement = iam * m_per_p + EPETRA_MIN( iam, remainder );
267  int MyFirstNonElement = (iam+1) * m_per_p + EPETRA_MIN( iam+1, remainder );
268  int NumExpectedElements = MyFirstNonElement - MyFirstElement ;
269 
270 
271  int IsLocal = ( Phase2Matmap.NumMyElements() ==
272  Phase2Matmap.NumGlobalElements() )?1:0;
273  Comm.Broadcast( &IsLocal, 1, 0 ) ;
274  //
275  // Step ?) Convert to a distributed matrix, if appropriate
276  //
277  Epetra_CrsMatrix *Phase3Mat = 0 ;
278  Epetra_Map DistMap( Phase2Matmap.NumGlobalElements(), NumExpectedElements, 0, Comm );
279  Epetra_CrsMatrix DistCrsMatrixA(Copy, DistMap, 0);
280 
281  bool redistribute = true ;
282  if ( redistribute ) {
283 
284  Epetra_Export export_to_dist( Phase2Matmap, DistMap);
285 
286  DistCrsMatrixA.Export( *Phase2Mat, export_to_dist, Add );
287 
288  DistCrsMatrixA.FillComplete() ;
289  Phase3Mat = &DistCrsMatrixA ;
290 
291 
292  } else {
293  {
294  EPETRA_CHK_ERR( ! ( Phase2Matmap.LinearMap()) ) ; // Map must be contiguously divided
295  //
296  // This is another way to check that the distribution is as pdgssvx expects it
297  // (i.e. a linear map)
298  //
299  int Phase2NumElements = Phase2Matmap.NumMyElements() ;
300  vector <int> MyRows( Phase2NumElements ) ;
301  Phase2Matmap.MyGlobalElements( &MyRows[0] ) ;
302  for (int row = 0 ; row < Phase2NumElements ; row++ ) {
303  EPETRA_CHK_ERR( MyFirstElement+row != MyRows[row] ) ;
304  }
305  }
306  Phase3Mat = Phase2Mat ;
307  }
308 
309 #if 0
310  assert( MyFirstElement == Phase3Mat->RowMap().MinMyGID() ) ;
311  assert( NumExpectedElements == Phase3Mat->RowMap().NumMyElements() ) ;
312  assert( MyFirstElement+NumExpectedElements-1 == Phase3Mat->RowMap().MaxMyGID() ) ;
313 #endif
314  // Comm.Barrier();
315  int MyActualFirstElement = Phase3Mat->RowMap().MinMyGID() ;
316  int NumMyElements = Phase3Mat->NumMyRows() ;
317  vector <int> MyRowPtr( NumMyElements+1 ) ;
318  //
319  // This is actually redundant with the Ap below
320  //
321  MyRowPtr[0] = 0 ;
322  int CurrentRowPtr = 0 ;
323  for ( int i = 0; i < NumMyElements ; i++ ) {
324  CurrentRowPtr += Phase3Mat->NumMyEntries( i ) ;
325  MyRowPtr[i+1] = CurrentRowPtr ;
326  }
327 
328  //
329  // Extract nzval(Aval) and colind(Ai) from the CrsMatrix (Phase3Mat)
330  //
331 
332  int nnz_loc = Phase3Mat->NumMyNonzeros() ;
333  if ( factor ) {
334  //
335  // Step 6) Convert the matrix to Ap, Ai, Aval
336  //
337  Ap.resize( NumMyElements+1 );
338  Ai.resize( EPETRA_MAX( NumMyElements, nnz_loc) ) ;
339  Aval.resize( EPETRA_MAX( NumMyElements, nnz_loc) ) ;
340 
341  int NzThisRow ;
342  double *RowValues;
343  int *ColIndices;
344  int Ai_index = 0 ;
345  int MyRow;
346  int num_my_cols = Phase3Mat->NumMyCols() ;
347  vector <int>Global_Columns( num_my_cols ) ;
348  for ( int i = 0 ; i < num_my_cols ; i ++ ) {
349  Global_Columns[i] = Phase3Mat->GCID( i ) ;
350  }
351 
352  for ( MyRow = 0; MyRow < NumMyElements ; MyRow++ ) {
353  int status = Phase3Mat->ExtractMyRowView( MyRow, NzThisRow, RowValues, ColIndices ) ;
354  assert( status == 0 ) ;
355  Ap[MyRow] = Ai_index ;
356  assert( Ap[MyRow] == MyRowPtr[MyRow] ) ;
357  for ( int j = 0; j < NzThisRow; j++ ) {
358  Ai[Ai_index] = Global_Columns[ColIndices[j]] ;
359  Aval[Ai_index] = RowValues[j] ;
360  Ai_index++;
361  }
362  }
363  assert( NumMyElements == MyRow );
364  Ap[ NumMyElements ] = Ai_index ;
365  }
366 
367  //
368  // Pull B out of the Epetra_vector
369  //
370 
371  Epetra_MultiVector *vecX = Problem_->GetLHS() ;
372  Epetra_MultiVector *vecB = Problem_->GetRHS() ;
373 
374  int nrhs;
375  if ( vecX == 0 ) {
376  nrhs = 0 ;
377  EPETRA_CHK_ERR( vecB != 0 ) ;
378  } else {
379  nrhs = vecX->NumVectors() ;
380  EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ;
381  }
382 
383  Epetra_MultiVector vecXdistributed( DistMap, nrhs ) ;
384  Epetra_MultiVector vecBdistributed( DistMap, nrhs ) ;
385 
386 
387  double *bValues ;
388  double *xValues ;
389  int ldb, ldx ;
390 
391  Epetra_MultiVector* vecXptr;
392  Epetra_MultiVector* vecBptr;
393 
394  if ( redistribute ) {
395  Epetra_Import ImportToDistributed( DistMap, Phase2Matmap);
396 
397  vecXdistributed.Import( *vecX, ImportToDistributed, Insert ) ;
398  vecBdistributed.Import( *vecB, ImportToDistributed, Insert ) ;
399 
400  vecXptr = &vecXdistributed ;
401  vecBptr = &vecBdistributed ;
402  } else {
403  vecXptr = vecX ;
404  vecBptr = vecB ;
405  }
406 
407 
408  EPETRA_CHK_ERR( vecBptr->ExtractView( &bValues, &ldb ) ) ;
409  EPETRA_CHK_ERR( vecXptr->ExtractView( &xValues, &ldx ) ) ;
410  EPETRA_CHK_ERR( ! ( ldx == ldb ) ) ;
411  EPETRA_CHK_ERR( ! ( ldx == NumMyElements ) ) ;
412 
413  //
414  // Step 7) Call SuperLUdist
415  //
416 
417  //
418  // This really belongs somewhere else - perhaps in the constructor
419  //
420  const Epetra_MpiComm & comm1 = dynamic_cast<const Epetra_MpiComm &> (Comm);
421  MPI_Comm MPIC = comm1.Comm() ;
422 
423  if ( factor ) {
424  numprocs = Comm.NumProc() ;
426  npcol = numprocs / nprow ;
427  assert ( nprow * npcol == numprocs ) ;
428  superlu_gridinit( MPIC, nprow, npcol, &grid);
429 
430 
431  } else {
432  assert( numprocs == Comm.NumProc() ) ;
433  }
434 
435  /* Bail out if I do not belong in the grid. */
436  if ( iam < nprow * npcol ) {
437 
438  if ( factor ) {
439  set_default_options(&options);
440 
441  dCreate_CompRowLoc_Matrix_dist( &A, numrows, numcols,
442  nnz_loc, NumMyElements, MyActualFirstElement,
443  &Aval[0], &Ai[0], &Ap[0],
444  SLU_NR_loc, SLU_D, SLU_GE );
445 
446  A_and_LU_built = true;
447 
448  /* Initialize ScalePermstruct and LUstruct. */
449  ScalePermstructInit(numrows, numcols, &ScalePermstruct);
450  LUstructInit(numrows, numcols, &LUstruct);
451 
452  assert( options.Fact == DOFACT );
453  options.Fact = DOFACT ;
454 
455  Factored_ = true;
456  } else {
457  assert( Factored_ == true ) ;
458  EPETRA_CHK_ERR( Factored_ == false ) ;
459  options.Fact = FACTORED ;
460  }
461  //
462  // pdgssvx returns x in b, so we copy b into x.
463  //
464  for ( int j = 0 ; j < nrhs; j++ )
465  for ( int i = 0 ; i < NumMyElements; i++ ) xValues[i+j*ldx] = bValues[i+j*ldb];
466 
467  PStatInit(&stat); /* Initialize the statistics variables. */
468 
469  int info ;
470  vector<double>berr(nrhs);
471  pdgssvx(&options, &A, &ScalePermstruct, &xValues[0], ldx, nrhs, &grid,
472  &LUstruct, &SOLVEstruct, &berr[0], &stat, &info);
473  EPETRA_CHK_ERR( info ) ;
474 
475  PStatFree(&stat);
476 
477  }
478 
479  if ( redistribute ) {
480  Epetra_Import ImportBackToOriginal( Phase2Matmap,DistMap);
481 
482  vecX->Import( *vecXptr, ImportBackToOriginal, Insert ) ;
483  }
484 
485 
486  return(0) ;
487 }
int NumGlobalElements() const
SOLVEstruct_t SOLVEstruct
vector< double > Aval
bool GetTrans() const
Return the transpose flag.
virtual ~Superludist2_OO(void)
Superludist2_OO Destructor.
Epetra_MultiVector * GetLHS() const
Epetra_MultiVector * GetRHS() const
int NumMyEntries(int Row) const
int MyGlobalElements(int *MyGlobalElementList) const
int NumGlobalRows() const
#define EPETRA_MIN(x, y)
virtual void Barrier() const =0
virtual int MyPID() const =0
int ExtractMyRowView(int MyRow, int &NumEntries, double *&Values, int *&Indices) const
vector< int > Ai
const Epetra_Map & RowMap() const
int NumMyElements() const
int NumMyCols() const
virtual const Epetra_Comm & Comm() const =0
int NumMyRows() const
MPI_Comm Comm() const
#define EPETRA_CHK_ERR(xxx)
superlu_options_t options
int NumGlobalCols() const
int MaxMyGID() const
int MinMyGID() const
virtual int Broadcast(double *MyVals, int Count, int Root) const =0
bool LinearMap() const
ScalePermstruct_t ScalePermstruct
vector< int > Ap
virtual int NumProc() const =0
Superludist2_OO(const Epetra_LinearProblem &LinearProblem)
Superludist2_OO Constructor.
const Epetra_LinearProblem * Problem_
LUstruct_t LUstruct
int Solve(bool Factor)
All computation is performed during the call to Solve()
int SLU_NumProcRows(int NumProcs)
Epetra_Operator * GetOperator() const
SuperLUStat_t stat
#define EPETRA_MAX(x, y)
int GCID(int LCID_in) const
int NumMyNonzeros() const