Amesos Package Browser (Single Doxygen Collection)  Development
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SuperludistOO.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Amesos: Direct Sparse Solver Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // This library is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as
12 // published by the Free Software Foundation; either version 2.1 of the
13 // License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 // USA
24 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
25 //
26 // ***********************************************************************
27 // @HEADER
28 
29 #include "SuperludistOO.h"
30 #ifdef EPETRA_MPI
31 #include "Epetra_MpiComm.h"
32 #else
33  This code cannot be compiled without mpi.h.
34 #include "Epetra_Comm.h"
35 #endif
36 #include "Epetra_Map.h"
37 #include "Epetra_LocalMap.h"
38 #include "Epetra_Vector.h"
39 #include "Epetra_RowMatrix.h"
40 #include "Epetra_Operator.h"
41 #include "Epetra_Import.h"
42 #include "Epetra_Export.h"
43 #include "Epetra_CrsMatrix.h"
44 #include "supermatrix.h"
45 #include "superlu_ddefs.h"
46 #include "CrsMatrixTranspose.h"
47 #include <vector>
48 
49 #ifdef DEBUG
50 #include "Comm_assert_equal.h"
51  // #include "CrsMatricesAreIdentical.h"
52 #endif
53 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
54 #include "ExtractCrsFromRowMatrix.h"
55 #endif
56 /*
57  Returns the largest number of rows that allows NumProcs to be used within a
58  rectangular grid with no more rows than columns.
59 
60  i.e. max i such that there exists j > i such that i*j = NumProcs
61 */
62 int SLU_NumRows( int NumProcs ) {
63 #ifdef TFLOP
64  // Else, parameter 6 of DTRSV CTNLU is incorrect
65  return 1;
66 #else
67  int i;
68  int numrows ;
69  for ( i = 1; i*i <= NumProcs; i++ )
70  ;
71  bool done = false ;
72  for ( numrows = i-1 ; done == false ; ) {
73  int NumCols = NumProcs / numrows ;
74  if ( numrows * NumCols == NumProcs )
75  done = true;
76  else
77  numrows-- ;
78  }
79  return numrows;
80 #endif
81 }
82 
83 //=============================================================================
85  // AllocAzArrays();
86 
87  Problem_ = &prob ;
88  Transpose_ = false;
89  A_and_LU_built = false ;
90  Factored_ = false ;
91  FirstCallToSolve_ = true ;
92  //
93  // The following are initialized just on general principle
94  //
95  numprocs = -13 ;
96  nprow = -13 ;
97  npcol = -13 ;
98  numrows = -13 ;
99 
100 }
101 
102 //=============================================================================
104  // DeleteMemory();
105  // DeleteAzArrays();
106 
107  if ( A_and_LU_built ) {
108  // Destroy_CompCol_Matrix_dist(&A);
109  SUPERLU_FREE(A.Store);
110  Destroy_LU(numrows, &grid, &LUstruct);
111  ScalePermstructFree(&ScalePermstruct);
112  LUstructFree(&LUstruct);
113  SUPERLU_FREE(berr);
114  }
115 
116 }
117 
118 //=============================================================================
119 
120 //
121 // Solve() uses several intermediate matrices to convert the input matrix
122 // to one that we can pass to the Sparse Direct Solver
123 //
124 // Epetra_RowMatrix *RowMatrixA - The input matrix
125 // Epetra_CrsMatrix *CastCrsMatrixA - The input matrix casted to a crs matrix
126 // Epetra_CrsMatrix ExtractCrsMatrixA - Converted to a Crs matrix
127 // (Unused if RowMatrix is an Epetra_CrsMatrix)
128 // Epetra_CrsMatrix *Phase2Mat - Guaranteed to be a CrsMatrix
129 // Epetra_CrsMatrix SerialCrsMatrixA - Phase2Mat coalesced to one process
130 // Epetra_CrsMatrix *Phase3Mat - A pseudo-distributed CrsMatrix
131 // (i.e. stored exclusively on one process)
132 // Epetra_CrsMatrix Phase3MatTrans - The transpose of Phase3Mat
133 // Epetra_CrsMatrix *Phase4Mat - A pseudo-serial CrsMatrix with the
134 // proper transposition
135 // Epetra_CrsMatrix Phase5Mat - A replicated CrsMatrix with the
136 // proper transposition
137 //
138 // This is what the code does:
139 // Step 1) Convert the matrix to an Epetra_CrsMatrix
140 // Step 2) Coalesce the matrix onto process 0
141 // Step 3) Transpose the matrix
142 // Step 4) Replicate the matrix
143 // Step 5) Convert vector b to a replicated vector
144 // Step 6) Convert the matrix to Ap, Ai, Aval
145 // Step 7) Call SuperLUdist
146 // Step 8) Convert vector x back to a distributed vector
147 //
148 // Remaining tasks:
149 // 1) I still need to make it possible for SuperludistOO to accept a
150 // replicated matrix without using any additional memory.
151 // I believe that all that I need is to move the definition
152 // of ExtractCrsMatrixA, SerialCrsMatrixA and Phase3MatTrans up
153 // to the top of the code and add a conditional which tests to
154 // see if RowMatrixA is actually in the exact format that we need,
155 // and if so, skip all the matrix transformations. Also, Phase5Mat
156 // needs to be changed to Phase4Replicated with an added pointer, named
157 // *Phase5Mat.
158 // 2) Can we handle a transposed matrix any cleaner? We build Ap,
159 // Ai and Avalues - can we build that as a transpose more efficiently
160 // than doing a full CrsMatrix to CrsMatrix transpose?
161 //
162 // Memory usage:
163 // ExtractCrsMatrixA - 1 if RowMAtrixA is not a CrsMatrix
164 // SerialCrsMatrixA - 1 if RowMatrixA is not a serial matrix
165 // Phase3MatTrans - 1 if RowMatrixA unless a transpose solve is requested
166 // Phase5Mat - 1
167 // If we need SerialCrsMAttrixA, then ExtractCrsMatrixA will not be a
168 // serial matrix, hence three extra serial copies is the maximum.
169 //
170 int SuperludistOO::Solve(bool factor) {
171  //
172  // I am going to put these here until I determine that I need them in
173  // SuperludistOO.h
174  //
175 
176  bool CheckExtraction = false; // Set to true to force extraction for unit test
177 
178  Epetra_RowMatrix *RowMatrixA =
179  dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
180 
181  EPETRA_CHK_ERR( RowMatrixA == 0 ) ;
182 
183  Epetra_CrsMatrix *CastCrsMatrixA = dynamic_cast<Epetra_CrsMatrix*>(RowMatrixA) ;
184 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
185  Epetra_CrsMatrix *ExtractCrsMatrixA = 0;
186 #endif
187  Epetra_CrsMatrix *Phase2Mat = 0 ;
188  const Epetra_Comm &Comm = RowMatrixA->Comm();
189 
190  int iam = Comm.MyPID() ;
191 #if 0
192  //
193  // The following lines allow us time to attach the debugger
194  //
195  int hatever;
196  if ( iam == 0 ) cin >> hatever ;
197  Comm.Barrier();
198 #endif
199 
200  //
201  // Step 1) Convert the matrix to an Epetra_CrsMatrix
202  //
203  // If RowMatrixA is not a CrsMatrix, i.e. the dynamic cast fails,
204  // extract a CrsMatrix from the RowMatrix.
205  //
206  if ( CastCrsMatrixA != 0 && ! CheckExtraction ) {
207  Phase2Mat = CastCrsMatrixA ;
208  } else {
209 #ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
210  ExtractCrsMatrixA = new Epetra_CrsMatrix( *RowMatrixA ) ;
211 
212  Phase2Mat = ExtractCrsMatrixA ;
213 #ifdef DEBUG
214  if ( CheckExtraction )
215  assert( CrsMatricesAreIdentical( CastCrsMatrixA, ExtractCrsMatrixA ) ) ;
216 #endif
217 #else
218  assert( false ) ;
219 #endif
220  }
221 
222  assert( Phase2Mat != NULL ) ;
223  const Epetra_Map &Phase2Matmap = Phase2Mat->RowMap() ;
224 
225  //
226  // Step 2) Coalesce the matrix onto process 0
227  //
228  const Epetra_MpiComm & comm1 = dynamic_cast<const Epetra_MpiComm &> (Comm);
229  MPI_Comm MPIC = comm1.Comm() ;
230 
231  int IsLocal = ( Phase2Matmap.NumMyElements() ==
232  Phase2Matmap.NumGlobalElements() )?1:0;
233  Comm.Broadcast( &IsLocal, 1, 0 ) ;
234 #ifdef DEBUG
235  assert( Comm_assert_equal( &Comm, IsLocal ) );
236 #endif
237 
238  Epetra_CrsMatrix *Phase3Mat = 0 ;
239 
240  int NumGlobalElements_ = Phase2Matmap.NumGlobalElements() ;
241  // Create a serial map in case we end up needing it
242  // If it is created inside the else block below it would have to
243  // be with a call to new().
244  int NumMyElements_ = 0 ;
245  if (iam==0) NumMyElements_ = NumGlobalElements_;
246  Epetra_Map SerialMap( NumGlobalElements_, NumMyElements_, 0, Comm );
247  Epetra_CrsMatrix SerialCrsMatrixA(Copy, SerialMap, 0);
248 
249 
250  if ( IsLocal==1 ) {
251  Phase3Mat = Phase2Mat ;
252  } else {
253 
254  Epetra_Export export_to_serial( Phase2Matmap, SerialMap);
255 
256  SerialCrsMatrixA.Export( *Phase2Mat, export_to_serial, Add );
257 
258  SerialCrsMatrixA.FillComplete() ;
259  Phase3Mat = &SerialCrsMatrixA ;
260 
261  }
262  Comm.Barrier() ;
263 
264 
265 
266  //
267  // Step 3) Transpose the matrix
268  //
269  const Epetra_Map &Phase3Matmap = Phase3Mat->RowMap() ;
270 
271  numrows = Phase3Mat->NumGlobalRows();
272  int numcols = Phase3Mat->NumGlobalCols();
273  int numentries = Phase3Mat->NumGlobalNonzeros();
274 
275  Epetra_CrsMatrix Phase3MatTrans(Copy, Phase3Matmap, 0);
276  Epetra_CrsMatrix *Phase4Mat;
277 
278  if ( GetTrans() ) {
279  Phase4Mat = Phase3Mat ;
280  } else {
281  assert( CrsMatrixTranspose( Phase3Mat, &Phase3MatTrans ) == 0 ) ;
282  Phase4Mat = &Phase3MatTrans ;
283  }
284 
285 
286  //
287  // Step 4) Replicate the matrix
288  //
289  int * AllIDs = new int[numrows];
290  for ( int i = 0; i < numrows ; i++ ) AllIDs[i] = i ;
291 
292  // Create a replicated map and matrix
293  Epetra_Map ReplicatedMap( -1, numrows, AllIDs, 0, Comm);
294  // Epetra_LocalMap ReplicatedMap( numrows, 0, Comm); // Compiles,runs, fails to replicate
295 
296  delete [] AllIDs;
297 
298  Epetra_Import importer( ReplicatedMap, Phase3Matmap );
299 
300  Epetra_CrsMatrix Phase5Mat(Copy, ReplicatedMap, 0);
301  EPETRA_CHK_ERR( Phase5Mat.Import( *Phase4Mat, importer, Insert) );
302  EPETRA_CHK_ERR( Phase5Mat.FillComplete() ) ;
303 
304  assert( Phase5Mat.NumMyRows() == Phase4Mat->NumGlobalRows() ) ;
305 
306  //
307  // Step 5) Convert vector b to a replicated vector
308  //
309  Epetra_MultiVector *vecX = Problem_->GetLHS() ;
310  Epetra_MultiVector *vecB = Problem_->GetRHS() ;
311 
312  int nrhs;
313  if ( vecX == 0 ) {
314  nrhs = 0 ;
315  EPETRA_CHK_ERR( vecB != 0 ) ;
316  } else {
317  nrhs = vecX->NumVectors() ;
318  EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ;
319  }
320 
321  int nArows = Phase3Mat->NumGlobalRows() ;
322  int nAcols = Phase3Mat->NumGlobalCols() ;
323 
324 #ifdef ONE_VECTOR_ONLY
325  assert( vecX->NumVectors() == 1 ) ;
326  assert( vecB->NumVectors() == 1 ) ;
327 
328  Epetra_Vector *vecXvector = dynamic_cast<Epetra_Vector*>(vecX) ;
329  Epetra_Vector *vecBvector = dynamic_cast<Epetra_Vector*>(vecB) ;
330 
331  assert( vecXvector != 0 ) ;
332  assert( vecBvector != 0 ) ;
333 
334  Epetra_Vector vecXreplicated( ReplicatedMap ) ;
335  Epetra_Vector vecBreplicated( ReplicatedMap ) ;
336 #else
337  Epetra_MultiVector *vecXvector = (vecX) ;
338  Epetra_MultiVector *vecBvector = (vecB) ;
339 
340  Epetra_MultiVector vecXreplicated( ReplicatedMap, nrhs ) ;
341  Epetra_MultiVector vecBreplicated( ReplicatedMap, nrhs ) ;
342 #endif
343 
344  Epetra_Import ImportToReplicated( ReplicatedMap, Phase2Matmap);
345 
346  vecXreplicated.Import( *vecXvector, ImportToReplicated, Insert ) ;
347  vecBreplicated.Import( *vecBvector, ImportToReplicated, Insert ) ;
348 
349  assert( nArows == vecXreplicated.MyLength() ) ;
350  assert( nAcols == vecBreplicated.MyLength() ) ;
351 
352  double *bValues ;
353  double *xValues ;
354  int bLda, xLda ;
355 
356  assert( vecBreplicated.ExtractView( &bValues, &bLda ) == 0 ) ;
357  assert( vecXreplicated.ExtractView( &xValues, &xLda ) == 0 ) ;
358 
359  if ( factor ) {
360  //
361  // Step 6) Convert the matrix to Ap, Ai, Aval
362  //
363  Ap.resize( numrows+1 );
364  Ai.resize( EPETRA_MAX( numrows, numentries) ) ;
365  Aval.resize( EPETRA_MAX( numrows, numentries) ) ;
366 
367  int NumEntries ;
368  double *RowValues;
369  int *ColIndices;
370  int Ai_index = 0 ;
371  int MyRow;
372  for ( MyRow = 0; MyRow <numrows; MyRow++ ) {
373  int status = Phase5Mat.ExtractMyRowView( MyRow, NumEntries, RowValues, ColIndices ) ;
374  assert( status == 0 ) ;
375  Ap[MyRow] = Ai_index ;
376  for ( int j = 0; j < NumEntries; j++ ) {
377  Ai[Ai_index] = ColIndices[j] ;
378  Aval[Ai_index] = RowValues[j] ;
379  Ai_index++;
380  }
381  }
382  assert( numrows == MyRow );
383  Ap[ numrows ] = Ai_index ;
384  }
385 
386  //
387  // Step 7) Call SuperLUdist
388  //
389 
390  //
391  // This really belongs somewhere else - perhaps in the constructor
392  //
393  if ( factor ) {
394  numprocs = Comm.NumProc() ;
395  nprow = SLU_NumRows( numprocs ) ;
396  npcol = numprocs / nprow ;
397  assert ( nprow * npcol == numprocs ) ;
398  superlu_gridinit( MPIC, nprow, npcol, &grid);
399 
400 #ifdef DEBUG
401  assert( Comm_assert_equal( &Comm, numentries ) );
402  assert( Comm_assert_equal( &Comm, numrows ) );
403  assert( Comm_assert_equal( &Comm, numcols ) );
404 #endif
405 
406  } else {
407  assert( numprocs == Comm.NumProc() ) ;
408  }
409  int ldb = numrows ;
410  /* Bail out if I do not belong in the grid. */
411  if ( iam < nprow * npcol ) {
412  //
413  // All processes need to have identical values of:
414  // numrows(m), numcols(n), nnz(NumEntries),
415  // Aval(a), Ap(xa), Ai(asub)
416  // double(numentries), int(n+1), int(numentries)
417 
418 #ifdef DEBUG
419  for ( int ii = 0; ii < min( numentries, 10 ) ; ii++ ) {
420  assert( Comm_assert_equal( &Comm, Aval[ii] ) ) ;
421  }
422  for ( int ii = 0; ii < min( numcols+1, 10 ) ; ii++ ) {
423  assert( Comm_assert_equal( &Comm, Ai[ii] ) );
424  assert( Comm_assert_equal( &Comm, Ap[ii] ) );
425  }
426  for ( int ii = 0; ii < min( numrows, 10 ) ; ii++ ) {
427  assert( Comm_assert_equal( &Comm, bValues[ii] ) );
428  }
429 #endif
430 
431  if ( factor ) {
432  set_default_options(&options);
433 
434  if ( !(berr = doubleMalloc_dist(nrhs)) )
435  EPETRA_CHK_ERR( -1 ) ;
436 
437  /* Create compressed column matrix for A. */
438  dCreate_CompCol_Matrix_dist(&A, numrows, numcols,
439  numentries, &Aval[0], &Ai[0],
440  &Ap[0], SLU_NC, SLU_D, SLU_GE);
441  A_and_LU_built = true;
442 
443 #if 0
444  cout << " Here is A " << endl ;
445  dPrint_CompCol_Matrix_dist( &A );
446  cout << " That was A " << "numrows = " << numrows << endl ;
447  cout << "numcols = " << numcols << endl ;
448 #endif
449 
450  /* Initialize ScalePermstruct and LUstruct. */
451  ScalePermstructInit(numrows, numcols, &ScalePermstruct);
452  LUstructInit(numrows, numcols, &LUstruct);
453 
454  assert( options.Fact == DOFACT );
455  options.Fact = DOFACT ;
456 
457  Factored_ = true;
458  } else {
459  assert( Factored_ == true ) ;
460  EPETRA_CHK_ERR( Factored_ == false ) ;
461  options.Fact = FACTORED ;
462  }
463  //
464  // pdgssvx_ABglobal returns x in b, so we copy b into x and pass x as b.
465  //
466  for ( int j = 0 ; j < nrhs; j++ )
467  for ( int i = 0 ; i < numrows; i++ ) xValues[i+j*xLda] = bValues[i+j*xLda];
468 
469  /* Initialize the statistics variables. */
470  PStatInit(&stat);
471 
472  /* Call the linear equation solver. */
473  int info ;
474  pdgssvx_ABglobal(&options, &A, &ScalePermstruct, &xValues[0],
475  ldb, nrhs, &grid, &LUstruct, berr, &stat, &info);
476  EPETRA_CHK_ERR( info ) ;
477 
478  PStatFree(&stat);
479 
480  }
481 
482 
483  //
484  // Step 8) Convert vector x back to a distributed vector
485  //
486  // This is an ugly hack - it should be cleaned up someday
487  //
488  for (int i = 0 ; i < numrows; i++ ) {
489  int lid[1] ;
490  lid[0] = Phase2Matmap.LID( i ) ;
491  if ( lid[0] >= 0 ) {
492  for (int j = 0 ; j < nrhs; j++ ) {
493  vecXvector->ReplaceMyValue( lid[0], j, xValues[i + ldb * j] ) ;
494  }
495  }
496  }
497 
498  return(0) ;
499 }
const Epetra_LinearProblem * Problem_
int NumGlobalElements() const
int SLU_NumRows(int NumProcs)
Epetra_MultiVector * GetLHS() const
Epetra_MultiVector * GetRHS() const
superlu_options_t options
int NumGlobalRows() const
int CrsMatrixTranspose(Epetra_CrsMatrix *In, Epetra_CrsMatrix *Out)
bool GetTrans() const
Return the transpose flag.
vector< int > Ap
ScalePermstruct_t ScalePermstruct
vector< double > Aval
virtual void Barrier() const =0
vector< int > Ai
gridinfo_t grid
virtual int MyPID() const =0
int FillComplete(bool OptimizeDataStorage=true)
SuperludistOO(const Epetra_LinearProblem &LinearProblem)
SuperludistOO Constructor.
int ExtractMyRowView(int MyRow, int &NumEntries, double *&Values, int *&Indices) const
virtual ~SuperludistOO(void)
SuperludistOO Destructor.
const Epetra_Map & RowMap() const
int NumMyElements() const
virtual const Epetra_Comm & Comm() const =0
SuperLUStat_t stat
int NumMyRows() const
int Solve(bool Factor)
All computation is performed during the call to Solve()
MPI_Comm Comm() const
#define EPETRA_CHK_ERR(xxx)
int LID(int GID) const
int NumGlobalNonzeros() const
int NumGlobalCols() const
bool FirstCallToSolve_
virtual int Broadcast(double *MyVals, int Count, int Root) const =0
virtual int NumProc() const =0
Epetra_Operator * GetOperator() const
LUstruct_t LUstruct
#define EPETRA_MAX(x, y)