EpetraExt  Development
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
EpetraExt_MatrixMatrix_mult_A_B.cpp
Go to the documentation of this file.
1 //@HEADER
2 // ***********************************************************************
3 //
4 // EpetraExt: Epetra Extended - Linear Algebra Services Package
5 // Copyright (2011) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 
42 #include <EpetraExt_ConfigDefs.h>
43 #include <EpetraExt_MatrixMatrix.h>
44 
45 #include <EpetraExt_MMHelpers.h>
46 
48 
49 #include <Epetra_Export.h>
50 #include <Epetra_Import.h>
51 #include <Epetra_Util.h>
52 #include <Epetra_Map.h>
53 #include <Epetra_Comm.h>
54 #include <Epetra_CrsMatrix.h>
55 #include <Epetra_Vector.h>
56 #include <Epetra_Directory.h>
57 #include <Epetra_HashTable.h>
58 #include <Epetra_Distributor.h>
59 #include <Epetra_IntSerialDenseVector.h>
60 
61 #ifdef HAVE_VECTOR
62 #include <vector>
63 #endif
64 
65 #ifdef HAVE_MPI
66 #include <Epetra_MpiDistributor.h>
67 #endif
68 
69 
70 #include <Teuchos_TimeMonitor.hpp>
71 
72 namespace EpetraExt {
73 
74 /*****************************************************************************/
75 /*****************************************************************************/
76 /*****************************************************************************/
77 static inline int C_estimate_nnz(const Epetra_CrsMatrix & A, const Epetra_CrsMatrix &B){
78  // Follows the NZ estimate in ML's ml_matmatmult.c
79  int Aest=(A.NumMyRows()>0)? A.NumMyNonzeros()/A.NumMyRows():100;
80  int Best=(B.NumMyRows()>0)? B.NumMyNonzeros()/B.NumMyRows():100;
81 
82  int nnzperrow=(int)(sqrt((double)Aest) + sqrt((double)Best) - 1);
83  nnzperrow*=nnzperrow;
84 
85  return (int)(A.NumMyRows()*nnzperrow*0.75 + 100);
86 }
87 
88 // Commented out unused, file-local function.
89 #if 0
90 /*****************************************************************************/
91 /*****************************************************************************/
92 /*****************************************************************************/
93 static inline int auto_resize(std::vector<int> &x,int num_new){
94  int newsize=x.size() + EPETRA_MAX((int)x.size(),num_new);
95  x.resize(newsize);
96  return newsize;
97 }
98 #endif // 0
99 
100 /*****************************************************************************/
101 /*****************************************************************************/
102 /*****************************************************************************/
103 template<typename int_type>
104 int aztecoo_and_ml_compatible_map_union(const Epetra_CrsMatrix &B, const LightweightCrsMatrix &Bimport, Epetra_Map*& unionmap, std::vector<int>& Cremotepids,
105  std::vector<int> &Bcols2Ccols, std::vector<int> &Icols2Ccols)
106 {
107 #ifdef HAVE_MPI
108 
109 #ifdef ENABLE_MMM_TIMINGS
110  using Teuchos::TimeMonitor;
111  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 1")));
112 #endif
113 
114  // So we need to merge the ColMap of B and the TargetMap of Bimport in an AztecOO/Ifpack/ML compliant order.
115  Epetra_Util util;
116  int i,j,MyPID = B.Comm().MyPID(), NumProc = B.Comm().NumProc();
117  int Bstart=0, Istart=0, Cstart=0,Pstart=0;
118 
119  const Epetra_Map & BColMap = B.ColMap();
120  const Epetra_Map & DomainMap = B.DomainMap();
121  const LightweightMap & IColMap = Bimport.ColMap_;
122 
123  int Nb = BColMap.NumMyElements();
124  int_type * Bgids = 0;
125  BColMap.MyGlobalElementsPtr(Bgids);
126  int Ni = IColMap.NumMyElements();
127  int_type * Igids = 0;
128  if(Ni>0)
129  IColMap.MyGlobalElementsPtr(Igids);
130 
131  if((int)Bcols2Ccols.size() != Nb) Bcols2Ccols.resize(Nb);
132  if((int)Icols2Ccols.size() != Ni) Icols2Ccols.resize(Ni);
133 
134  // Since we're getting called, we know we have to be using an MPI implementation of Epetra.
135  // Which means we should have an MpiDistributor for both B and Bimport.
136  // Unless all of B's columns are owned by the calling proc (e.g. MueLu for A*Ptent w/ uncoupled aggregation)
137  Epetra_MpiDistributor *Distor=0;
138  if(B.Importer()) {
139  Distor=dynamic_cast<Epetra_MpiDistributor*>(&B.Importer()->Distributor());
140  if(!Distor) EPETRA_CHK_ERR(-2);
141  }
142 
143  // **********************
144  // Stage 1: Get the owning PIDs
145  // **********************
146  // Note: if B doesn't have an importer, the calling proc owns all its colids...
147  std::vector<int> Bpids(Nb), Ipids(Ni);
148  if(B.Importer()) {EPETRA_CHK_ERR(util.GetPids(*B.Importer(),Bpids,true));}
149  else Bpids.assign(Nb,-1);
150 
151  if(Ni != (int) Bimport.ColMapOwningPIDs_.size()) {
152  EPETRA_CHK_ERR(-21);
153  }
154  for(i=0;i<Ni;i++){
155  Ipids[i] = (Bimport.ColMapOwningPIDs_[i]==MyPID)?(-1):(Bimport.ColMapOwningPIDs_[i]);
156  }
157 
158  // **********************
159  // Stage 2: Allocate memory (make things too big)
160  // **********************
161  int Csize=Nb+Ni;
162  int Psize=Nb+Ni;
163  std::vector<int_type> Cgids(Csize);
164  Cremotepids.resize(Psize);
165 
166 #ifdef ENABLE_MMM_TIMINGS
167  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 2")));
168 #endif
169 
170  // **********************
171  // Stage 3: Local Unknowns
172  // **********************
173  if(!B.Importer() || (B.Importer()->NumSameIDs() == DomainMap.NumMyElements())) {
174  // B's colmap has all of the domain elements. We can just copy those into the start of the array.
175  DomainMap.MyGlobalElements(Cgids.size() ? &Cgids[0] : 0);
176  Cstart=DomainMap.NumMyElements();
177  Bstart=DomainMap.NumMyElements();
178 
179  for(i=0; i<DomainMap.NumMyElements(); i++) Bcols2Ccols[i] = i;
180  }
181  else {
182  // There are more entries in the DomainMap than B's ColMap. So we stream through both B and Bimport for the copy.
183  int NumDomainElements = DomainMap.NumMyElements();
184  for(i = 0; i < NumDomainElements; i++) {
185  int_type GID = (int_type) DomainMap.GID64(i);
186  int LID = BColMap.LID(GID);
187  // B has this guy
188  if(LID!=-1) {
189  Bcols2Ccols[LID]=Cstart;
190  Cgids[Cstart] = GID;
191  Cstart++;
192  Bstart++;
193  }
194  else {
195  // B import has this guy
196  LID = IColMap.LID(GID);
197  if(LID!=-1) {
198  Icols2Ccols[LID]=Cstart;
199  Cgids[Cstart] = GID;
200  Cstart++;
201  }
202  }
203  }
204  }
205 
206  // Now advance Ilast to the last owned unknown in Bimport
207  for(i=0,j=0; i<Ni && Ipids[i]==-1; i++) {
208  while(Cgids[j]!=Igids[i]) j++;
209  Icols2Ccols[i]=j;
210  }
211  Istart=i;
212 
213 
214 #ifdef ENABLE_MMM_TIMINGS
215  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 3")));
216 #endif
217 
218 
219  // **********************
220  // Stage 4: Processor-by-processor set_union
221  // **********************
222  // NOTE: Intial sizes for Btemp/Itemp from B's distributor. This should be exact for Btemp and a decent guess for Itemp
223  int initial_temp_length = 0;
224  const int * lengths_from=0;
225  if(Distor) {
226  lengths_from= Distor->LengthsFrom();
227  for(i=0; i < Distor->NumReceives(); i++) initial_temp_length += lengths_from[i];
228  }
229  else initial_temp_length=100;
230 
231  std::vector<int_type> Btemp(initial_temp_length), Itemp(initial_temp_length);
232  std::vector<int> Btemp2(initial_temp_length), Itemp2(initial_temp_length);
233 
234 
235  while (Bstart < Nb || Istart < Ni) {
236  int Bproc=NumProc+1, Iproc=NumProc+1, Cproc;
237 
238  // Find the next active processor ID
239  if(Bstart < Nb) Bproc=Bpids[Bstart];
240  if(Istart < Ni) Iproc=Ipids[Istart];
241 
242  Cproc = (Bproc < Iproc)?Bproc:Iproc;
243 
244  if(Bproc == Cproc && Iproc != Cproc) {
245  // Only B has this processor. Copy the data.
246  // B: Find the beginning of the next processor
247  for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
248  int Bnext=i;
249 
250  // Copy data to C
251  int tCsize = Bnext-Bstart;
252  if(Btemp.size() < (size_t)tCsize) {Btemp2.resize(tCsize);}
253 
254  for(i=Bstart; i<Bnext; i++) {
255  Cremotepids[i-Bstart+Pstart] = Cproc;
256  Cgids[i-Bstart+Cstart] = Bgids[i];
257  Btemp2[i-Bstart] = i;
258  }
259 
260  // Sort & record reindexing
261  int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0;
262  util.Sort(true, tCsize, &Cgids[Cstart], 0, 0, 1, &Bptr2, 0, 0);
263 
264  for(i=0, j=Cstart; i<tCsize; i++){
265  while(Cgids[j] != Bgids[Btemp2[i]]) j++;
266  Bcols2Ccols[Btemp2[i]] = j;
267  }
268  Cstart+=tCsize;
269  Pstart+=tCsize;
270  Bstart=Bnext;
271  }
272  else if(Bproc != Cproc && Iproc == Cproc) {
273  // Only I has this processor. Copy the data.
274  // I: Find the beginning of the next processor
275  for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
276  int Inext=i;
277 
278  // Copy data to C
279  int tCsize = Inext-Istart;
280  if(Itemp.size() < (size_t)tCsize) {Itemp2.resize(tCsize);}
281 
282  for(i=Istart; i<Inext; i++) {
283  Cremotepids[i-Istart+Pstart] = Cproc;
284  Cgids[i-Istart+Cstart] = Igids[i];
285  Itemp2[i-Istart] = i;
286  }
287 
288  // Sort & record reindexing
289  int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
290  util.Sort(true, tCsize, &Cgids[Cstart], 0, 0, 1, &Iptr2, 0, 0);
291 
292  for(i=0, j=Cstart; i<tCsize; i++){
293  while(Cgids[j] != Igids[Itemp2[i]]) j++;
294  Icols2Ccols[Itemp2[i]] = j;
295  }
296  Cstart+=tCsize;
297  Pstart+=tCsize;
298  Istart=Inext;
299  }
300  else {
301  // Both B and I have this processor, so we need to do a set_union. So we need to sort.
302  int Bnext, Inext;
303  // B: Find the beginning of the next processor
304  for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
305  Bnext=i;
306 
307  // I: Find the beginning of the next processor
308  for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
309  Inext=i;
310 
311  // Copy data to temp
312  int tBsize = Bnext-Bstart;
313  int tIsize = Inext-Istart;
314  // int tCsize = tBsize+tIsize;
315  if(Btemp.size() < (size_t)tBsize) {Btemp.resize(tBsize); Btemp2.resize(tBsize);}
316  if(Itemp.size() < (size_t)tIsize) {Itemp.resize(tIsize); Itemp2.resize(tIsize);}
317 
318  for(i=Bstart; i<Bnext; i++) {Btemp[i-Bstart]=Bgids[i]; Btemp2[i-Bstart]=i;}
319  for(i=Istart; i<Inext; i++) {Itemp[i-Istart]=Igids[i]; Itemp2[i-Istart]=i;}
320 
321  // Sort & set_union
322  int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0; int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
323  util.Sort(true, tBsize, Btemp.size() ? &Btemp[0] : 0, 0, 0, 1, &Bptr2, 0, 0);
324  util.Sort(true, tIsize, Itemp.size() ? &Itemp[0] : 0, 0, 0, 1, &Iptr2, 0, 0);
325  typename std::vector<int_type>::iterator mycstart = Cgids.begin()+Cstart;
326  typename std::vector<int_type>::iterator last_el=std::set_union(Btemp.begin(),Btemp.begin()+tBsize,Itemp.begin(),Itemp.begin()+tIsize,mycstart);
327 
328  for(i=0, j=Cstart; i<tBsize; i++){
329  while(Cgids[j] != Bgids[Btemp2[i]]) j++;
330  Bcols2Ccols[Btemp2[i]] = j;
331  }
332 
333  for(i=0, j=Cstart; i<tIsize; i++){
334  while(Cgids[j] != Igids[Itemp2[i]]) j++;
335  Icols2Ccols[Itemp2[i]] = j;
336  }
337 
338  for(i=Pstart; i<(last_el - mycstart) + Pstart; i++) Cremotepids[i]=Cproc;
339  Cstart = (last_el - mycstart) + Cstart;
340  Pstart = (last_el - mycstart) + Pstart;
341  Bstart=Bnext;
342  Istart=Inext;
343  }
344  } // end while
345 
346 #ifdef ENABLE_MMM_TIMINGS
347  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 4")));
348 #endif
349 
350  // Resize the RemotePIDs down
351  Cremotepids.resize(Pstart);
352 
353  // **********************
354  // Stage 5: Call constructor
355  // **********************
356  // Make the map
357  unionmap=new Epetra_Map((int_type) -1,Cstart,Cgids.size() ? &Cgids[0] : 0, (int_type) B.ColMap().IndexBase64(),
358  B.Comm(),B.ColMap().DistributedGlobal(),(int_type) B.ColMap().MinAllGID64(),(int_type) B.ColMap().MaxAllGID64());
359  return 0;
360 #else
361  return -1;
362 #endif
363  }
364 
365 /*****************************************************************************/
366 /*****************************************************************************/
367 /*****************************************************************************/
368 // Provide a "resize" operation for double*'s.
369 inline void resize_doubles(int nold,int nnew,double*& d){
370  if(nnew > nold){
371  double *tmp = new double[nnew];
372  for(int i=0; i<nold; i++)
373  tmp[i]=d[i];
374  delete [] d;
375  d=tmp;
376  }
377 }
378 
379 
380 /*****************************************************************************/
381 /*****************************************************************************/
382 /*****************************************************************************/
383 template<typename int_type>
385  const Epetra_CrsMatrix & B,
386  const CrsMatrixStruct& Bview,
387  std::vector<int> & Bcol2Ccol,
388  std::vector<int> & Bimportcol2Ccol,
389  std::vector<int>& Cremotepids,
390  Epetra_CrsMatrix& C,
391  bool keep_all_hard_zeros
392 ){
393 #ifdef ENABLE_MMM_TIMINGS
394  using Teuchos::TimeMonitor;
395  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix SerialCore")));
396 #endif
397 
398  // *****************************
399  // Improved Parallel Gustavson in Local IDs
400  // *****************************
401  const Epetra_Map * colmap_C = &(C.ColMap());
402 
403 
404  int NumMyDiagonals=0; // Counter to speed up ESFC
405 
406  int m=A.NumMyRows();
407  int n=colmap_C->NumMyElements();
408  int i,j,k;
409 
410  // DataPointers for A
411  int *Arowptr, *Acolind;
412  double *Avals;
413  EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
414 
415  // DataPointers for B, Bimport
416  int *Browptr, *Bcolind;
417  double *Bvals;
418  EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
419 
420  int *Irowptr=0, *Icolind=0;
421  double *Ivals=0;
422  if(Bview.importMatrix){
423  Irowptr = &Bview.importMatrix->rowptr_[0];
424  Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0;
425  Ivals = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0;
426  }
427 
428  // MemorySetup: If somebody else is sharing this C's graphdata, make a new one.
429  // This is needed because I'm about to walk all over the CrsGrapData...
431 
432  // The status array will contain the index into colind where this entry was last deposited.
433  // c_status[i] < CSR_ip - not in the row yet.
434  // c_status[i] >= CSR_ip, this is the entry where you can find the data
435  // We start with this filled with -1's indicating that there are no entries yet.
436  std::vector<int> c_status(n, -1);
437 
438  // Classic csr assembly (low memory edition)
439  int CSR_alloc=C_estimate_nnz(A,B);
440  if(CSR_alloc < n) CSR_alloc = n;
441  int CSR_ip=0,OLD_ip=0;
444  double *& CSR_vals = C.ExpertExtractValues();
445 
446  CSR_rowptr.Resize(m+1);
447  CSR_colind.Resize(CSR_alloc);
448  resize_doubles(0,CSR_alloc,CSR_vals);
449 
450  // Static Profile stuff
451  std::vector<int> NumEntriesPerRow(m);
452 
453  // For each row of A/C
454  for(i=0; i<m; i++){
455  bool found_diagonal=false;
456  CSR_rowptr[i]=CSR_ip;
457 
458  for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
459  int Ak = Acolind[k];
460  double Aval = Avals[k];
461  if(!keep_all_hard_zeros && Aval==0) continue;
462 
463  if(Bview.targetMapToOrigRow[Ak] != -1){
464  // Local matrix
465  int Bk = Bview.targetMapToOrigRow[Ak];
466  for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
467  int Cj=Bcol2Ccol[Bcolind[j]];
468 
469  if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
470 
471  if(c_status[Cj]<OLD_ip){
472  // New entry
473  c_status[Cj]=CSR_ip;
474  CSR_colind[CSR_ip]=Cj;
475  CSR_vals[CSR_ip]=Aval*Bvals[j];
476  CSR_ip++;
477  }
478  else
479  CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
480  }
481  }
482  else{
483  // Remote matrix
484  int Ik = Bview.targetMapToImportRow[Ak];
485  for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
486  int Cj=Bimportcol2Ccol[Icolind[j]];
487 
488  if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
489 
490  if(c_status[Cj]<OLD_ip){
491  // New entry
492  c_status[Cj]=CSR_ip;
493  CSR_colind[CSR_ip]=Cj;
494  CSR_vals[CSR_ip]=Aval*Ivals[j];
495  CSR_ip++;
496  }
497  else
498  CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
499  }
500  }
501  }
502  NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
503 
504  // Resize for next pass if needed
505  if(CSR_ip + n > CSR_alloc){
506  resize_doubles(CSR_alloc,2*CSR_alloc,CSR_vals);
507  CSR_alloc*=2;
508  CSR_colind.Resize(CSR_alloc);
509  }
510  OLD_ip=CSR_ip;
511  }
512 
513  CSR_rowptr[m]=CSR_ip;
514 
515 #ifdef ENABLE_MMM_TIMINGS
516  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Final Sort")));
517 #endif
518 
519  // Sort the entries
520  Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
521 
522 #ifdef ENABLE_MMM_TIMINGS
523  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Fast IE")));
524 #endif
525 
526  // Do a fast build of C's importer
527  Epetra_Import * Cimport=0;
528  int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
529  int NumExports=0;
530  int *ExportLIDs=0, *ExportPIDs=0;
531  if(Bview.importMatrix) {
532  NumExports = Bview.importMatrix->ExportLIDs_.size();
533  ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0;
534  ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0;
535  }
536  else if(B.Importer()) {
537  // Grab the exports from B proper
538  NumExports = B.Importer()->NumExportIDs();
539  ExportLIDs = B.Importer()->ExportLIDs();
540  ExportPIDs = B.Importer()->ExportPIDs();
541  }
542 
543 
544  if(B.Importer() && C.ColMap().SameAs(B.ColMap()))
545  Cimport = new Epetra_Import(*B.Importer()); // Because the domain maps are the same
546  else if(!C.ColMap().SameAs(B.DomainMap()))
547  Cimport = new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs);
548 
549 
550 #ifdef ENABLE_MMM_TIMINGS
551  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix ESFC")));
552 #endif
553 
554  // Update the CrsGraphData
555  C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals);
556 
557  return 0;
558 }
559 
560 
561 
562 
563 /*****************************************************************************/
564 /*****************************************************************************/
565 /*****************************************************************************/
566 template<typename int_type>
568  const Epetra_CrsMatrix & B,
569  CrsMatrixStruct& Bview,
570  std::vector<int> & Bcol2Ccol,
571  std::vector<int> & Bimportcol2Ccol,
572  Epetra_CrsMatrix& C,
573  bool keep_all_hard_zeros){
574 
575 #ifdef ENABLE_MMM_TIMINGS
576  using Teuchos::TimeMonitor;
577  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse SerialCore")));
578 #endif
579 
580  // *****************************
581  // Improved Parallel Gustavson in Local IDs
582  // *****************************
583  const Epetra_Map * colmap_C = &(C.ColMap());
584 
585  int m=A.NumMyRows();
586  int n=colmap_C->NumMyElements();
587  int i,j,k;
588 
589  // DataPointers for A
590  int *Arowptr, *Acolind;
591  double *Avals;
592  EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
593 
594  // DataPointers for B, Bimport
595  int *Browptr, *Bcolind;
596  double *Bvals;
597  EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
598 
599  int *Irowptr=0, *Icolind=0;
600  double *Ivals=0;
601  if(Bview.importMatrix){
602  Irowptr = &Bview.importMatrix->rowptr_[0];
603  Icolind = &Bview.importMatrix->colind_[0];
604  Ivals = &Bview.importMatrix->vals_[0];
605  }
606 
607  // DataPointers for C
608  int *CSR_rowptr, *CSR_colind;
609  double *CSR_vals;
610  EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals));
611 
612 
613  // The status array will contain the index into colind where this dude was last deposited.
614  // c_status[i] < CSR_ip - not in the row yet.
615  // c_status[i] >= CSR_ip, this is the entry where you can find the data
616  // We start with this filled with -1's indicating that there are no entries yet.
617  std::vector<int> c_status(n, -1);
618 
619  // Classic csr assembly
620  int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
621  int CSR_ip=0,OLD_ip=0;
622 
623  // For each row of A/C
624  for(i=0; i<m; i++){
625  for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
626  int Ak=Acolind[k];
627  double Aval = Avals[k];
628  if(!keep_all_hard_zeros && Aval==0) continue;
629 
630  if(Bview.targetMapToOrigRow[Ak] != -1){
631  // Local matrix
632  int Bk = Bview.targetMapToOrigRow[Ak];
633  for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
634  int Cj=Bcol2Ccol[Bcolind[j]];
635 
636  if(c_status[Cj]<OLD_ip){
637  // New entry
638  if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
639  c_status[Cj]=CSR_ip;
640  CSR_colind[CSR_ip]=Cj;
641  CSR_vals[CSR_ip]=Aval*Bvals[j];
642  CSR_ip++;
643  }
644  else
645  CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
646  }
647  }
648  else{
649  // Remote matrix
650  int Ik = Bview.targetMapToImportRow[Ak];
651  for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
652  int Cj=Bimportcol2Ccol[Icolind[j]];
653 
654  if(c_status[Cj]<OLD_ip){
655  // New entry
656  if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
657  c_status[Cj]=CSR_ip;
658  CSR_colind[CSR_ip]=Cj;
659  CSR_vals[CSR_ip]=Aval*Ivals[j];
660  CSR_ip++;
661  }
662  else
663  CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
664  }
665  }
666  }
667  OLD_ip=CSR_ip;
668  }
669 
670 #ifdef ENABLE_MMM_TIMINGS
671  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse Final Sort")));
672 #endif
673 
674  // Sort the entries
675  Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
676 
677  return 0;
678 }
679 
680 
681 
682 /*****************************************************************************/
683 /*****************************************************************************/
684 /*****************************************************************************/
685 //kernel method for computing the local portion of C = A*B
686 template<typename int_type>
688  CrsMatrixStruct & Aview,
689  const Epetra_CrsMatrix & B,
690  CrsMatrixStruct& Bview,
691  Epetra_CrsMatrix& C,
692  bool call_FillComplete_on_result,
693  bool keep_all_hard_zeros)
694 {
695  int C_firstCol = Bview.colMap->MinLID();
696  int C_lastCol = Bview.colMap->MaxLID();
697 
698  int C_firstCol_import = 0;
699  int C_lastCol_import = -1;
700 
701  int_type* bcols = 0;
702  Bview.colMap->MyGlobalElementsPtr(bcols);
703  int_type* bcols_import = NULL;
704  if (Bview.importMatrix != NULL) {
705  C_firstCol_import = Bview.importMatrix->ColMap_.MinLID();
706  C_lastCol_import = Bview.importMatrix->ColMap_.MaxLID();
707  Bview.importMatrix->ColMap_.MyGlobalElementsPtr(bcols_import);
708  }
709 
710  int C_numCols = C_lastCol - C_firstCol + 1;
711  int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
712 
713  if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
714 
715  // Allocate workspace memory
716  double* dwork = new double[C_numCols];
717  int_type* iwork = new int_type[C_numCols];
718  int_type *c_cols=iwork;
719  double *c_vals=dwork;
720  int *c_index=new int[C_numCols];
721 
722  int i, j, k;
723  bool C_filled=C.Filled();
724 
725  // DataPointers for A
726  int *Arowptr, *Acolind;
727  double *Avals;
728  EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
729 
730  //To form C = A*B we're going to execute this expression:
731  //
732  // C(i,j) = sum_k( A(i,k)*B(k,j) )
733  //
734  //Our goal, of course, is to navigate the data in A and B once, without
735  //performing searches for column-indices, etc.
736 
737  // Mark indices as empty w/ -1
738  for(k=0;k<C_numCols;k++) c_index[k]=-1;
739 
740  //loop over the rows of A.
741  for(i=0; i<A.NumMyRows(); ++i) {
742 
743  int_type global_row = (int_type) Aview.rowMap->GID64(i);
744 
745  //loop across the i-th row of A and for each corresponding row
746  //in B, loop across colums and accumulate product
747  //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words,
748  //as we stride across B(k,:) we're calculating updates for row i of the
749  //result matrix C.
750 
751  /* Outline of the revised, ML-inspired algorithm
752 
753  C_{i,j} = \sum_k A_{i,k} B_{k,j}
754 
755  This algorithm uses a "middle product" formulation, with the loop ordering of
756  i, k, j. This means we compute a row of C at a time, but compute partial sums of
757  each entry in row i until we finish the k loop.
758 
759  This algorithm also has a few twists worth documenting.
760 
761  1) The first major twist involves the c_index, c_cols and c_vals arrays. The arrays c_cols
762  and c_vals store the *local* column index and values accumulator respectively. These
763  arrays are allocated to a size equal to the max number of local columns in C, namely C_numcols.
764  The value c_current tells us how many non-zeros we currently have in this row.
765 
766  So how do we take a LCID and find the right accumulator? This is where the c_index array
767  comes in. At the start (and stop) and the i loop, c_index is filled with -1's. Now
768  whenever we find a LCID in the k loop, we first loop at c_index[lcid]. If this value is
769  -1 we haven't seen this entry yet. In which case we add the appropriate stuff to c_cols
770  and c_vals and then set c_index[lcid] to the location of the accumulator (c_current before
771  we increment it). If the value is NOT -1, this tells us the location in the c_vals/c_cols
772  arrays (namely c_index[lcid]) where our accumulator lives.
773 
774  This trick works because we're working with local ids. We can then loop from 0 to c_current
775  and reset c_index to -1's when we're done, only touching the arrays that have changed.
776  While we're at it, we can switch to the global ids so we can call [Insert|SumInto]GlobalValues.
777  Thus, the effect of this trick is to avoid passes over the index array.
778 
779  2) The second major twist involves handling the remote and local components of B separately.
780  (ML doesn't need to do this, because its local ordering scheme is consistent between the local
781  and imported components of B.) Since they have different column maps, they have inconsistent
782  local column ids. This means the "second twist" won't work as stated on both matrices at the
783  same time. While this could be handled any number of ways, I have chosen to do the two parts
784  of B separately to make the code easier to read (and reduce the memory footprint of the MMM).
785  */
786 
787  // Local matrix: Zero Current counts for matrix
788  int c_current=0;
789 
790  // Local matrix: Do the "middle product"
791  for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
792  int Ak = Acolind[k];
793  double Aval = Avals[k];
794  // We're skipping remote entries on this pass.
795  if(Bview.remote[Ak] || (!keep_all_hard_zeros && Aval==0)) continue;
796 
797  int* Bcol_inds = Bview.indices[Ak];
798  double* Bvals_k = Bview.values[Ak];
799 
800  for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
801  int col=Bcol_inds[j];
802  if(c_index[col]<0){
803  // We haven't seen this entry before; add it. (In ML, on
804  // the first pass, you haven't seen any of the entries
805  // before, so they are added without the check. Not sure
806  // how much more efficient that would be; depends on branch
807  // prediction. We've favored code readability here.)
808  c_cols[c_current]=col;
809  c_vals[c_current]=Aval*Bvals_k[j];
810  c_index[col]=c_current;
811  c_current++;
812  }
813  else{
814  // We've already seen this entry; accumulate it.
815  c_vals[c_index[col]]+=Aval*Bvals_k[j];
816  }
817  }
818  }
819  // Local matrix: Reset c_index and switch c_cols to GIDs
820  for(k=0; k<c_current; k++){
821  c_index[c_cols[k]]=-1;
822  c_cols[k]=bcols[c_cols[k]]; // Switch from local to global IDs.
823  }
824  // Local matrix: Insert.
825  //
826  // We should check global error results after the algorithm is
827  // through. It's probably safer just to let the algorithm run all
828  // the way through before doing this, since otherwise we have to
829  // remember to free all allocations carefully.
830  //
831  // FIXME (mfh 27 Mar 2015) This code collects error codes, but
832  // doesn't do anything with them. This results in build warnings
833  // (set but unused variable). Thus, I'm commenting out error code
834  // collection for now.
835 #if 0
836  int err = C_filled ?
837  C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
838  :
839  C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
840 #else
841  if (C_filled) {
842  C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols);
843  } else {
844  C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
845  }
846 #endif // 0
847 
848  // Remote matrix: Zero current counts again for matrix
849  c_current=0;
850 
851  // Remote matrix: Do the "middle product"
852  for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
853  int Ak = Acolind[k];
854  double Aval = Avals[k];
855  // We're skipping local entries on this pass.
856  if(!Bview.remote[Ak] || Aval==0) continue;
857 
858  int* Bcol_inds = Bview.indices[Ak];
859  double* Bvals_k = Bview.values[Ak];
860 
861  for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
862  int col=Bcol_inds[j];
863  if(c_index[col]<0){
864  c_cols[c_current]=col;
865  c_vals[c_current]=Aval*Bvals_k[j];
866  c_index[col]=c_current;
867  c_current++;
868  }
869  else{
870  c_vals[c_index[col]]+=Aval*Bvals_k[j];
871  }
872  }
873  }
874  // Remote matrix: Reset c_index and switch c_cols to GIDs
875  for(k=0; k<c_current; k++){
876  c_index[c_cols[k]]=-1;
877  c_cols[k]=bcols_import[c_cols[k]];
878  }
879  // Remove matrix: Insert
880  //
881  // See above (on error handling).
882 #if 0
883  err = C_filled ?
884  C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
885  :
886  C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
887 #else
888  if (C_filled) {
889  C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols);
890  } else {
891  C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
892  }
893 #endif // 0
894  }
895 
896  // Since Multiply won't do this
897  if(call_FillComplete_on_result)
898  C.FillComplete(B.DomainMap(),A.RangeMap());
899 
900  delete [] dwork;
901  delete [] iwork;
902  delete [] c_index;
903  return(0);
904 }
905 
906 
907 
908 
909 
910 /*****************************************************************************/
911 /*****************************************************************************/
912 /*****************************************************************************/
913 template<typename int_type>
914 int MatrixMatrix::Tmult_A_B(const Epetra_CrsMatrix & A,
915  CrsMatrixStruct & Aview,
916  const Epetra_CrsMatrix & B,
917  CrsMatrixStruct& Bview,
918  Epetra_CrsMatrix& C,
919  bool call_FillComplete_on_result,
920  bool keep_all_hard_zeros){
921 
922  int i,rv;
923  Epetra_Map* mapunion = 0;
924  const Epetra_Map * colmap_B = &(B.ColMap());
925  const Epetra_Map * colmap_C = &(C.ColMap());
926 
927  std::vector<int> Cremotepids;
928  std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements());
929  std::vector<int> Bimportcol2Ccol;
930  if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
931 
932 #ifdef ENABLE_MMM_TIMINGS
933  using Teuchos::TimeMonitor;
934  Teuchos::RCP<Teuchos::TimeMonitor> MM;
935 #endif
936 
937  // If the user doesn't want us to call FillComplete, use the general routine
938  if(!call_FillComplete_on_result) {
939 #ifdef ENABLE_MMM_TIMINGS
940  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM General Multiply")));
941 #endif
942  rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,false,keep_all_hard_zeros);
943  return rv;
944  }
945 
946  // Is this a "clean" matrix
947  bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
948 
949  // Does ExtractCrsDataPointers work?
950  int *C_rowptr, *C_colind;
951  double * C_vals;
952  C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals);
953  bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
954 
955  // It's a new matrix that hasn't been fill-completed, use the general routine
956  if(!NewFlag && ExtractFailFlag){
957 #ifdef ENABLE_MMM_TIMINGS
958  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM General Multiply")));
959 #endif
960  rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result,keep_all_hard_zeros);
961  return rv;
962  }
963 
964 
965 #ifdef ENABLE_MMM_TIMINGS
966  if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix CMap")));
967  else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse CMap")));
968 #endif
969 
970  // If new, build & clobber a colmap for C
971  if(NewFlag){
972  if(Bview.importMatrix) {
973  EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
974  EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) );
975  }
976  else {
977  EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) );
978  for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
979 
980  // Copy B's remote list (if any)
981  if(B.Importer())
982  EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids));
983  }
984  }
985 
986 #ifdef ENABLE_MMM_TIMINGS
987  if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Lookups")));
988  else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse Lookups")));
989 #endif
990 
991  // ********************************************
992  // Setup Bcol2Ccol / Bimportcol2Ccol lookups
993  // ********************************************
994  // Note: If we ran the map_union, we have this information already
995 
996  if(!NewFlag) {
997  if(colmap_B->SameAs(*colmap_C)){
998  // Maps are the same: Use local IDs as the hash
999  for(i=0;i<colmap_B->NumMyElements();i++)
1000  Bcol2Ccol[i]=i;
1001  }
1002  else {
1003  // Maps are not the same: Use the map's hash
1004  for(i=0;i<colmap_B->NumMyElements();i++){
1005  Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
1006  if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
1007  }
1008  }
1009 
1010  if(Bview.importMatrix){
1011  Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1012  for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
1013  Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
1014  if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
1015  }
1016 
1017  }
1018  }
1019 #ifdef ENABLE_MMM_TIMINGS
1020  MM=Teuchos::null;
1021 #endif
1022 
1023  // Call the appropriate core routine
1024  if(NewFlag) {
1025  EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C,keep_all_hard_zeros));
1026  }
1027  else {
1028  // This always has a real map
1029  EPETRA_CHK_ERR(mult_A_B_reuse<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C,keep_all_hard_zeros));
1030  }
1031 
1032  // Cleanup
1033  delete mapunion;
1034  return 0;
1035 }
1036 
1037 
1038 /*****************************************************************************/
1039 /*****************************************************************************/
1040 /*****************************************************************************/
1041 int MatrixMatrix::mult_A_B(const Epetra_CrsMatrix & A,
1042  CrsMatrixStruct & Aview,
1043  const Epetra_CrsMatrix & B,
1044  CrsMatrixStruct& Bview,
1045  Epetra_CrsMatrix& C,
1046  bool call_FillComplete_on_result,\
1047  bool keep_all_hard_zeros){
1048 
1049 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1050  if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1051  return Tmult_A_B<int>(A, Aview, B, Bview, C, call_FillComplete_on_result, keep_all_hard_zeros);
1052  }
1053  else
1054 #endif
1055 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1057  return Tmult_A_B<long long>(A, Aview, B, Bview, C, call_FillComplete_on_result, keep_all_hard_zeros);
1058  }
1059  else
1060 #endif
1061  throw "EpetraExt::MatrixMatrix::mult_A_B: GlobalIndices type unknown";
1062 }
1063 
1064 
1065 /*****************************************************************************/
1066 /*****************************************************************************/
1067 /*****************************************************************************/
1068 template<typename int_type>
1069 int jacobi_A_B_reuse(double omega,
1070  const Epetra_Vector & Dinv,
1071  const Epetra_CrsMatrix & A,
1072  const Epetra_CrsMatrix & B,
1073  CrsMatrixStruct& Bview,
1074  std::vector<int> & Bcol2Ccol,
1075  std::vector<int> & Bimportcol2Ccol,
1076  Epetra_CrsMatrix& C){
1077 
1078 #ifdef ENABLE_MMM_TIMINGS
1079  using Teuchos::TimeMonitor;
1080  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse SerialCore")));
1081 #endif
1082 
1083  // *****************************
1084  // Improved Parallel Gustavson in Local IDs
1085  // *****************************
1086  const Epetra_Map * colmap_C = &(C.ColMap());
1087 
1088  int m=A.NumMyRows();
1089  int n=colmap_C->NumMyElements();
1090  int i,j,k;
1091 
1092  // DataPointers for A
1093  int *Arowptr, *Acolind;
1094  double *Avals;
1095  EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
1096 
1097  // DataPointers for B, Bimport
1098  int *Browptr, *Bcolind;
1099  double *Bvals;
1100  EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
1101 
1102  int *Irowptr=0, *Icolind=0;
1103  double *Ivals=0;
1104  if(Bview.importMatrix){
1105  Irowptr = &Bview.importMatrix->rowptr_[0];
1106  Icolind = &Bview.importMatrix->colind_[0];
1107  Ivals = &Bview.importMatrix->vals_[0];
1108  }
1109 
1110  // Data pointer for Dinv
1111  const double *Dvals = Dinv.Values();
1112 
1113  // DataPointers for C
1114  int *CSR_rowptr, *CSR_colind;
1115  double *CSR_vals;
1116  EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals));
1117 
1118 
1119  // The status array will contain the index into colind where this dude was last deposited.
1120  // c_status[i] < CSR_ip - not in the row yet.
1121  // c_status[i] >= CSR_ip, this is the entry where you can find the data
1122  // We start with this filled with -1's indicating that there are no entries yet.
1123  std::vector<int> c_status(n, -1);
1124 
1125  // Classic csr assembly
1126  int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
1127  int CSR_ip=0,OLD_ip=0;
1128 
1129  // For each row of C
1130  for(i=0; i<m; i++){
1131  double Dval = Dvals[i];
1132 
1133  // Entries of B
1134  for(k=Browptr[i]; k<Browptr[i+1]; k++){
1135  // int Bk = Bcolind[k];
1136  double Bval = Bvals[k];
1137  if(Bval==0) continue;
1138  int Ck=Bcol2Ccol[Bcolind[k]];
1139 
1140  // Assume no repeated entries in B
1141  c_status[Ck]=CSR_ip;
1142  CSR_colind[CSR_ip]=Ck;
1143  CSR_vals[CSR_ip]= Bvals[k];
1144  CSR_ip++;
1145  }
1146 
1147  // Entries of -omega * Dinv * A * B
1148  for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1149  int Ak=Acolind[k];
1150  double Aval = Avals[k];
1151  if(Aval==0) continue;
1152 
1153  if(Bview.targetMapToOrigRow[Ak] != -1){
1154  // Local matrix
1155  int Bk = Bview.targetMapToOrigRow[Ak];
1156  for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1157  int Cj=Bcol2Ccol[Bcolind[j]];
1158 
1159  if(c_status[Cj]<OLD_ip){
1160  // New entry
1161  if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
1162  c_status[Cj]=CSR_ip;
1163  CSR_colind[CSR_ip]=Cj;
1164  CSR_vals[CSR_ip]= - omega * Dval * Aval * Bvals[j];
1165  CSR_ip++;
1166  }
1167  else
1168  CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1169  }
1170  }
1171  else{
1172  // Remote matrix
1173  int Ik = Bview.targetMapToImportRow[Ak];
1174  for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1175  int Cj=Bimportcol2Ccol[Icolind[j]];
1176 
1177  if(c_status[Cj]<OLD_ip){
1178  // New entry
1179  if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
1180  c_status[Cj]=CSR_ip;
1181  CSR_colind[CSR_ip]=Cj;
1182  CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1183  CSR_ip++;
1184  }
1185  else
1186  CSR_vals[c_status[Cj]]-=omega * Dval * Aval * Ivals[j];
1187  }
1188  }
1189  }
1190  OLD_ip=CSR_ip;
1191  }
1192 
1193 #ifdef ENABLE_MMM_TIMINGS
1194  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse Final Sort")));
1195 #endif
1196  // Sort the entries
1197  Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
1198 
1199  return 0;
1200 }
1201 
1202 
1203 /*****************************************************************************/
1204 /*****************************************************************************/
1205 /*****************************************************************************/
1206 template<typename int_type>
1207 int jacobi_A_B_newmatrix(double omega,
1208  const Epetra_Vector & Dinv,
1209  const Epetra_CrsMatrix & A,
1210  const Epetra_CrsMatrix & B,
1211  CrsMatrixStruct& Bview,
1212  std::vector<int> & Bcol2Ccol,
1213  std::vector<int> & Bimportcol2Ccol,
1214  std::vector<int>& Cremotepids,
1215  Epetra_CrsMatrix& C)
1216 {
1217 #ifdef ENABLE_MMM_TIMINGS
1218  using Teuchos::TimeMonitor;
1219  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix SerialCore")));
1220 #endif
1221 
1222  // *****************************
1223  // Improved Parallel Gustavson in Local IDs
1224  // *****************************
1225  const Epetra_Map * colmap_C = &(C.ColMap());
1226  int NumMyDiagonals=0; // Counter to speed up ESFC
1227 
1228  int m=A.NumMyRows();
1229  int n=colmap_C->NumMyElements();
1230  int i,j,k;
1231 
1232  // DataPointers for A
1233  int *Arowptr, *Acolind;
1234  double *Avals;
1235  EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
1236 
1237  // DataPointers for B, Bimport
1238  int *Browptr, *Bcolind;
1239  double *Bvals;
1240  EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
1241 
1242  // Data pointer for Dinv
1243  const double *Dvals = Dinv.Values();
1244 
1245  int *Irowptr=0, *Icolind=0;
1246  double *Ivals=0;
1247  if(Bview.importMatrix){
1248  Irowptr = &Bview.importMatrix->rowptr_[0];
1249  Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0;
1250  Ivals = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0;
1251  }
1252 
1253  // MemorySetup: If somebody else is sharing this C's graphdata, make a new one.
1254  // This is needed because I'm about to walk all over the CrsGrapData...
1256 
1257  // The status array will contain the index into colind where this entry was last deposited.
1258  // c_status[i] < CSR_ip - not in the row yet.
1259  // c_status[i] >= CSR_ip, this is the entry where you can find the data
1260  // We start with this filled with -1's indicating that there are no entries yet.
1261  std::vector<int> c_status(n, -1);
1262 
1263  // Classic csr assembly (low memory edition)
1264  int CSR_alloc=C_estimate_nnz(A,B);
1265  if(CSR_alloc < B.NumMyNonzeros()) CSR_alloc = B.NumMyNonzeros(); // update for Jacobi
1266  int CSR_ip=0,OLD_ip=0;
1269  double *& CSR_vals = C.ExpertExtractValues();
1270 
1271  CSR_rowptr.Resize(m+1);
1272  CSR_colind.Resize(CSR_alloc);
1273  resize_doubles(0,CSR_alloc,CSR_vals);
1274 
1275  // Static Profile stuff
1276  std::vector<int> NumEntriesPerRow(m);
1277 
1278  // For each row of C
1279  for(i=0; i<m; i++){
1280  bool found_diagonal=false;
1281  CSR_rowptr[i]=CSR_ip;
1282  double Dval = Dvals[i];
1283 
1284  // Entries of B
1285  for(k=Browptr[i]; k<Browptr[i+1]; k++){
1286  //int Bk = Bcolind[k];
1287  double Bval = Bvals[k];
1288  if(Bval==0) continue;
1289  int Ck=Bcol2Ccol[Bcolind[k]];
1290 
1291  // Assume no repeated entries in B
1292  c_status[Ck]=CSR_ip;
1293  CSR_colind[CSR_ip]=Ck;
1294  CSR_vals[CSR_ip]= Bvals[k];
1295  CSR_ip++;
1296  }
1297 
1298  // Entries of -omega * Dinv * A * B
1299  for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1300  int Ak = Acolind[k];
1301  double Aval = Avals[k];
1302  if(Aval==0) continue;
1303 
1304  if(Bview.targetMapToOrigRow[Ak] != -1){
1305  // Local matrix
1306  int Bk = Bview.targetMapToOrigRow[Ak];
1307  for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1308  int Cj=Bcol2Ccol[Bcolind[j]];
1309 
1310  if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
1311 
1312  if(c_status[Cj]<OLD_ip){
1313  // New entry
1314  c_status[Cj]=CSR_ip;
1315  CSR_colind[CSR_ip]=Cj;
1316  CSR_vals[CSR_ip]= - omega * Dval* Aval * Bvals[j];
1317  CSR_ip++;
1318  }
1319  else
1320  CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1321  }
1322  }
1323  else{
1324  // Remote matrix
1325  int Ik = Bview.targetMapToImportRow[Ak];
1326  for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1327  int Cj=Bimportcol2Ccol[Icolind[j]];
1328 
1329  if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
1330 
1331  if(c_status[Cj]<OLD_ip){
1332  // New entry
1333  c_status[Cj]=CSR_ip;
1334  CSR_colind[CSR_ip]=Cj;
1335  CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1336  CSR_ip++;
1337  }
1338  else
1339  CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Ivals[j];
1340  }
1341  }
1342  }
1343  NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
1344 
1345  // Resize for next pass if needed
1346  if(CSR_ip + n > CSR_alloc){
1347  resize_doubles(CSR_alloc,2*CSR_alloc,CSR_vals);
1348  CSR_alloc*=2;
1349  CSR_colind.Resize(CSR_alloc);
1350  }
1351  OLD_ip=CSR_ip;
1352  }
1353 
1354  CSR_rowptr[m]=CSR_ip;
1355 
1356 #ifdef ENABLE_MMM_TIMINGS
1357  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix Final Sort")));
1358 #endif
1359 
1360  // Sort the entries
1361  Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
1362 
1363 #ifdef ENABLE_MMM_TIMINGS
1364  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix Fast IE")));
1365 #endif
1366 
1367  // Do a fast build of C's importer
1368  Epetra_Import * Cimport=0;
1369  int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
1370  int NumExports=0;
1371  int *ExportLIDs=0, *ExportPIDs=0;
1372  if(Bview.importMatrix) {
1373  NumExports = Bview.importMatrix->ExportLIDs_.size();
1374  ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0;
1375  ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0;
1376  }
1377  else if(B.Importer()) {
1378  // Grab the exports from B proper
1379  NumExports = B.Importer()->NumExportIDs();
1380  ExportLIDs = B.Importer()->ExportLIDs();
1381  ExportPIDs = B.Importer()->ExportPIDs();
1382  }
1383 
1384  if(!C.ColMap().SameAs(B.DomainMap()))
1385  Cimport = new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs);
1386 
1387 #ifdef ENABLE_MMM_TIMINGS
1388  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix ESFC")));
1389 #endif
1390 
1391  // Update the CrsGraphData
1392  C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals);
1393 
1394  return 0;
1395 }
1396 
1397 
1398 
1399 /*****************************************************************************/
1400 /*****************************************************************************/
1401 /*****************************************************************************/
1402 template<typename int_type>
1403 int MatrixMatrix::Tjacobi_A_B(double omega,
1404  const Epetra_Vector & Dinv,
1405  const Epetra_CrsMatrix & A,
1406  CrsMatrixStruct & /* Aview */,
1407  const Epetra_CrsMatrix & B,
1408  CrsMatrixStruct& Bview,
1409  Epetra_CrsMatrix& C,
1410  bool call_FillComplete_on_result){
1411  int i,rv;
1412  Epetra_Map* mapunion = 0;
1413  const Epetra_Map * colmap_B = &(B.ColMap());
1414  const Epetra_Map * colmap_C = &(C.ColMap());
1415 
1416  std::vector<int> Cremotepids;
1417  std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements());
1418  std::vector<int> Bimportcol2Ccol;
1419  if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1420 
1421 #ifdef ENABLE_MMM_TIMINGS
1422  using Teuchos::TimeMonitor;
1423  Teuchos::RCP<Teuchos::TimeMonitor> MM;
1424 #endif
1425 
1426  // If the user doesn't want us to call FillComplete, use the general routine
1427  if(!call_FillComplete_on_result) {
1428 #ifdef ENABLE_MMM_TIMINGS
1429  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi General Multiply")));
1430 #endif
1431  throw std::runtime_error("jacobi_A_B_general not implemented");
1432  // rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,false);
1433  return rv;
1434  }
1435 
1436  // Is this a "clean" matrix
1437  bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
1438 
1439  // Does ExtractCrsDataPointers work?
1440  int *C_rowptr, *C_colind;
1441  double * C_vals;
1442  C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals);
1443  bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
1444 
1445  // It's a new matrix that hasn't been fill-completed, use the general routine
1446  if(!NewFlag && ExtractFailFlag){
1447 #ifdef ENABLE_MMM_TIMINGS
1448  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi General Multiply")));
1449 #endif
1450  throw std::runtime_error("jacobi_A_B_general not implemented");
1451  // rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result);
1452  return rv;
1453  }
1454 
1455 #ifdef ENABLE_MMM_TIMINGS
1456  if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Newmatrix CMap")));
1457  else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse CMap")));
1458 #endif
1459 
1460  // If new, build & clobber a colmap for C
1461  if(NewFlag){
1462  if(Bview.importMatrix) {
1463  EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
1464  EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) );
1465  }
1466  else {
1467  EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) );
1468  for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
1469 
1470  // Copy B's remote list (if any)
1471  if(B.Importer())
1472  EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids));
1473  }
1474  }
1475 
1476 #ifdef ENABLE_MMM_TIMINGS
1477  if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Newmatrix Lookups")));
1478  else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse Lookups")));
1479 #endif
1480 
1481  // ********************************************
1482  // Setup Bcol2Ccol / Bimportcol2Ccol lookups
1483  // ********************************************
1484  // Note: If we ran the map_union, we have this information already
1485 
1486  if(!NewFlag) {
1487  if(colmap_B->SameAs(*colmap_C)){
1488  // Maps are the same: Use local IDs as the hash
1489  for(i=0;i<colmap_B->NumMyElements();i++)
1490  Bcol2Ccol[i]=i;
1491  }
1492  else {
1493  // Maps are not the same: Use the map's hash
1494  for(i=0;i<colmap_B->NumMyElements();i++){
1495  Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
1496  if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
1497  }
1498  }
1499 
1500  if(Bview.importMatrix){
1501  Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1502  for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
1503  Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
1504  if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
1505  }
1506 
1507  }
1508  }
1509 
1510 
1511  // Call the appropriate core routine
1512  if(NewFlag) {
1513  EPETRA_CHK_ERR(jacobi_A_B_newmatrix<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C));
1514  }
1515  else {
1516  // This always has a real map
1517  EPETRA_CHK_ERR(jacobi_A_B_reuse<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C));
1518  }
1519 
1520  // Cleanup
1521  delete mapunion;
1522  return 0;
1523 }
1524 
1525 
1526 /*****************************************************************************/
1527 /*****************************************************************************/
1528 /*****************************************************************************/
1529 int MatrixMatrix::jacobi_A_B(double omega,
1530  const Epetra_Vector & Dinv,
1531  const Epetra_CrsMatrix & A,
1532  CrsMatrixStruct & Aview,
1533  const Epetra_CrsMatrix & B,
1534  CrsMatrixStruct& Bview,
1535  Epetra_CrsMatrix& C,
1536  bool call_FillComplete_on_result)
1537 {
1538 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1539  if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1540  return Tjacobi_A_B<int>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
1541  }
1542  else
1543 #endif
1544 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1546  return Tjacobi_A_B<long long>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
1547  }
1548  else
1549 #endif
1550  throw "EpetraExt::MatrixMatrix::jacobi_A_B: GlobalIndices type unknown";
1551 }
1552 
1553 
1554 
1555 /*****************************************************************************/
1556 /*****************************************************************************/
1557 /*****************************************************************************/
1558 template<typename int_type>
1559 int MatrixMatrix::Tmult_AT_B_newmatrix(const CrsMatrixStruct & Atransview, const CrsMatrixStruct & Bview, Epetra_CrsMatrix & C,bool keep_all_hard_zeros) {
1560  using Teuchos::RCP;
1561  using Teuchos::rcp;
1562 
1563 #ifdef ENABLE_MMM_TIMINGS
1564  using Teuchos::TimeMonitor;
1565  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T Transpose")));
1566 #endif
1567 
1568 
1569  /*************************************************************/
1570  /* 2/3) Call mult_A_B_newmatrix w/ fillComplete */
1571  /*************************************************************/
1572 #ifdef ENABLE_MMM_TIMINGS
1573  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T AB-core")));
1574 #endif
1575  RCP<Epetra_CrsMatrix> Ctemp;
1576 
1577  // If Atrans has no Exporter, we can use C instead of having to create a temp matrix
1578  bool needs_final_export = Atransview.origMatrix->Exporter() != 0;
1579  if(needs_final_export)
1580  Ctemp = rcp(new Epetra_CrsMatrix(Copy,Atransview.origMatrix->RowMap(),Bview.origMatrix->ColMap(),0));
1581  else {
1582  EPETRA_CHK_ERR( C.ReplaceColMap(Bview.origMatrix->ColMap()) );
1583  Ctemp = rcp(&C,false);// don't allow deallocation
1584  }
1585 
1586  // Multiply
1587  std::vector<int> Bcol2Ccol(Bview.origMatrix->NumMyCols());
1588  for(int i=0; i<Bview.origMatrix->NumMyCols(); i++)
1589  Bcol2Ccol[i]=i;
1590  std::vector<int> Bimportcol2Ccol,Cremotepids;
1591  if(Bview.origMatrix->Importer())
1592  EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*Bview.origMatrix->Importer(),Cremotepids));
1593 
1594  EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(*Atransview.origMatrix,*Bview.origMatrix,Bview,
1595  Bcol2Ccol,Bimportcol2Ccol,Cremotepids,
1596  *Ctemp,keep_all_hard_zeros));
1597 
1598  /*************************************************************/
1599  /* 4) ExportAndFillComplete matrix (if needed) */
1600  /*************************************************************/
1601 #ifdef ENABLE_MMM_TIMINGS
1602  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T ESFC")));
1603 #endif
1604 
1605  if(needs_final_export)
1606  C.FusedExport(*Ctemp,*Ctemp->Exporter(),&Bview.origMatrix->DomainMap(),&Atransview.origMatrix->RangeMap(),false);
1607 
1608  return 0;
1609 }
1610 
1611 
1612 
1613 /*****************************************************************************/
1614 /*****************************************************************************/
1615 /*****************************************************************************/
1616 int MatrixMatrix::mult_AT_B_newmatrix(const CrsMatrixStruct & Atransview, const CrsMatrixStruct & Bview, Epetra_CrsMatrix & C, bool keep_all_hard_zeros) {
1617 
1618 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1619  if(Atransview.origMatrix->RowMap().GlobalIndicesInt() && Bview.origMatrix->RowMap().GlobalIndicesInt()) {
1620  return Tmult_AT_B_newmatrix<int>(Atransview,Bview,C,keep_all_hard_zeros);
1621  }
1622  else
1623 #endif
1624 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1625  if(Atransview.origMatrix->RowMap().GlobalIndicesLongLong() && Bview.origMatrix->RowMap().GlobalIndicesLongLong()) {
1626  return Tmult_AT_B_newmatrix<long long>(Atransview,Bview,C,keep_all_hard_zeros);
1627  }
1628  else
1629 #endif
1630  throw "EpetraExt::MatrixMatrix::mult_AT_B_newmatrix: GlobalIndices type unknown";
1631 }
1632 
1633 
1634 
1635 }//namespace EpetraExt
const int * LengthsFrom() const
LightweightCrsMatrix * importMatrix
bool DistributedGlobal() const
const Epetra_Map & RangeMap() const
std::vector< int > targetMapToOrigRow
int Resize(int Length_in)
virtual int SumIntoGlobalValues(int GlobalRow, int NumEntries, const double *Values, const int *Indices)
int MaxLID() const
bool GlobalIndicesLongLong() const
void MyGlobalElementsPtr(int *&MyGlobalElementList) const
bool SameAs(const Epetra_BlockMap &Map) const
int NumSameIDs() const
int NumReceives() const
int MyGlobalElements(int *MyGlobalElementList) const
static int GetRemotePIDs(const Epetra_Import &Importer, std::vector< int > &RemotePIDs)
Epetra_IntSerialDenseVector & ExpertExtractIndexOffset()
int * ExportPIDs() const
bool GlobalIndicesInt() const
const Epetra_Map & ColMap() const
bool IndicesAreLocal() const
int MinLID() const
int mult_A_B_newmatrix(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, const CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, std::vector< int > &Cremotepids, Epetra_CrsMatrix &C, bool keep_all_hard_zeros)
int * ExportLIDs() const
virtual int MyPID() const =0
int FillComplete(bool OptimizeDataStorage=true)
int ExpertMakeUniqueCrsGraphData()
static int SortCrsEntries(int NumRows, const int *CRS_rowptr, int *CRS_colind, double *CRS_vals)
int NumExportIDs() const
const Epetra_Map & RowMap() const
int NumMyElements() const
void resize_doubles(int nold, int nnew, double *&d)
std::vector< int > targetMapToImportRow
const Epetra_Import * Importer() const
static void Sort(bool SortAscending, int NumKeys, T *Keys, int NumDoubleCompanions, double **DoubleCompanions, int NumIntCompanions, int **IntCompanions, int NumLongLongCompanions, long long **LongLongCompanions)
static int C_estimate_nnz(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B)
int jacobi_A_B_reuse(double omega, const Epetra_Vector &Dinv, const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, Epetra_CrsMatrix &C)
int NumMyRows() const
int LID(int GID) const
bool IndicesAreGlobal() const
int mult_A_B_general(const Epetra_CrsMatrix &A, CrsMatrixStruct &Aview, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, Epetra_CrsMatrix &C, bool call_FillComplete_on_result, bool keep_all_hard_zeros)
virtual int InsertGlobalValues(int GlobalRow, int NumEntries, const double *Values, const int *Indices)
int aztecoo_and_ml_compatible_map_union(const Epetra_CrsMatrix &B, const LightweightCrsMatrix &Bimport, Epetra_Map *&unionmap, std::vector< int > &Cremotepids, std::vector< int > &Bcols2Ccols, std::vector< int > &Icols2Ccols)
int jacobi_A_B_newmatrix(double omega, const Epetra_Vector &Dinv, const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, std::vector< int > &Cremotepids, Epetra_CrsMatrix &C)
virtual int NumProc() const =0
Epetra_IntSerialDenseVector & ExpertExtractIndices()
bool Filled() const
const Epetra_Map & DomainMap() const
int mult_A_B_reuse(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, Epetra_CrsMatrix &C, bool keep_all_hard_zeros)
const Epetra_Comm & Comm() const
int ExpertStaticFillComplete(const Epetra_Map &DomainMap, const Epetra_Map &RangeMap, const Epetra_Import *Importer=0, const Epetra_Export *Exporter=0, int NumMyDiagonals=-1)
double *& ExpertExtractValues()
int ReplaceColMap(const Epetra_BlockMap &newmap)
int ExtractCrsDataPointers(int *&IndexOffset, int *&Indices, double *&Values_in) const
static int GetPids(const Epetra_Import &Importer, std::vector< int > &pids, bool use_minus_one_for_local)
int NumMyNonzeros() const