49 #include <Epetra_Export.h>
50 #include <Epetra_Import.h>
51 #include <Epetra_Util.h>
52 #include <Epetra_Map.h>
53 #include <Epetra_Comm.h>
54 #include <Epetra_CrsMatrix.h>
55 #include <Epetra_Vector.h>
56 #include <Epetra_Directory.h>
57 #include <Epetra_HashTable.h>
58 #include <Epetra_Distributor.h>
59 #include <Epetra_IntSerialDenseVector.h>
66 #include <Epetra_MpiDistributor.h>
70 #include <Teuchos_TimeMonitor.hpp>
82 int nnzperrow=(int)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
85 return (
int)(A.
NumMyRows()*nnzperrow*0.75 + 100);
93 static inline int auto_resize(std::vector<int> &x,
int num_new){
94 int newsize=x.size() + EPETRA_MAX((
int)x.size(),num_new);
103 template<
typename int_type>
105 std::vector<int> &Bcols2Ccols, std::vector<int> &Icols2Ccols)
109 #ifdef ENABLE_MMM_TIMINGS
110 using Teuchos::TimeMonitor;
111 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 1")));
117 int Bstart=0, Istart=0, Cstart=0,Pstart=0;
124 int_type * Bgids = 0;
125 BColMap.MyGlobalElementsPtr(Bgids);
127 int_type * Igids = 0;
131 if((
int)Bcols2Ccols.size() != Nb) Bcols2Ccols.resize(Nb);
132 if((
int)Icols2Ccols.size() != Ni) Icols2Ccols.resize(Ni);
140 if(!Distor) EPETRA_CHK_ERR(-2);
147 std::vector<int> Bpids(Nb), Ipids(Ni);
149 else Bpids.assign(Nb,-1);
163 std::vector<int_type> Cgids(Csize);
164 Cremotepids.resize(Psize);
166 #ifdef ENABLE_MMM_TIMINGS
167 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 2")));
179 for(i=0; i<DomainMap.
NumMyElements(); i++) Bcols2Ccols[i] = i;
184 for(i = 0; i < NumDomainElements; i++) {
185 int_type GID = (int_type) DomainMap.GID64(i);
186 int LID = BColMap.
LID(GID);
189 Bcols2Ccols[LID]=Cstart;
196 LID = IColMap.
LID(GID);
198 Icols2Ccols[LID]=Cstart;
207 for(i=0,j=0; i<Ni && Ipids[i]==-1; i++) {
208 while(Cgids[j]!=Igids[i]) j++;
214 #ifdef ENABLE_MMM_TIMINGS
215 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 3")));
223 int initial_temp_length = 0;
224 const int * lengths_from=0;
227 for(i=0; i < Distor->
NumReceives(); i++) initial_temp_length += lengths_from[i];
229 else initial_temp_length=100;
231 std::vector<int_type> Btemp(initial_temp_length), Itemp(initial_temp_length);
232 std::vector<int> Btemp2(initial_temp_length), Itemp2(initial_temp_length);
235 while (Bstart < Nb || Istart < Ni) {
236 int Bproc=NumProc+1, Iproc=NumProc+1, Cproc;
239 if(Bstart < Nb) Bproc=Bpids[Bstart];
240 if(Istart < Ni) Iproc=Ipids[Istart];
242 Cproc = (Bproc < Iproc)?Bproc:Iproc;
244 if(Bproc == Cproc && Iproc != Cproc) {
247 for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
251 int tCsize = Bnext-Bstart;
252 if(Btemp.size() < (size_t)tCsize) {Btemp2.resize(tCsize);}
254 for(i=Bstart; i<Bnext; i++) {
255 Cremotepids[i-Bstart+Pstart] = Cproc;
256 Cgids[i-Bstart+Cstart] = Bgids[i];
257 Btemp2[i-Bstart] = i;
261 int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0;
262 util.
Sort(
true, tCsize, &Cgids[Cstart], 0, 0, 1, &Bptr2, 0, 0);
264 for(i=0, j=Cstart; i<tCsize; i++){
265 while(Cgids[j] != Bgids[Btemp2[i]]) j++;
266 Bcols2Ccols[Btemp2[i]] = j;
272 else if(Bproc != Cproc && Iproc == Cproc) {
275 for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
279 int tCsize = Inext-Istart;
280 if(Itemp.size() < (size_t)tCsize) {Itemp2.resize(tCsize);}
282 for(i=Istart; i<Inext; i++) {
283 Cremotepids[i-Istart+Pstart] = Cproc;
284 Cgids[i-Istart+Cstart] = Igids[i];
285 Itemp2[i-Istart] = i;
289 int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
290 util.
Sort(
true, tCsize, &Cgids[Cstart], 0, 0, 1, &Iptr2, 0, 0);
292 for(i=0, j=Cstart; i<tCsize; i++){
293 while(Cgids[j] != Igids[Itemp2[i]]) j++;
294 Icols2Ccols[Itemp2[i]] = j;
304 for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
308 for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
312 int tBsize = Bnext-Bstart;
313 int tIsize = Inext-Istart;
315 if(Btemp.size() < (size_t)tBsize) {Btemp.resize(tBsize); Btemp2.resize(tBsize);}
316 if(Itemp.size() < (size_t)tIsize) {Itemp.resize(tIsize); Itemp2.resize(tIsize);}
318 for(i=Bstart; i<Bnext; i++) {Btemp[i-Bstart]=Bgids[i]; Btemp2[i-Bstart]=i;}
319 for(i=Istart; i<Inext; i++) {Itemp[i-Istart]=Igids[i]; Itemp2[i-Istart]=i;}
322 int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0;
int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
323 util.
Sort(
true, tBsize, Btemp.size() ? &Btemp[0] : 0, 0, 0, 1, &Bptr2, 0, 0);
324 util.
Sort(
true, tIsize, Itemp.size() ? &Itemp[0] : 0, 0, 0, 1, &Iptr2, 0, 0);
325 typename std::vector<int_type>::iterator mycstart = Cgids.begin()+Cstart;
326 typename std::vector<int_type>::iterator last_el=std::set_union(Btemp.begin(),Btemp.begin()+tBsize,Itemp.begin(),Itemp.begin()+tIsize,mycstart);
328 for(i=0, j=Cstart; i<tBsize; i++){
329 while(Cgids[j] != Bgids[Btemp2[i]]) j++;
330 Bcols2Ccols[Btemp2[i]] = j;
333 for(i=0, j=Cstart; i<tIsize; i++){
334 while(Cgids[j] != Igids[Itemp2[i]]) j++;
335 Icols2Ccols[Itemp2[i]] = j;
338 for(i=Pstart; i<(last_el - mycstart) + Pstart; i++) Cremotepids[i]=Cproc;
339 Cstart = (last_el - mycstart) + Cstart;
340 Pstart = (last_el - mycstart) + Pstart;
346 #ifdef ENABLE_MMM_TIMINGS
347 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 4")));
351 Cremotepids.resize(Pstart);
357 unionmap=
new Epetra_Map((int_type) -1,Cstart,Cgids.size() ? &Cgids[0] : 0, (int_type) B.
ColMap().IndexBase64(),
371 double *tmp =
new double[nnew];
372 for(
int i=0; i<nold; i++)
383 template<
typename int_type>
387 std::vector<int> & Bcol2Ccol,
388 std::vector<int> & Bimportcol2Ccol,
389 std::vector<int>& Cremotepids,
391 bool keep_all_hard_zeros
393 #ifdef ENABLE_MMM_TIMINGS
394 using Teuchos::TimeMonitor;
395 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix SerialCore")));
404 int NumMyDiagonals=0;
411 int *Arowptr, *Acolind;
416 int *Browptr, *Bcolind;
420 int *Irowptr=0, *Icolind=0;
436 std::vector<int> c_status(n, -1);
440 if(CSR_alloc < n) CSR_alloc = n;
441 int CSR_ip=0,OLD_ip=0;
447 CSR_colind.
Resize(CSR_alloc);
451 std::vector<int> NumEntriesPerRow(m);
455 bool found_diagonal=
false;
456 CSR_rowptr[i]=CSR_ip;
458 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
460 double Aval = Avals[k];
461 if(!keep_all_hard_zeros && Aval==0)
continue;
466 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
467 int Cj=Bcol2Ccol[Bcolind[j]];
469 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
471 if(c_status[Cj]<OLD_ip){
474 CSR_colind[CSR_ip]=Cj;
475 CSR_vals[CSR_ip]=Aval*Bvals[j];
479 CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
485 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
486 int Cj=Bimportcol2Ccol[Icolind[j]];
488 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
490 if(c_status[Cj]<OLD_ip){
493 CSR_colind[CSR_ip]=Cj;
494 CSR_vals[CSR_ip]=Aval*Ivals[j];
498 CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
502 NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
505 if(CSR_ip + n > CSR_alloc){
508 CSR_colind.
Resize(CSR_alloc);
513 CSR_rowptr[m]=CSR_ip;
515 #ifdef ENABLE_MMM_TIMINGS
516 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Final Sort")));
522 #ifdef ENABLE_MMM_TIMINGS
523 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Fast IE")));
528 int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
530 int *ExportLIDs=0, *ExportPIDs=0;
550 #ifdef ENABLE_MMM_TIMINGS
551 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix ESFC")));
566 template<
typename int_type>
570 std::vector<int> & Bcol2Ccol,
571 std::vector<int> & Bimportcol2Ccol,
573 bool keep_all_hard_zeros){
575 #ifdef ENABLE_MMM_TIMINGS
576 using Teuchos::TimeMonitor;
577 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse SerialCore")));
590 int *Arowptr, *Acolind;
595 int *Browptr, *Bcolind;
599 int *Irowptr=0, *Icolind=0;
608 int *CSR_rowptr, *CSR_colind;
617 std::vector<int> c_status(n, -1);
620 int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
621 int CSR_ip=0,OLD_ip=0;
625 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
627 double Aval = Avals[k];
628 if(!keep_all_hard_zeros && Aval==0)
continue;
633 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
634 int Cj=Bcol2Ccol[Bcolind[j]];
636 if(c_status[Cj]<OLD_ip){
638 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
640 CSR_colind[CSR_ip]=Cj;
641 CSR_vals[CSR_ip]=Aval*Bvals[j];
645 CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
651 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
652 int Cj=Bimportcol2Ccol[Icolind[j]];
654 if(c_status[Cj]<OLD_ip){
656 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
658 CSR_colind[CSR_ip]=Cj;
659 CSR_vals[CSR_ip]=Aval*Ivals[j];
663 CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
670 #ifdef ENABLE_MMM_TIMINGS
671 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse Final Sort")));
686 template<
typename int_type>
692 bool call_FillComplete_on_result,
693 bool keep_all_hard_zeros)
698 int C_firstCol_import = 0;
699 int C_lastCol_import = -1;
702 Bview.
colMap->MyGlobalElementsPtr(bcols);
703 int_type* bcols_import = NULL;
710 int C_numCols = C_lastCol - C_firstCol + 1;
711 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
713 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
716 double* dwork =
new double[C_numCols];
717 int_type* iwork =
new int_type[C_numCols];
718 int_type *c_cols=iwork;
719 double *c_vals=dwork;
720 int *c_index=
new int[C_numCols];
726 int *Arowptr, *Acolind;
738 for(k=0;k<C_numCols;k++) c_index[k]=-1;
743 int_type global_row = (int_type) Aview.
rowMap->GID64(i);
791 for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
793 double Aval = Avals[k];
795 if(Bview.
remote[Ak] || (!keep_all_hard_zeros && Aval==0))
continue;
797 int* Bcol_inds = Bview.
indices[Ak];
798 double* Bvals_k = Bview.
values[Ak];
801 int col=Bcol_inds[j];
808 c_cols[c_current]=col;
809 c_vals[c_current]=Aval*Bvals_k[j];
810 c_index[col]=c_current;
815 c_vals[c_index[col]]+=Aval*Bvals_k[j];
820 for(k=0; k<c_current; k++){
821 c_index[c_cols[k]]=-1;
822 c_cols[k]=bcols[c_cols[k]];
852 for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
854 double Aval = Avals[k];
856 if(!Bview.
remote[Ak] || Aval==0)
continue;
858 int* Bcol_inds = Bview.
indices[Ak];
859 double* Bvals_k = Bview.
values[Ak];
862 int col=Bcol_inds[j];
864 c_cols[c_current]=col;
865 c_vals[c_current]=Aval*Bvals_k[j];
866 c_index[col]=c_current;
870 c_vals[c_index[col]]+=Aval*Bvals_k[j];
875 for(k=0; k<c_current; k++){
876 c_index[c_cols[k]]=-1;
877 c_cols[k]=bcols_import[c_cols[k]];
897 if(call_FillComplete_on_result)
913 template<
typename int_type>
915 CrsMatrixStruct & Aview,
917 CrsMatrixStruct& Bview,
919 bool call_FillComplete_on_result,
920 bool keep_all_hard_zeros){
927 std::vector<int> Cremotepids;
929 std::vector<int> Bimportcol2Ccol;
930 if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
932 #ifdef ENABLE_MMM_TIMINGS
933 using Teuchos::TimeMonitor;
934 Teuchos::RCP<Teuchos::TimeMonitor> MM;
938 if(!call_FillComplete_on_result) {
939 #ifdef ENABLE_MMM_TIMINGS
940 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM General Multiply")));
942 rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,
false,keep_all_hard_zeros);
950 int *C_rowptr, *C_colind;
953 bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
956 if(!NewFlag && ExtractFailFlag){
957 #ifdef ENABLE_MMM_TIMINGS
958 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM General Multiply")));
960 rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result,keep_all_hard_zeros);
965 #ifdef ENABLE_MMM_TIMINGS
966 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix CMap")));
967 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse CMap")));
972 if(Bview.importMatrix) {
973 EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
986 #ifdef ENABLE_MMM_TIMINGS
987 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Lookups")));
988 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse Lookups")));
997 if(colmap_B->
SameAs(*colmap_C)){
1005 Bcol2Ccol[i]=colmap_C->
LID((int_type) colmap_B->GID64(i));
1006 if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
1010 if(Bview.importMatrix){
1011 Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1012 for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
1013 Bimportcol2Ccol[i]=colmap_C->
LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
1014 if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
1019 #ifdef ENABLE_MMM_TIMINGS
1025 EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C,keep_all_hard_zeros));
1029 EPETRA_CHK_ERR(mult_A_B_reuse<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C,keep_all_hard_zeros));
1042 CrsMatrixStruct & Aview,
1044 CrsMatrixStruct& Bview,
1046 bool call_FillComplete_on_result,\
1047 bool keep_all_hard_zeros){
1049 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1051 return Tmult_A_B<int>(A, Aview, B, Bview, C, call_FillComplete_on_result, keep_all_hard_zeros);
1055 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1057 return Tmult_A_B<long long>(A, Aview, B, Bview, C, call_FillComplete_on_result, keep_all_hard_zeros);
1061 throw "EpetraExt::MatrixMatrix::mult_A_B: GlobalIndices type unknown";
1068 template<
typename int_type>
1074 std::vector<int> & Bcol2Ccol,
1075 std::vector<int> & Bimportcol2Ccol,
1078 #ifdef ENABLE_MMM_TIMINGS
1079 using Teuchos::TimeMonitor;
1080 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse SerialCore")));
1093 int *Arowptr, *Acolind;
1098 int *Browptr, *Bcolind;
1102 int *Irowptr=0, *Icolind=0;
1111 const double *Dvals = Dinv.Values();
1114 int *CSR_rowptr, *CSR_colind;
1123 std::vector<int> c_status(n, -1);
1126 int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
1127 int CSR_ip=0,OLD_ip=0;
1131 double Dval = Dvals[i];
1134 for(k=Browptr[i]; k<Browptr[i+1]; k++){
1136 double Bval = Bvals[k];
1137 if(Bval==0)
continue;
1138 int Ck=Bcol2Ccol[Bcolind[k]];
1141 c_status[Ck]=CSR_ip;
1142 CSR_colind[CSR_ip]=Ck;
1143 CSR_vals[CSR_ip]= Bvals[k];
1148 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1150 double Aval = Avals[k];
1151 if(Aval==0)
continue;
1156 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1157 int Cj=Bcol2Ccol[Bcolind[j]];
1159 if(c_status[Cj]<OLD_ip){
1161 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
1162 c_status[Cj]=CSR_ip;
1163 CSR_colind[CSR_ip]=Cj;
1164 CSR_vals[CSR_ip]= - omega * Dval * Aval * Bvals[j];
1168 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1174 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1175 int Cj=Bimportcol2Ccol[Icolind[j]];
1177 if(c_status[Cj]<OLD_ip){
1179 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
1180 c_status[Cj]=CSR_ip;
1181 CSR_colind[CSR_ip]=Cj;
1182 CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1186 CSR_vals[c_status[Cj]]-=omega * Dval * Aval * Ivals[j];
1193 #ifdef ENABLE_MMM_TIMINGS
1194 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse Final Sort")));
1206 template<
typename int_type>
1212 std::vector<int> & Bcol2Ccol,
1213 std::vector<int> & Bimportcol2Ccol,
1214 std::vector<int>& Cremotepids,
1217 #ifdef ENABLE_MMM_TIMINGS
1218 using Teuchos::TimeMonitor;
1219 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix SerialCore")));
1226 int NumMyDiagonals=0;
1233 int *Arowptr, *Acolind;
1238 int *Browptr, *Bcolind;
1243 const double *Dvals = Dinv.Values();
1245 int *Irowptr=0, *Icolind=0;
1261 std::vector<int> c_status(n, -1);
1266 int CSR_ip=0,OLD_ip=0;
1272 CSR_colind.
Resize(CSR_alloc);
1276 std::vector<int> NumEntriesPerRow(m);
1280 bool found_diagonal=
false;
1281 CSR_rowptr[i]=CSR_ip;
1282 double Dval = Dvals[i];
1285 for(k=Browptr[i]; k<Browptr[i+1]; k++){
1287 double Bval = Bvals[k];
1288 if(Bval==0)
continue;
1289 int Ck=Bcol2Ccol[Bcolind[k]];
1292 c_status[Ck]=CSR_ip;
1293 CSR_colind[CSR_ip]=Ck;
1294 CSR_vals[CSR_ip]= Bvals[k];
1299 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1300 int Ak = Acolind[k];
1301 double Aval = Avals[k];
1302 if(Aval==0)
continue;
1307 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1308 int Cj=Bcol2Ccol[Bcolind[j]];
1310 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
1312 if(c_status[Cj]<OLD_ip){
1314 c_status[Cj]=CSR_ip;
1315 CSR_colind[CSR_ip]=Cj;
1316 CSR_vals[CSR_ip]= - omega * Dval* Aval * Bvals[j];
1320 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1326 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1327 int Cj=Bimportcol2Ccol[Icolind[j]];
1329 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
1331 if(c_status[Cj]<OLD_ip){
1333 c_status[Cj]=CSR_ip;
1334 CSR_colind[CSR_ip]=Cj;
1335 CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1339 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Ivals[j];
1343 NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
1346 if(CSR_ip + n > CSR_alloc){
1349 CSR_colind.
Resize(CSR_alloc);
1354 CSR_rowptr[m]=CSR_ip;
1356 #ifdef ENABLE_MMM_TIMINGS
1357 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix Final Sort")));
1363 #ifdef ENABLE_MMM_TIMINGS
1364 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix Fast IE")));
1369 int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
1371 int *ExportLIDs=0, *ExportPIDs=0;
1387 #ifdef ENABLE_MMM_TIMINGS
1388 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix ESFC")));
1402 template<
typename int_type>
1403 int MatrixMatrix::Tjacobi_A_B(
double omega,
1408 CrsMatrixStruct& Bview,
1410 bool call_FillComplete_on_result){
1416 std::vector<int> Cremotepids;
1418 std::vector<int> Bimportcol2Ccol;
1419 if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1421 #ifdef ENABLE_MMM_TIMINGS
1422 using Teuchos::TimeMonitor;
1423 Teuchos::RCP<Teuchos::TimeMonitor> MM;
1427 if(!call_FillComplete_on_result) {
1428 #ifdef ENABLE_MMM_TIMINGS
1429 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi General Multiply")));
1431 throw std::runtime_error(
"jacobi_A_B_general not implemented");
1440 int *C_rowptr, *C_colind;
1443 bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
1446 if(!NewFlag && ExtractFailFlag){
1447 #ifdef ENABLE_MMM_TIMINGS
1448 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi General Multiply")));
1450 throw std::runtime_error(
"jacobi_A_B_general not implemented");
1455 #ifdef ENABLE_MMM_TIMINGS
1456 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Newmatrix CMap")));
1457 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse CMap")));
1462 if(Bview.importMatrix) {
1463 EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
1476 #ifdef ENABLE_MMM_TIMINGS
1477 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Newmatrix Lookups")));
1478 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse Lookups")));
1487 if(colmap_B->
SameAs(*colmap_C)){
1495 Bcol2Ccol[i]=colmap_C->
LID((int_type) colmap_B->GID64(i));
1496 if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
1500 if(Bview.importMatrix){
1501 Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1502 for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
1503 Bimportcol2Ccol[i]=colmap_C->
LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
1504 if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
1513 EPETRA_CHK_ERR(jacobi_A_B_newmatrix<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C));
1517 EPETRA_CHK_ERR(jacobi_A_B_reuse<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C));
1529 int MatrixMatrix::jacobi_A_B(
double omega,
1532 CrsMatrixStruct & Aview,
1534 CrsMatrixStruct& Bview,
1536 bool call_FillComplete_on_result)
1538 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1540 return Tjacobi_A_B<int>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
1544 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1546 return Tjacobi_A_B<long long>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
1550 throw "EpetraExt::MatrixMatrix::jacobi_A_B: GlobalIndices type unknown";
1558 template<
typename int_type>
1559 int MatrixMatrix::Tmult_AT_B_newmatrix(
const CrsMatrixStruct & Atransview,
const CrsMatrixStruct & Bview,
Epetra_CrsMatrix & C,
bool keep_all_hard_zeros) {
1563 #ifdef ENABLE_MMM_TIMINGS
1564 using Teuchos::TimeMonitor;
1565 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T Transpose")));
1572 #ifdef ENABLE_MMM_TIMINGS
1573 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T AB-core")));
1575 RCP<Epetra_CrsMatrix> Ctemp;
1578 bool needs_final_export = Atransview.origMatrix->Exporter() != 0;
1579 if(needs_final_export)
1580 Ctemp = rcp(
new Epetra_CrsMatrix(Copy,Atransview.origMatrix->RowMap(),Bview.origMatrix->ColMap(),0));
1582 EPETRA_CHK_ERR( C.
ReplaceColMap(Bview.origMatrix->ColMap()) );
1583 Ctemp = rcp(&C,
false);
1587 std::vector<int> Bcol2Ccol(Bview.origMatrix->NumMyCols());
1588 for(
int i=0; i<Bview.origMatrix->NumMyCols(); i++)
1590 std::vector<int> Bimportcol2Ccol,Cremotepids;
1591 if(Bview.origMatrix->Importer())
1594 EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(*Atransview.origMatrix,*Bview.origMatrix,Bview,
1595 Bcol2Ccol,Bimportcol2Ccol,Cremotepids,
1596 *Ctemp,keep_all_hard_zeros));
1601 #ifdef ENABLE_MMM_TIMINGS
1602 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T ESFC")));
1605 if(needs_final_export)
1606 C.FusedExport(*Ctemp,*Ctemp->Exporter(),&Bview.origMatrix->DomainMap(),&Atransview.origMatrix->RangeMap(),
false);
1616 int MatrixMatrix::mult_AT_B_newmatrix(
const CrsMatrixStruct & Atransview,
const CrsMatrixStruct & Bview,
Epetra_CrsMatrix & C,
bool keep_all_hard_zeros) {
1618 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1619 if(Atransview.origMatrix->RowMap().GlobalIndicesInt() && Bview.origMatrix->RowMap().GlobalIndicesInt()) {
1620 return Tmult_AT_B_newmatrix<int>(Atransview,Bview,C,keep_all_hard_zeros);
1624 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1625 if(Atransview.origMatrix->RowMap().GlobalIndicesLongLong() && Bview.origMatrix->RowMap().GlobalIndicesLongLong()) {
1626 return Tmult_AT_B_newmatrix<long long>(Atransview,Bview,C,keep_all_hard_zeros);
1630 throw "EpetraExt::MatrixMatrix::mult_AT_B_newmatrix: GlobalIndices type unknown";
const int * LengthsFrom() const
LightweightCrsMatrix * importMatrix
bool DistributedGlobal() const
const Epetra_Map & RangeMap() const
std::vector< int > targetMapToOrigRow
int Resize(int Length_in)
virtual int SumIntoGlobalValues(int GlobalRow, int NumEntries, const double *Values, const int *Indices)
bool GlobalIndicesLongLong() const
void MyGlobalElementsPtr(int *&MyGlobalElementList) const
bool SameAs(const Epetra_BlockMap &Map) const
int MyGlobalElements(int *MyGlobalElementList) const
static int GetRemotePIDs(const Epetra_Import &Importer, std::vector< int > &RemotePIDs)
const Epetra_Map * rowMap
Epetra_IntSerialDenseVector & ExpertExtractIndexOffset()
std::vector< int > colind_
std::vector< int > ExportPIDs_
bool GlobalIndicesInt() const
const Epetra_Map & ColMap() const
bool IndicesAreLocal() const
int mult_A_B_newmatrix(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, const CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, std::vector< int > &Cremotepids, Epetra_CrsMatrix &C, bool keep_all_hard_zeros)
virtual int MyPID() const =0
int FillComplete(bool OptimizeDataStorage=true)
int ExpertMakeUniqueCrsGraphData()
static int SortCrsEntries(int NumRows, const int *CRS_rowptr, int *CRS_colind, double *CRS_vals)
const Epetra_Map & RowMap() const
int NumMyElements() const
void resize_doubles(int nold, int nnew, double *&d)
std::vector< int > targetMapToImportRow
const Epetra_Import * Importer() const
int NumMyElements() const
static int C_estimate_nnz(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B)
int jacobi_A_B_reuse(double omega, const Epetra_Vector &Dinv, const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, Epetra_CrsMatrix &C)
std::vector< double > vals_
bool IndicesAreGlobal() const
int mult_A_B_general(const Epetra_CrsMatrix &A, CrsMatrixStruct &Aview, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, Epetra_CrsMatrix &C, bool call_FillComplete_on_result, bool keep_all_hard_zeros)
virtual int InsertGlobalValues(int GlobalRow, int NumEntries, const double *Values, const int *Indices)
int aztecoo_and_ml_compatible_map_union(const Epetra_CrsMatrix &B, const LightweightCrsMatrix &Bimport, Epetra_Map *&unionmap, std::vector< int > &Cremotepids, std::vector< int > &Bcols2Ccols, std::vector< int > &Icols2Ccols)
int jacobi_A_B_newmatrix(double omega, const Epetra_Vector &Dinv, const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, std::vector< int > &Cremotepids, Epetra_CrsMatrix &C)
virtual int NumProc() const =0
Epetra_IntSerialDenseVector & ExpertExtractIndices()
const Epetra_Map & DomainMap() const
static void EPETRA_LIB_DLL_EXPORT Sort(bool SortAscending, int NumKeys, T *Keys, int NumDoubleCompanions, double **DoubleCompanions, int NumIntCompanions, int **IntCompanions, int NumLongLongCompanions, long long **LongLongCompanions)
std::vector< int > ColMapOwningPIDs_
std::vector< int > ExportLIDs_
std::vector< int > rowptr_
const Epetra_Map * colMap
int mult_A_B_reuse(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, Epetra_CrsMatrix &C, bool keep_all_hard_zeros)
const Epetra_Comm & Comm() const
int ExpertStaticFillComplete(const Epetra_Map &DomainMap, const Epetra_Map &RangeMap, const Epetra_Import *Importer=0, const Epetra_Export *Exporter=0, int NumMyDiagonals=-1)
double *& ExpertExtractValues()
int ReplaceColMap(const Epetra_BlockMap &newmap)
int ExtractCrsDataPointers(int *&IndexOffset, int *&Indices, double *&Values_in) const
static int GetPids(const Epetra_Import &Importer, std::vector< int > &pids, bool use_minus_one_for_local)
int NumMyNonzeros() const