10 #include <fei_Reducer.hpp>
11 #include <fei_MatrixGraph.hpp>
12 #include <fei_Matrix.hpp>
13 #include <fei_Matrix_core.hpp>
14 #include <fei_Vector.hpp>
15 #include <fei_Graph_Impl.hpp>
16 #include <fei_ArrayUtils.hpp>
17 #include <fei_TemplateUtils.hpp>
18 #include <fei_SparseRowGraph.hpp>
19 #include <fei_Vector.hpp>
20 #include <fei_impl_utils.hpp>
47 localUnreducedEqns_(),
54 firstLocalReducedEqn_(0),
55 lastLocalReducedEqn_(0),
56 lowestGlobalSlaveEqn_(0),
57 highestGlobalSlaveEqn_(0),
61 dbgprefix_(
"Reducer: "),
71 csrD_ = *globalSlaveDependencyMatrix;
72 if (g_vector.
get() != NULL) {
82 numGlobalSlaves_ = csrD_.getNumRows();
83 slavesPtr_ = &((csrD_.getGraph().rowNumbers)[0]);
84 lowestGlobalSlaveEqn_ = slavesPtr_[0];
85 highestGlobalSlaveEqn_ = slavesPtr_[numGlobalSlaves_-1];
87 if (csg_.size() > 0) {
88 double* gptr = &(csg_.coefs()[0]);
89 for(
size_t i=0; i<csg_.size(); ++i) {
103 nonslaves_.resize(numGlobalSlaves_*2);
105 int nonslave_counter = 0;
107 int num_consecutive = 0;
108 while(slaveOffset < numGlobalSlaves_-1) {
109 int gap = slavesPtr_[slaveOffset+1]-slavesPtr_[slaveOffset]-1;
111 nonslaves_[nonslave_counter] = slavesPtr_[slaveOffset]+1;
112 nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
121 nonslaves_[nonslave_counter] = highestGlobalSlaveEqn_+1;
122 nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
124 reverse_.resize(nonslave_counter);
125 int first = lowestGlobalSlaveEqn_;
127 for(
int i=1; i<nonslave_counter; ++i) {
128 reverse_[i] = reverse_[i-1] +
129 (nonslaves_[i]-nonslaves_[i-1] - nonslaves_[numGlobalSlaves_+i] - 1);
133 MPI_Comm_rank(comm_, &localProc_);
134 MPI_Comm_size(comm_, &numProcs_);
137 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
138 FEI_OSTREAM& os = *output_stream_;
139 os << dbgprefix_<<
"ctor, numGlobalSlaves="<<numGlobalSlaves_
143 if (numGlobalSlaves_ < 1) {
144 throw std::runtime_error(
"ERROR: don't use fei::Reducer when numGlobalSlaves==0. Report to Alan Williams.");
163 localUnreducedEqns_(),
170 firstLocalReducedEqn_(0),
171 lastLocalReducedEqn_(0),
172 lowestGlobalSlaveEqn_(0),
173 highestGlobalSlaveEqn_(0),
177 dbgprefix_(
"Reducer: "),
189 std::vector<int> indices;
191 setLocalUnreducedEqns(indices);
196 delete [] isSlaveEqn_; isSlaveEqn_ = 0;
197 delete [] bool_array_; bool_array_ = 0;
198 delete [] int_array_; int_array_ = 0;
199 delete [] double_array_; double_array_ = 0;
204 Reducer::setLocalUnreducedEqns(
const std::vector<int>& localUnreducedEqns)
206 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
207 FEI_OSTREAM& os = *output_stream_;
208 os << dbgprefix_<<
"setLocalUnreducedEqns, numLocalEqns="
209 <<localUnreducedEqns.size() << FEI_ENDL;
212 if (localUnreducedEqns_ != localUnreducedEqns) {
213 localUnreducedEqns_ = localUnreducedEqns;
216 int num = localUnreducedEqns_.size();
218 if (isSlaveEqn_ != NULL)
delete [] isSlaveEqn_;
220 isSlaveEqn_ = num > 0 ?
new bool[localUnreducedEqns_.size()] : NULL;
224 for(
size_t i=0; i<localUnreducedEqns_.size(); ++i) {
226 slavesPtr_, numGlobalSlaves_);
228 isSlaveEqn_[i] =
false;
231 isSlaveEqn_[i] =
true;
234 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
235 FEI_OSTREAM& os = *output_stream_;
236 os << dbgprefix_<<
" slave " << localUnreducedEqns_[i] <<
" depends on ";
237 int offset = csrD_.getGraph().rowOffsets[idx];
238 int rowlen = csrD_.getGraph().rowOffsets[idx+1]-offset;
239 int* indices = &(csrD_.getGraph().packedColumnIndices[offset]);
240 for(
int j=0; j<rowlen; ++j) {
241 os << indices[j] <<
" ";
249 int num_slaves_on_lower_procs = 0;
255 std::vector<int> procNumLocalSlaves(numProcs_);
257 MPI_Allgather(&numLocalSlaves_, 1, MPI_INT, &procNumLocalSlaves[0],
260 for(
int p=0; p<localProc_; ++p) {
261 num_slaves_on_lower_procs += procNumLocalSlaves[p];
266 if (!localUnreducedEqns_.empty()) {
267 unsigned first_non_slave_offset = 0;
268 while(first_non_slave_offset < localUnreducedEqns_.size() &&
269 isSlaveEqn_[first_non_slave_offset] ==
true) {
270 ++first_non_slave_offset;
273 firstLocalReducedEqn_ = localUnreducedEqns_[first_non_slave_offset]
274 - num_slaves_on_lower_procs - first_non_slave_offset;
276 int num_local_eqns = localUnreducedEqns_.size() - numLocalSlaves_;
278 lastLocalReducedEqn_ = firstLocalReducedEqn_ + num_local_eqns - 1;
280 localReducedEqns_.resize(num_local_eqns);
284 int eqn = firstLocalReducedEqn_;
285 for(
unsigned i=0; i<localUnreducedEqns_.size(); ++i) {
286 if (isSlaveEqn_[i] ==
false) {
287 localReducedEqns_[offset++] = eqn++;
291 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
292 FEI_OSTREAM& os = *output_stream_;
293 if (localUnreducedEqns_.size() > 0) {
294 os << dbgprefix_<<
"first local eqn="
295 <<localUnreducedEqns_[0]<<
", last local eqn="
296 <<localUnreducedEqns_[localUnreducedEqns_.size()-1] << FEI_ENDL;
298 os << dbgprefix_<<
"numLocalSlaves_="<<numLocalSlaves_
299 <<
", firstLocalReducedEqn_="<<firstLocalReducedEqn_
300 <<
", lastLocalReducedEqn_="<<lastLocalReducedEqn_<<FEI_ENDL;
307 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
308 FEI_OSTREAM& os = *output_stream_;
309 os << dbgprefix_<<
"addGraphEntries"<<FEI_ENDL;
315 std::vector<int>& rowNumbers = matrixGraph->
rowNumbers;
316 std::vector<int>& rowOffsets = matrixGraph->
rowOffsets;
319 for(
unsigned i=0; i<rowNumbers.size(); ++i) {
320 int row = rowNumbers[i];
322 bool slave_row = isSlaveEqn(row);
324 int rowLength = rowOffsets[i+1]-rowOffsets[i];
325 int* cols = &packedCols[rowOffsets[i]];
328 fei::CSVec* Kdd_row = Kdd_.create_or_getRow(row);
329 fei::CSVec* Kdi_row = Kdi_.create_or_getRow(row);
331 for(
int j=0; j<rowLength; ++j) {
333 bool slave_col = isSlaveEqn(col);
336 add_entry(*Kdd_row, col, 0.0);
339 add_entry(*Kdi_row, col, 0.0);
346 fei::CSVec* Kid_row = Kid_.create_or_getRow(row);
347 fei::CSVec* Kii_row = Kii_.create_or_getRow(row);
349 for(
int j=0; j<rowLength; ++j) {
351 bool slave_col = isSlaveEqn(col);
354 add_entry(*Kid_row, col, 0.0);
357 add_entry(*Kii_row, col, 0.0);
365 Reducer::expand_work_arrays(
int size)
367 if (size <= array_len_)
return;
370 delete [] bool_array_;
371 delete [] int_array_;
372 delete [] double_array_;
373 bool_array_ =
new bool[array_len_];
374 int_array_ =
new int[array_len_];
375 double_array_ =
new double[array_len_];
379 Reducer::addGraphIndices(
int numRows,
const int* rows,
380 int numCols,
const int* cols,
383 expand_work_arrays(numCols);
385 bool no_slave_cols =
true;
386 for(
int i=0; i<numCols; ++i) {
387 bool_array_[i] = isSlaveEqn(cols[i]);
388 if (bool_array_[i]) no_slave_cols =
false;
391 for(
int i=0; i<numRows; ++i) {
392 bool slave_row = isSlaveEqn(rows[i]);
395 fei::CSVec* Kdd_row = Kdd_.create_or_getRow(rows[i]);
396 fei::CSVec* Kdi_row = Kdi_.create_or_getRow(rows[i]);
398 for(
int j=0; j<numCols; ++j) {
399 if (bool_array_[j]) {
400 add_entry(*Kdd_row, cols[j], 0.0);
403 add_entry(*Kdi_row, cols[j], 0.0);
412 NULL : Kid_.create_or_getRow(rows[i]);
414 unsigned num_non_slave_cols = 0;
416 for(
int j=0; j<numCols; ++j) {
417 if (bool_array_[j]) {
418 add_entry(*Kid_row, cols[j], 0.0);
422 int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
426 if (num_non_slave_cols > 0) {
427 int reduced_row = translateToReducedEqn(rows[i]);
428 graph.
addIndices(reduced_row, num_non_slave_cols, int_array_);
433 if (mat_counter_ > 600) {
434 assembleReducedGraph(&graph,
false);
439 Reducer::addSymmetricGraphIndices(
int numIndices,
const int* indices,
444 for(
int i=0; i<numIndices; ++i) {
445 addGraphIndices(1, &indices[i], 1, &indices[i], graph);
449 addGraphIndices(numIndices, indices, numIndices, indices, graph);
454 Reducer::assembleReducedGraph(
fei::Graph* graph,
457 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
458 FEI_OSTREAM& os = *output_stream_;
459 os << dbgprefix_<<
"assembleReducedGraph(fei::Graph)"<<FEI_ENDL;
460 if (output_level_ >= fei::FULL_LOGS) {
461 os << dbgprefix_<<
"Kdi:"<<FEI_ENDL<<Kdi_;
477 fei::impl_utils::translate_to_reduced_eqns(*
this, tmpMat1_);
478 fei::impl_utils::translate_to_reduced_eqns(*
this, tmpMat2_);
480 fei::impl_utils::add_to_graph(tmpMat1_, *graph);
481 fei::impl_utils::add_to_graph(tmpMat2_, *graph);
490 fei::impl_utils::translate_to_reduced_eqns(*
this, tmpMat2_);
492 fei::impl_utils::add_to_graph(tmpMat2_, *graph);
496 fei::impl_utils::translate_to_reduced_eqns(*
this, csrKii);
497 fei::impl_utils::add_to_graph(csrKii, *graph);
514 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
515 FEI_OSTREAM& os = *output_stream_;
516 os << dbgprefix_<<
"assembleReducedGraph(fei::SparseRowGraph)"<<FEI_ENDL;
519 fei::Graph_Impl graph(comm_, firstLocalReducedEqn_, lastLocalReducedEqn_);
520 assembleReducedGraph(&graph);
525 Reducer::assembleReducedMatrix(
fei::Matrix& matrix)
527 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
528 FEI_OSTREAM& os = *output_stream_;
529 os << dbgprefix_<<
"assembleReducedMatrix(fei::Matrix)"<<FEI_ENDL;
530 if (output_level_ >= fei::FULL_LOGS) {
531 os << dbgprefix_<<
"Kid:"<<FEI_ENDL<<Kid_;
532 os << dbgprefix_<<
"Kdi:"<<FEI_ENDL<<Kdi_;
546 fei::impl_utils::translate_to_reduced_eqns(*
this, tmpMat1_);
548 fei::impl_utils::add_to_matrix(tmpMat1_,
true, matrix);
550 fei::impl_utils::translate_to_reduced_eqns(*
this, tmpMat2_);
551 fei::impl_utils::add_to_matrix(tmpMat2_,
true, matrix);
565 fi_.subtract(tmpVec1_);
573 fi_.subtract(tmpVec1_);
577 fei::impl_utils::translate_to_reduced_eqns(*
this, tmpMat2_);
578 fei::impl_utils::add_to_matrix(tmpMat2_,
true, matrix);
582 fei::impl_utils::translate_to_reduced_eqns(*
this, csrKii);
583 fei::impl_utils::add_to_matrix(csrKii,
true, matrix);
594 Reducer::isSlaveEqn(
int unreducedEqn)
const
596 int num = localUnreducedEqns_.size();
598 int offset = num>0 ? unreducedEqn - localUnreducedEqns_[0] : -1;
599 if (offset < 0 || offset >= (
int)localUnreducedEqns_.size()) {
600 return(isSlaveCol(unreducedEqn));
603 return(isSlaveEqn_[offset]);
607 Reducer::getSlaveMasterEqns(
int slaveEqn, std::vector<int>& masterEqns)
611 std::vector<int>& rows = csrD_.getGraph().rowNumbers;
613 std::vector<int>::iterator iter =
614 std::lower_bound(rows.begin(), rows.end(), slaveEqn);
616 if (iter == rows.end() || *iter != slaveEqn) {
620 size_t offset = iter - rows.begin();
622 int rowBegin = csrD_.getGraph().rowOffsets[offset];
623 int rowEnd = csrD_.getGraph().rowOffsets[offset+1];
624 std::vector<int>& cols = csrD_.getGraph().packedColumnIndices;
626 for(
int j=rowBegin; j<rowEnd; ++j) {
627 masterEqns.push_back(cols[j]);
632 Reducer::isSlaveCol(
int unreducedEqn)
const
635 slavesPtr_, numGlobalSlaves_);
641 Reducer::translateToReducedEqn(
int eqn)
const
643 if (eqn < lowestGlobalSlaveEqn_) {
647 if (eqn > highestGlobalSlaveEqn_) {
648 return(eqn - numGlobalSlaves_);
655 if (foundOffset >= 0) {
656 throw std::runtime_error(
"Reducer::translateToReducedEqn ERROR, input is slave eqn.");
663 Reducer::translateFromReducedEqn(
int reduced_eqn)
const
667 reverse_.size(), index);
669 return(nonslaves_[offset]);
676 int adjustment = reduced_eqn - reverse_[index-1];
678 return(nonslaves_[index-1] + adjustment);
682 Reducer::addMatrixValues(
int numRows,
const int* rows,
683 int numCols,
const int* cols,
684 const double*
const* values,
689 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
690 FEI_OSTREAM& os = *output_stream_;
691 os << dbgprefix_<<
"addMatrixValues(fei::Matrix)"<<FEI_ENDL;
694 expand_work_arrays(numCols+numRows);
696 const double** myvalues =
const_cast<const double**
>(values);
697 if (format != FEI_DENSE_ROW) {
698 if (format != FEI_DENSE_COL) {
699 throw std::runtime_error(
"fei::Reducer::addMatrixValues ERROR, submatrix format must be either FEI_DENSE_ROW or FEI_DENSE_COL. Other formats not supported with slave constraints.");
702 fei::Matrix_core::copyTransposeToWorkArrays(numRows, numCols, values,
704 myvalues = &work_2D_[0];
707 bool no_slave_cols =
true;
708 unsigned num_non_slave_cols = 0;
709 for(
int j=0; j<numCols; ++j) {
710 bool_array_[j] = isSlaveEqn(cols[j]);
711 if (bool_array_[j]) no_slave_cols =
false;
712 else int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
715 bool no_slave_rows =
true;
716 for(
int i=0; i<numRows; ++i) {
717 bool_array_[numCols+i] = isSlaveEqn(rows[i]);
718 if (bool_array_[numCols+i]) no_slave_rows =
false;
719 else int_array_[numCols+i] = translateToReducedEqn(rows[i]);
722 if (no_slave_rows && no_slave_cols) {
724 feimat.
sumIn(numRows, int_array_+numCols,
725 numCols, int_array_, myvalues, FEI_DENSE_ROW);
728 feimat.
copyIn(numRows, int_array_+numCols,
729 numCols, int_array_, myvalues, FEI_DENSE_ROW);
735 for(
int i=0; i<numRows; ++i) {
736 if (bool_array_[numCols+i]) {
739 fei::CSVec* Kdd_row = Kdd_.create_or_getRow(rows[i]);
740 fei::CSVec* Kdi_row = Kdi_.create_or_getRow(rows[i]);
742 for(
int j=0; j<numCols; ++j) {
743 if (bool_array_[j]) {
744 add_entry(*Kdd_row, cols[j], myvalues[i][j]);
747 add_entry(*Kdi_row, cols[j], myvalues[i][j]);
754 int reduced_row = int_array_[numCols+i];
755 const double* rowvals = myvalues[i];
757 feimat.
sumIn(1, &reduced_row, numCols, int_array_,
761 feimat.
copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
769 fei::CSVec* Kid_row = Kid_.create_or_getRow(rows[i]);
772 for(
int j=0; j<numCols; ++j) {
773 if (bool_array_[j]) {
774 add_entry(*Kid_row, cols[j], myvalues[i][j]);
778 double_array_[offset++] = myvalues[i][j];
782 if (num_non_slave_cols > 0) {
783 int reduced_row = int_array_[numCols+i];
785 feimat.
sumIn(1, &reduced_row, num_non_slave_cols, int_array_,
786 &double_array_, format);
789 feimat.
copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
790 &double_array_, format);
796 if (mat_counter_ > 600) {
797 assembleReducedMatrix(feimat);
804 Reducer::addVectorValues(
int numValues,
805 const int* globalIndices,
806 const double* values,
812 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
813 FEI_OSTREAM& os = *output_stream_;
814 os << dbgprefix_<<
"addVectorValues(fei::Vector)"<<FEI_ENDL;
817 for(
int i=0; i<numValues; ++i) {
818 if (isSlaveEqn(globalIndices[i])) {
820 if (!soln_vector) add_entry(fd_, globalIndices[i], values[i]);
823 if (!soln_vector) put_entry(fd_, globalIndices[i], values[i]);
825 if (!soln_vector) ++rhs_vec_counter_;
828 int reduced_index = translateToReducedEqn(globalIndices[i]);
831 feivec.
sumIn(1, &reduced_index, &values[i], vectorIndex);
834 feivec.
copyIn(1, &reduced_index, &values[i], vectorIndex);
839 if (rhs_vec_counter_ > 600) {
840 assembleReducedVector(soln_vector, feivec);
847 Reducer::assembleReducedVector(
bool soln_vector,
850 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
851 FEI_OSTREAM& os = *output_stream_;
852 os << dbgprefix_<<
"assembleReducedVector(fei::Vector)"<<FEI_ENDL;
861 if (vec.size() > 0) {
868 fei::impl_utils::translate_to_reduced_eqns(*
this, tmpVec1_);
870 int which_vector = 0;
871 feivec.
sumIn(tmpVec1_.size(), &(tmpVec1_.indices()[0]),
872 &(tmpVec1_.coefs()[0]), which_vector);
877 if (vec_i.size() > 0) {
879 fei::impl_utils::translate_to_reduced_eqns(*
this, csvec_i);
881 int which_vector = 0;
882 feivec.
sumIn(csvec_i.size(), &(csvec_i.indices()[0]),
883 &(csvec_i.coefs()[0]), which_vector);
888 rhs_vec_counter_ = 0;
892 Reducer::copyOutVectorValues(
int numValues,
893 const int* globalIndices,
899 if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
900 FEI_OSTREAM& os = *output_stream_;
901 os << dbgprefix_<<
"copyOutVectorValues"<<FEI_ENDL;
906 std::vector<int> reduced_indices;
907 std::vector<int> offsets;
909 for(
int i=0; i<numValues; ++i) {
910 if (isSlaveEqn(globalIndices[i])) {
911 fei::put_entry(tmpVec1_, globalIndices[i], 0.0);
912 offsets.push_back(i);
915 int reduced_idx = translateToReducedEqn(globalIndices[i]);
916 feivec.
copyOut(1, &reduced_idx, &values[i], vectorIndex);
920 if (tmpVec1_.size() > 0) {
922 int* tmpVec2Indices = tmpVec2_.indices().empty() ? NULL : &(tmpVec2_.indices()[0]);
923 double* tmpVec2Coefs = tmpVec2_.coefs().empty() ? NULL : &(tmpVec2_.coefs()[0]);
924 for(
size_t i=0; i<tmpVec2_.size(); ++i) {
925 reduced_indices.push_back(translateToReducedEqn(tmpVec2Indices[i]));
928 int* reduced_indices_ptr = reduced_indices.empty() ? NULL : &reduced_indices[0];
929 feivec.
copyOut(tmpVec2_.size(), reduced_indices_ptr, tmpVec2Coefs, vectorIndex);
934 int* ginds = &(csg_.indices()[0]);
935 double* gcoefs = &(csg_.coefs()[0]);
936 for(
size_t ii=0; ii<csg_.size(); ++ii) {
937 fei::add_entry(tmpVec1_, ginds[ii], gcoefs[ii]);
941 int len = tmpVec1_.size();
942 int* indices = &(tmpVec1_.indices()[0]);
943 double* coefs = &(tmpVec1_.coefs()[0]);
945 for(
unsigned ii=0; ii<offsets.size(); ++ii) {
946 int index = globalIndices[offsets[ii]];
952 values[offsets[ii]] = coefs[idx];
960 Reducer::getLocalReducedEqns()
962 return(localReducedEqns_);
MPI_Comm getCommunicator() const
virtual int copyIn(int numValues, const int *indices, const double *values, int vectorIndex=0)=0
void multiply_CSRMat_CSRMat(const CSRMat &A, const CSRMat &B, CSRMat &C, bool storeResultZeros)
std::vector< int > rowNumbers
virtual int gatherFromOverlap()=0
void multiply_trans_CSRMat_CSVec(const CSRMat &A, const CSVec &x, CSVec &y)
virtual table_type * getLocalGraph()=0
std::vector< int > packedColumnIndices
std::vector< int > rowOffsets
virtual int addIndices(int row, int len, const int *indices)=0
virtual fei::SharedPtr< fei::VectorSpace > getRowSpace()=0
int binarySearch(const T &item, const T *list, int len)
virtual int sumIn(int numRows, const int *rows, int numCols, const int *cols, const double *const *values, int format=0)=0
virtual int sumIn(int numValues, const int *indices, const double *values, int vectorIndex=0)=0
int getIndices_Owned(std::vector< int > &globalIndices) const
void multiply_trans_CSRMat_CSRMat(const CSRMat &A, const CSRMat &B, CSRMat &C, bool storeResultZeros)
void multiply_CSRMat_CSVec(const CSRMat &A, const CSVec &x, CSVec &y)
virtual int copyIn(int numRows, const int *rows, int numCols, const int *cols, const double *const *values, int format=0)=0
void copyToSparseRowGraph(snl_fei::RaggedTable< MAP_TYPE, SET_TYPE > &table, fei::SparseRowGraph &srg)
virtual int copyOut(int numValues, const int *indices, double *values, int vectorIndex=0) const =0