FEI  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
fei_Reducer.cpp
1 
2 /*--------------------------------------------------------------------*/
3 /* Copyright 2006 Sandia Corporation. */
4 /* Under the terms of Contract DE-AC04-94AL85000, there is a */
5 /* non-exclusive license for use of this work by or on behalf */
6 /* of the U.S. Government. Export of this program may require */
7 /* a license from the United States Government. */
8 /*--------------------------------------------------------------------*/
9 
10 #include <fei_Reducer.hpp>
11 #include <fei_MatrixGraph.hpp>
12 #include <fei_Matrix.hpp>
13 #include <fei_Matrix_core.hpp>
14 #include <fei_Vector.hpp>
15 #include <fei_Graph_Impl.hpp>
16 #include <fei_ArrayUtils.hpp>
17 #include <fei_TemplateUtils.hpp>
18 #include <fei_SparseRowGraph.hpp>
19 #include <fei_Vector.hpp>
20 #include <fei_impl_utils.hpp>
21 
22 namespace fei {
23 
24 Reducer::Reducer(fei::SharedPtr<FillableMat> globalSlaveDependencyMatrix,
25  fei::SharedPtr<CSVec> g_vector,
26  MPI_Comm comm)
27  : csrD_(),
28  slavesPtr_(NULL),
29  Kii_(),
30  Kid_(),
31  Kdi_(),
32  Kdd_(),
33  csrKii(),
34  csrKid(),
35  csrKdi(),
36  csrKdd(),
37  fi_(),
38  fd_(),
39  csvec(),
40  csvec_i(),
41  tmpMat1_(),
42  tmpMat2_(),
43  tmpVec1_(),
44  tmpVec2_(),
45  csg_(),
46  g_nonzero_(false),
47  localUnreducedEqns_(),
48  localReducedEqns_(),
49  nonslaves_(),
50  reverse_(),
51  isSlaveEqn_(NULL),
52  numGlobalSlaves_(0),
53  numLocalSlaves_(0),
54  firstLocalReducedEqn_(0),
55  lastLocalReducedEqn_(0),
56  lowestGlobalSlaveEqn_(0),
57  highestGlobalSlaveEqn_(0),
58  localProc_(0),
59  numProcs_(1),
60  comm_(comm),
61  dbgprefix_("Reducer: "),
62  mat_counter_(0),
63  rhs_vec_counter_(0),
64  bool_array_(0),
65  int_array_(0),
66  double_array_(0),
67  array_len_(0),
68  work_1D_(),
69  work_2D_()
70 {
71  csrD_ = *globalSlaveDependencyMatrix;
72  if (g_vector.get() != NULL) {
73  csg_ = *g_vector;
74  }
75 
76  initialize();
77 }
78 
79 void
80 Reducer::initialize()
81 {
82  numGlobalSlaves_ = csrD_.getNumRows();
83  slavesPtr_ = &((csrD_.getGraph().rowNumbers)[0]);
84  lowestGlobalSlaveEqn_ = slavesPtr_[0];
85  highestGlobalSlaveEqn_ = slavesPtr_[numGlobalSlaves_-1];
86 
87  if (csg_.size() > 0) {
88  double* gptr = &(csg_.coefs()[0]);
89  for(size_t i=0; i<csg_.size(); ++i) {
90  if (gptr[i] != 0.0) {
91  g_nonzero_ = true;
92  break;
93  }
94  }
95  }
96 
97  //The following code sets up the mappings (nonslaves_ and reverse_)
98  //necessary to implement the method 'translateFromReducedEqn'.
99  //This code is ugly and inelegant, and it took me quite a while to get
100  //it right. I won't even bother trying to comment it, but note that its
101  //correctness is tested in test_utils/test_Reducer.cpp.
102 
103  nonslaves_.resize(numGlobalSlaves_*2);
104 
105  int nonslave_counter = 0;
106  int slaveOffset = 0;
107  int num_consecutive = 0;
108  while(slaveOffset < numGlobalSlaves_-1) {
109  int gap = slavesPtr_[slaveOffset+1]-slavesPtr_[slaveOffset]-1;
110  if (gap > 0) {
111  nonslaves_[nonslave_counter] = slavesPtr_[slaveOffset]+1;
112  nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
113  num_consecutive = 0;
114  }
115  else {
116  ++num_consecutive;
117  }
118  ++slaveOffset;
119  }
120 
121  nonslaves_[nonslave_counter] = highestGlobalSlaveEqn_+1;
122  nonslaves_[numGlobalSlaves_+nonslave_counter++] = num_consecutive;
123 
124  reverse_.resize(nonslave_counter);
125  int first = lowestGlobalSlaveEqn_;
126  reverse_[0] = first;
127  for(int i=1; i<nonslave_counter; ++i) {
128  reverse_[i] = reverse_[i-1] +
129  (nonslaves_[i]-nonslaves_[i-1] - nonslaves_[numGlobalSlaves_+i] - 1);
130  }
131 
132 #ifndef FEI_SER
133  MPI_Comm_rank(comm_, &localProc_);
134  MPI_Comm_size(comm_, &numProcs_);
135 #endif
136 
137  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
138  FEI_OSTREAM& os = *output_stream_;
139  os << dbgprefix_<< "ctor, numGlobalSlaves="<<numGlobalSlaves_
140  << FEI_ENDL;
141  }
142 
143  if (numGlobalSlaves_ < 1) {
144  throw std::runtime_error("ERROR: don't use fei::Reducer when numGlobalSlaves==0. Report to Alan Williams.");
145  }
146 }
147 
148 Reducer::Reducer(fei::SharedPtr<fei::MatrixGraph> matrixGraph)
149  : csrD_(),
150  slavesPtr_(NULL),
151  Kii_(),
152  Kid_(),
153  Kdi_(),
154  Kdd_(),
155  fi_(),
156  fd_(),
157  tmpMat1_(),
158  tmpMat2_(),
159  tmpVec1_(),
160  tmpVec2_(),
161  csg_(),
162  g_nonzero_(false),
163  localUnreducedEqns_(),
164  localReducedEqns_(),
165  nonslaves_(),
166  reverse_(),
167  isSlaveEqn_(NULL),
168  numGlobalSlaves_(0),
169  numLocalSlaves_(0),
170  firstLocalReducedEqn_(0),
171  lastLocalReducedEqn_(0),
172  lowestGlobalSlaveEqn_(0),
173  highestGlobalSlaveEqn_(0),
174  localProc_(0),
175  numProcs_(1),
176  comm_(),
177  dbgprefix_("Reducer: "),
178  mat_counter_(0),
179  rhs_vec_counter_(0),
180  bool_array_(0),
181  int_array_(0),
182  double_array_(0),
183  array_len_(0)
184 {
185  fei::SharedPtr<fei::VectorSpace> vecSpace = matrixGraph->getRowSpace();
186  comm_ = vecSpace->getCommunicator();
187  initialize();
188 
189  std::vector<int> indices;
190  vecSpace->getIndices_Owned(indices);
191  setLocalUnreducedEqns(indices);
192 }
193 
194 Reducer::~Reducer()
195 {
196  delete [] isSlaveEqn_; isSlaveEqn_ = 0;
197  delete [] bool_array_; bool_array_ = 0;
198  delete [] int_array_; int_array_ = 0;
199  delete [] double_array_; double_array_ = 0;
200  array_len_ = 0;
201 }
202 
203 void
204 Reducer::setLocalUnreducedEqns(const std::vector<int>& localUnreducedEqns)
205 {
206  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
207  FEI_OSTREAM& os = *output_stream_;
208  os << dbgprefix_<< "setLocalUnreducedEqns, numLocalEqns="
209  <<localUnreducedEqns.size() << FEI_ENDL;
210  }
211 
212  if (localUnreducedEqns_ != localUnreducedEqns) {
213  localUnreducedEqns_ = localUnreducedEqns;
214  }
215 
216  int num = localUnreducedEqns_.size();
217 
218  if (isSlaveEqn_ != NULL) delete [] isSlaveEqn_;
219 
220  isSlaveEqn_ = num > 0 ? new bool[localUnreducedEqns_.size()] : NULL;
221 
222  numLocalSlaves_ = 0;
223 
224  for(size_t i=0; i<localUnreducedEqns_.size(); ++i) {
225  int idx = fei::binarySearch(localUnreducedEqns_[i],
226  slavesPtr_, numGlobalSlaves_);
227  if (idx < 0) {
228  isSlaveEqn_[i] = false;
229  }
230  else {
231  isSlaveEqn_[i] = true;
232  ++numLocalSlaves_;
233 
234  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
235  FEI_OSTREAM& os = *output_stream_;
236  os << dbgprefix_<<" slave " << localUnreducedEqns_[i] << " depends on ";
237  int offset = csrD_.getGraph().rowOffsets[idx];
238  int rowlen = csrD_.getGraph().rowOffsets[idx+1]-offset;
239  int* indices = &(csrD_.getGraph().packedColumnIndices[offset]);
240  for(int j=0; j<rowlen; ++j) {
241  os << indices[j] << " ";
242  }
243  os << FEI_ENDL;
244  }
245  }
246 
247  }
248 
249  int num_slaves_on_lower_procs = 0;
250 
251 
252 #ifndef FEI_SER
253 
254  if (numProcs_ > 1) {
255  std::vector<int> procNumLocalSlaves(numProcs_);
256 
257  MPI_Allgather(&numLocalSlaves_, 1, MPI_INT, &procNumLocalSlaves[0],
258  1, MPI_INT, comm_);
259 
260  for(int p=0; p<localProc_; ++p) {
261  num_slaves_on_lower_procs += procNumLocalSlaves[p];
262  }
263  }
264 #endif
265 
266  if (!localUnreducedEqns_.empty()) {
267  unsigned first_non_slave_offset = 0;
268  while(first_non_slave_offset < localUnreducedEqns_.size() &&
269  isSlaveEqn_[first_non_slave_offset] == true) {
270  ++first_non_slave_offset;
271  }
272 
273  firstLocalReducedEqn_ = localUnreducedEqns_[first_non_slave_offset]
274  - num_slaves_on_lower_procs - first_non_slave_offset;
275 
276  int num_local_eqns = localUnreducedEqns_.size() - numLocalSlaves_;
277 
278  lastLocalReducedEqn_ = firstLocalReducedEqn_ + num_local_eqns - 1;
279 
280  localReducedEqns_.resize(num_local_eqns);
281  }
282 
283  unsigned offset = 0;
284  int eqn = firstLocalReducedEqn_;
285  for(unsigned i=0; i<localUnreducedEqns_.size(); ++i) {
286  if (isSlaveEqn_[i] == false) {
287  localReducedEqns_[offset++] = eqn++;
288  }
289  }
290 
291  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
292  FEI_OSTREAM& os = *output_stream_;
293  if (localUnreducedEqns_.size() > 0) {
294  os << dbgprefix_<< "first local eqn="
295  <<localUnreducedEqns_[0]<<", last local eqn="
296  <<localUnreducedEqns_[localUnreducedEqns_.size()-1] << FEI_ENDL;
297  }
298  os << dbgprefix_<<"numLocalSlaves_="<<numLocalSlaves_
299  <<", firstLocalReducedEqn_="<<firstLocalReducedEqn_
300  <<", lastLocalReducedEqn_="<<lastLocalReducedEqn_<<FEI_ENDL;
301  }
302 }
303 
304 void
305 Reducer::addGraphEntries(fei::SharedPtr<fei::SparseRowGraph> matrixGraph)
306 {
307  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
308  FEI_OSTREAM& os = *output_stream_;
309  os << dbgprefix_<<"addGraphEntries"<<FEI_ENDL;
310  }
311 
312  //iterate through the incoming matrixGraph, putting its contents into
313  //Kdd, Kdi, Kid and Kii as appropriate.
314 
315  std::vector<int>& rowNumbers = matrixGraph->rowNumbers;
316  std::vector<int>& rowOffsets = matrixGraph->rowOffsets;
317  std::vector<int>& packedCols = matrixGraph->packedColumnIndices;
318 
319  for(unsigned i=0; i<rowNumbers.size(); ++i) {
320  int row = rowNumbers[i];
321 
322  bool slave_row = isSlaveEqn(row);
323 
324  int rowLength = rowOffsets[i+1]-rowOffsets[i];
325  int* cols = &packedCols[rowOffsets[i]];
326 
327  if (slave_row) {
328  fei::CSVec* Kdd_row = Kdd_.create_or_getRow(row);
329  fei::CSVec* Kdi_row = Kdi_.create_or_getRow(row);
330 
331  for(int j=0; j<rowLength; ++j) {
332  int col = cols[j];
333  bool slave_col = isSlaveEqn(col);
334 
335  if (slave_col) {
336  add_entry(*Kdd_row, col, 0.0);
337  }
338  else {
339  add_entry(*Kdi_row, col, 0.0);
340  }
341  }
342  }
343  else {
344  //not a slave row, so add slave columns to Kid, and non-slave
345  //columns to graph.
346  fei::CSVec* Kid_row = Kid_.create_or_getRow(row);
347  fei::CSVec* Kii_row = Kii_.create_or_getRow(row);
348 
349  for(int j=0; j<rowLength; ++j) {
350  int col = cols[j];
351  bool slave_col = isSlaveEqn(col);
352 
353  if (slave_col) {
354  add_entry(*Kid_row, col, 0.0);
355  }
356  else {
357  add_entry(*Kii_row, col, 0.0);
358  }
359  }
360  }
361  }
362 }
363 
364 void
365 Reducer::expand_work_arrays(int size)
366 {
367  if (size <= array_len_) return;
368 
369  array_len_ = size;
370  delete [] bool_array_;
371  delete [] int_array_;
372  delete [] double_array_;
373  bool_array_ = new bool[array_len_];
374  int_array_ = new int[array_len_];
375  double_array_ = new double[array_len_];
376 }
377 
378 void
379 Reducer::addGraphIndices(int numRows, const int* rows,
380  int numCols, const int* cols,
381  fei::Graph& graph)
382 {
383  expand_work_arrays(numCols);
384 
385  bool no_slave_cols = true;
386  for(int i=0; i<numCols; ++i) {
387  bool_array_[i] = isSlaveEqn(cols[i]);
388  if (bool_array_[i]) no_slave_cols = false;
389  }
390 
391  for(int i=0; i<numRows; ++i) {
392  bool slave_row = isSlaveEqn(rows[i]);
393 
394  if (slave_row) {
395  fei::CSVec* Kdd_row = Kdd_.create_or_getRow(rows[i]);
396  fei::CSVec* Kdi_row = Kdi_.create_or_getRow(rows[i]);
397 
398  for(int j=0; j<numCols; ++j) {
399  if (bool_array_[j]) {
400  add_entry(*Kdd_row, cols[j], 0.0);
401  }
402  else {
403  add_entry(*Kdi_row, cols[j], 0.0);
404  }
405  }
406  ++mat_counter_;
407  }
408  else {
409  //not a slave row, so add slave columns to Kid, and non-slave
410  //columns to graph.
411  fei::CSVec* Kid_row = no_slave_cols ?
412  NULL : Kid_.create_or_getRow(rows[i]);
413 
414  unsigned num_non_slave_cols = 0;
415 
416  for(int j=0; j<numCols; ++j) {
417  if (bool_array_[j]) {
418  add_entry(*Kid_row, cols[j], 0.0);
419  ++mat_counter_;
420  }
421  else {
422  int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
423  }
424  }
425 
426  if (num_non_slave_cols > 0) {
427  int reduced_row = translateToReducedEqn(rows[i]);
428  graph.addIndices(reduced_row, num_non_slave_cols, int_array_);
429  }
430  }
431  }
432 
433  if (mat_counter_ > 600) {
434  assembleReducedGraph(&graph, false);
435  }
436 }
437 
438 void
439 Reducer::addSymmetricGraphIndices(int numIndices, const int* indices,
440  bool diagonal,
441  fei::Graph& graph)
442 {
443  if (diagonal) {
444  for(int i=0; i<numIndices; ++i) {
445  addGraphIndices(1, &indices[i], 1, &indices[i], graph);
446  }
447  }
448  else {
449  addGraphIndices(numIndices, indices, numIndices, indices, graph);
450  }
451 }
452 
453 void
454 Reducer::assembleReducedGraph(fei::Graph* graph,
455  bool global_gather)
456 {
457  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
458  FEI_OSTREAM& os = *output_stream_;
459  os << dbgprefix_<<"assembleReducedGraph(fei::Graph)"<<FEI_ENDL;
460  if (output_level_ >= fei::FULL_LOGS) {
461  os << dbgprefix_<<"Kdi:"<<FEI_ENDL<<Kdi_;
462  }
463  }
464 
465  //This function performs the appropriate manipulations (matrix-matrix
466  //products, etc., on the Kid,Kdi,Kdd matrices and then assembles the
467  //results into the input graph structure.
468  //
469 
470  //form tmpMat1_ = Kid*D
471  csrKid = Kid_;
472  fei::multiply_CSRMat_CSRMat(csrKid, csrD_, tmpMat1_, true);
473 
474  csrKdi = Kdi_;
475  fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdi, tmpMat2_, true);
476 
477  fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat1_);
478  fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
479 
480  fei::impl_utils::add_to_graph(tmpMat1_, *graph);
481  fei::impl_utils::add_to_graph(tmpMat2_, *graph);
482 
483  //form tmpMat1_ = D^T*Kdd
484  csrKdd = Kdd_;
485  fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdd, tmpMat1_, true);
486 
487  //form tmpMat2_ = tmpMat1_*D = D^T*Kdd*D
488  fei::multiply_CSRMat_CSRMat(tmpMat1_, csrD_, tmpMat2_, true);
489 
490  fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
491 
492  fei::impl_utils::add_to_graph(tmpMat2_, *graph);
493 
494  //lastly, translate Kii and add it to the graph.
495  csrKii = Kii_;
496  fei::impl_utils::translate_to_reduced_eqns(*this, csrKii);
497  fei::impl_utils::add_to_graph(csrKii, *graph);
498 
499  Kii_.clear();
500  Kdi_.clear();
501  Kid_.clear();
502  Kdd_.clear();
503 
504  mat_counter_ = 0;
505 
506  if (global_gather) {
507  graph->gatherFromOverlap();
508  }
509 }
510 
511 void
512 Reducer::assembleReducedGraph(fei::SparseRowGraph* srgraph)
513 {
514  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
515  FEI_OSTREAM& os = *output_stream_;
516  os << dbgprefix_<<"assembleReducedGraph(fei::SparseRowGraph)"<<FEI_ENDL;
517  }
518 
519  fei::Graph_Impl graph(comm_, firstLocalReducedEqn_, lastLocalReducedEqn_);
520  assembleReducedGraph(&graph);
521  fei::copyToSparseRowGraph(*(graph.getLocalGraph()), *srgraph);
522 }
523 
524 void
525 Reducer::assembleReducedMatrix(fei::Matrix& matrix)
526 {
527  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
528  FEI_OSTREAM& os = *output_stream_;
529  os << dbgprefix_<<"assembleReducedMatrix(fei::Matrix)"<<FEI_ENDL;
530  if (output_level_ >= fei::FULL_LOGS) {
531  os << dbgprefix_<<"Kid:"<<FEI_ENDL<<Kid_;
532  os << dbgprefix_<<"Kdi:"<<FEI_ENDL<<Kdi_;
533  }
534  }
535 
536  //form tmpMat1_ = Kid_*D
537  csrKid = Kid_;
538 
539  fei::multiply_CSRMat_CSRMat(csrKid, csrD_, tmpMat1_);
540 
541  //form tmpMat2_ = D^T*Kdi_
542  csrKdi = Kdi_;
543  fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdi, tmpMat2_);
544 
545  //accumulate the above two results into the global system matrix.
546  fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat1_);
547 
548  fei::impl_utils::add_to_matrix(tmpMat1_, true, matrix);
549 
550  fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
551  fei::impl_utils::add_to_matrix(tmpMat2_, true, matrix);
552 
553  //form tmpMat1_ = D^T*Kdd_
554  csrKdd = Kdd_;
555  fei::multiply_trans_CSRMat_CSRMat(csrD_, csrKdd, tmpMat1_);
556 
557  //form tmpMat2_ = tmpMat1_*D = D^T*Kdd_*D
558  fei::multiply_CSRMat_CSRMat(tmpMat1_, csrD_, tmpMat2_);
559 
560  if (g_nonzero_) {
561  //form tmpVec1_ = Kid_*g_
562  fei::multiply_CSRMat_CSVec(csrKid, csg_, tmpVec1_);
563 
564  //subtract tmpVec1_ from fi_
565  fi_.subtract(tmpVec1_);
566 
567  //we already have tmpMat1_ = D^T*Kdd which was computed above, and we need
568  //to form tmpVec1_ = D^T*Kdd*g_.
569  //So we can simply form tmpVec1_ = tmpMat1_*g_.
570  fei::multiply_CSRMat_CSVec(tmpMat1_, csg_, tmpVec1_);
571 
572  //now subtract tmpVec1_ from the right-hand-side fi_
573  fi_.subtract(tmpVec1_);
574  }
575 
576  //accumulate tmpMat2_ = D^T*Kdd_*D into the global system matrix.
577  fei::impl_utils::translate_to_reduced_eqns(*this, tmpMat2_);
578  fei::impl_utils::add_to_matrix(tmpMat2_, true, matrix);
579 
580  //lastly, translate Kii and add it to the graph.
581  csrKii = Kii_;
582  fei::impl_utils::translate_to_reduced_eqns(*this, csrKii);
583  fei::impl_utils::add_to_matrix(csrKii, true, matrix);
584 
585  Kii_.clear();
586  Kdi_.clear();
587  Kid_.clear();
588  Kdd_.clear();
589 
590  mat_counter_ = 0;
591 }
592 
593 bool
594 Reducer::isSlaveEqn(int unreducedEqn) const
595 {
596  int num = localUnreducedEqns_.size();
597 
598  int offset = num>0 ? unreducedEqn - localUnreducedEqns_[0] : -1;
599  if (offset < 0 || offset >= (int)localUnreducedEqns_.size()) {
600  return(isSlaveCol(unreducedEqn));
601  }
602 
603  return(isSlaveEqn_[offset]);
604 }
605 
606 void
607 Reducer::getSlaveMasterEqns(int slaveEqn, std::vector<int>& masterEqns)
608 {
609  masterEqns.clear();
610 
611  std::vector<int>& rows = csrD_.getGraph().rowNumbers;
612 
613  std::vector<int>::iterator iter =
614  std::lower_bound(rows.begin(), rows.end(), slaveEqn);
615 
616  if (iter == rows.end() || *iter != slaveEqn) {
617  return;
618  }
619 
620  size_t offset = iter - rows.begin();
621 
622  int rowBegin = csrD_.getGraph().rowOffsets[offset];
623  int rowEnd = csrD_.getGraph().rowOffsets[offset+1];
624  std::vector<int>& cols = csrD_.getGraph().packedColumnIndices;
625 
626  for(int j=rowBegin; j<rowEnd; ++j) {
627  masterEqns.push_back(cols[j]);
628  }
629 }
630 
631 bool
632 Reducer::isSlaveCol(int unreducedEqn) const
633 {
634  int idx = fei::binarySearch(unreducedEqn,
635  slavesPtr_, numGlobalSlaves_);
636 
637  return(idx>=0);
638 }
639 
640 int
641 Reducer::translateToReducedEqn(int eqn) const
642 {
643  if (eqn < lowestGlobalSlaveEqn_) {
644  return(eqn);
645  }
646 
647  if (eqn > highestGlobalSlaveEqn_) {
648  return(eqn - numGlobalSlaves_);
649  }
650 
651  int index = 0;
652  int foundOffset = fei::binarySearch(eqn, slavesPtr_, numGlobalSlaves_,
653  index);
654 
655  if (foundOffset >= 0) {
656  throw std::runtime_error("Reducer::translateToReducedEqn ERROR, input is slave eqn.");
657  }
658 
659  return(eqn - index);
660 }
661 
662 int
663 Reducer::translateFromReducedEqn(int reduced_eqn) const
664 {
665  int index = -1;
666  int offset = fei::binarySearch(reduced_eqn, &reverse_[0],
667  reverse_.size(), index);
668  if (offset >= 0) {
669  return(nonslaves_[offset]);
670  }
671 
672  if (index == 0) {
673  return(reduced_eqn);
674  }
675 
676  int adjustment = reduced_eqn - reverse_[index-1];
677 
678  return(nonslaves_[index-1] + adjustment);
679 }
680 
681 int
682 Reducer::addMatrixValues(int numRows, const int* rows,
683  int numCols, const int* cols,
684  const double* const* values,
685  bool sum_into,
686  fei::Matrix& feimat,
687  int format)
688 {
689  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
690  FEI_OSTREAM& os = *output_stream_;
691  os << dbgprefix_<<"addMatrixValues(fei::Matrix)"<<FEI_ENDL;
692  }
693 
694  expand_work_arrays(numCols+numRows);
695 
696  const double** myvalues = const_cast<const double**>(values);
697  if (format != FEI_DENSE_ROW) {
698  if (format != FEI_DENSE_COL) {
699  throw std::runtime_error("fei::Reducer::addMatrixValues ERROR, submatrix format must be either FEI_DENSE_ROW or FEI_DENSE_COL. Other formats not supported with slave constraints.");
700  }
701 
702  fei::Matrix_core::copyTransposeToWorkArrays(numRows, numCols, values,
703  work_1D_, work_2D_);
704  myvalues = &work_2D_[0];
705  }
706 
707  bool no_slave_cols = true;
708  unsigned num_non_slave_cols = 0;
709  for(int j=0; j<numCols; ++j) {
710  bool_array_[j] = isSlaveEqn(cols[j]);
711  if (bool_array_[j]) no_slave_cols = false;
712  else int_array_[num_non_slave_cols++] = translateToReducedEqn(cols[j]);
713  }
714 
715  bool no_slave_rows = true;
716  for(int i=0; i<numRows; ++i) {
717  bool_array_[numCols+i] = isSlaveEqn(rows[i]);
718  if (bool_array_[numCols+i]) no_slave_rows = false;
719  else int_array_[numCols+i] = translateToReducedEqn(rows[i]);
720  }
721 
722  if (no_slave_rows && no_slave_cols) {
723  if (sum_into) {
724  feimat.sumIn(numRows, int_array_+numCols,
725  numCols, int_array_, myvalues, FEI_DENSE_ROW);
726  }
727  else {
728  feimat.copyIn(numRows, int_array_+numCols,
729  numCols, int_array_, myvalues, FEI_DENSE_ROW);
730  }
731 
732  return(0);
733  }
734 
735  for(int i=0; i<numRows; ++i) {
736  if (bool_array_[numCols+i]) {
737  //slave row: slave columns go into Kdd, non-slave columns go
738  //into Kdi.
739  fei::CSVec* Kdd_row = Kdd_.create_or_getRow(rows[i]);
740  fei::CSVec* Kdi_row = Kdi_.create_or_getRow(rows[i]);
741 
742  for(int j=0; j<numCols; ++j) {
743  if (bool_array_[j]) {
744  add_entry(*Kdd_row, cols[j], myvalues[i][j]);
745  }
746  else {
747  add_entry(*Kdi_row, cols[j], myvalues[i][j]);
748  }
749  }
750  ++mat_counter_;
751  }
752  else {//not slave row
753  if (no_slave_cols) {
754  int reduced_row = int_array_[numCols+i];
755  const double* rowvals = myvalues[i];
756  if (sum_into) {
757  feimat.sumIn(1, &reduced_row, numCols, int_array_,
758  &rowvals, format);
759  }
760  else {
761  feimat.copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
762  &rowvals, format);
763  }
764  continue;
765  }
766 
767  //put non-slave columns into Kii,
768  //and slave columns into Kid.
769  fei::CSVec* Kid_row = Kid_.create_or_getRow(rows[i]);
770 
771  unsigned offset = 0;
772  for(int j=0; j<numCols; ++j) {
773  if (bool_array_[j]) {
774  add_entry(*Kid_row, cols[j], myvalues[i][j]);
775  ++mat_counter_;
776  }
777  else {
778  double_array_[offset++] = myvalues[i][j];
779  }
780  }
781 
782  if (num_non_slave_cols > 0) {
783  int reduced_row = int_array_[numCols+i];
784  if (sum_into) {
785  feimat.sumIn(1, &reduced_row, num_non_slave_cols, int_array_,
786  &double_array_, format);
787  }
788  else {
789  feimat.copyIn(1, &reduced_row, num_non_slave_cols, int_array_,
790  &double_array_, format);
791  }
792  }
793  }
794  }
795 
796  if (mat_counter_ > 600) {
797  assembleReducedMatrix(feimat);
798  }
799 
800  return(0);
801 }
802 
803 int
804 Reducer::addVectorValues(int numValues,
805  const int* globalIndices,
806  const double* values,
807  bool sum_into,
808  bool soln_vector,
809  int vectorIndex,
810  fei::Vector& feivec)
811 {
812  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
813  FEI_OSTREAM& os = *output_stream_;
814  os << dbgprefix_<<"addVectorValues(fei::Vector)"<<FEI_ENDL;
815  }
816 
817  for(int i=0; i<numValues; ++i) {
818  if (isSlaveEqn(globalIndices[i])) {
819  if (sum_into) {
820  if (!soln_vector) add_entry(fd_, globalIndices[i], values[i]);
821  }
822  else {
823  if (!soln_vector) put_entry(fd_, globalIndices[i], values[i]);
824  }
825  if (!soln_vector) ++rhs_vec_counter_;
826  }
827  else {
828  int reduced_index = translateToReducedEqn(globalIndices[i]);
829 
830  if (sum_into) {
831  feivec.sumIn(1, &reduced_index, &values[i], vectorIndex);
832  }
833  else {
834  feivec.copyIn(1, &reduced_index, &values[i], vectorIndex);
835  }
836  }
837  }
838 
839  if (rhs_vec_counter_ > 600) {
840  assembleReducedVector(soln_vector, feivec);
841  }
842 
843  return(0);
844 }
845 
846 void
847 Reducer::assembleReducedVector(bool soln_vector,
848  fei::Vector& feivec)
849 {
850  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
851  FEI_OSTREAM& os = *output_stream_;
852  os << dbgprefix_<<"assembleReducedVector(fei::Vector)"<<FEI_ENDL;
853  }
854 
855  if (soln_vector) {
856  return;
857  }
858 
859  fei::CSVec& vec = fd_;
860 
861  if (vec.size() > 0) {
862  //form tmpVec1 = D^T*vec.
863  csvec = vec;
864  fei::multiply_trans_CSRMat_CSVec(csrD_, csvec, tmpVec1_);
865 
866  vec.clear();
867 
868  fei::impl_utils::translate_to_reduced_eqns(*this, tmpVec1_);
869 
870  int which_vector = 0;
871  feivec.sumIn(tmpVec1_.size(), &(tmpVec1_.indices()[0]),
872  &(tmpVec1_.coefs()[0]), which_vector);
873  }
874 
875  fei::CSVec& vec_i = fi_;
876 
877  if (vec_i.size() > 0) {
878  csvec_i = vec_i;
879  fei::impl_utils::translate_to_reduced_eqns(*this, csvec_i);
880 
881  int which_vector = 0;
882  feivec.sumIn(csvec_i.size(), &(csvec_i.indices()[0]),
883  &(csvec_i.coefs()[0]), which_vector);
884 
885  vec_i.clear();
886  }
887 
888  rhs_vec_counter_ = 0;
889 }
890 
891 int
892 Reducer::copyOutVectorValues(int numValues,
893  const int* globalIndices,
894  double* values,
895  bool soln_vector,
896  int vectorIndex,
897  fei::Vector& feivec)
898 {
899  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
900  FEI_OSTREAM& os = *output_stream_;
901  os << dbgprefix_<<"copyOutVectorValues"<<FEI_ENDL;
902  }
903 
904  tmpVec1_.clear();
905  tmpVec2_.clear();
906  std::vector<int> reduced_indices;
907  std::vector<int> offsets;
908 
909  for(int i=0; i<numValues; ++i) {
910  if (isSlaveEqn(globalIndices[i])) {
911  fei::put_entry(tmpVec1_, globalIndices[i], 0.0);
912  offsets.push_back(i);
913  }
914  else {
915  int reduced_idx = translateToReducedEqn(globalIndices[i]);
916  feivec.copyOut(1, &reduced_idx, &values[i], vectorIndex);
917  }
918  }
919 
920  if (tmpVec1_.size() > 0) {
921  fei::multiply_trans_CSRMat_CSVec(csrD_, tmpVec1_, tmpVec2_);
922  int* tmpVec2Indices = tmpVec2_.indices().empty() ? NULL : &(tmpVec2_.indices()[0]);
923  double* tmpVec2Coefs = tmpVec2_.coefs().empty() ? NULL : &(tmpVec2_.coefs()[0]);
924  for(size_t i=0; i<tmpVec2_.size(); ++i) {
925  reduced_indices.push_back(translateToReducedEqn(tmpVec2Indices[i]));
926  }
927 
928  int* reduced_indices_ptr = reduced_indices.empty() ? NULL : &reduced_indices[0];
929  feivec.copyOut(tmpVec2_.size(), reduced_indices_ptr, tmpVec2Coefs, vectorIndex);
930 
931  fei::multiply_CSRMat_CSVec(csrD_, tmpVec2_, tmpVec1_);
932 
933  if (g_nonzero_) {
934  int* ginds = &(csg_.indices()[0]);
935  double* gcoefs = &(csg_.coefs()[0]);
936  for(size_t ii=0; ii<csg_.size(); ++ii) {
937  fei::add_entry(tmpVec1_, ginds[ii], gcoefs[ii]);
938  }
939  }
940 
941  int len = tmpVec1_.size();
942  int* indices = &(tmpVec1_.indices()[0]);
943  double* coefs = &(tmpVec1_.coefs()[0]);
944 
945  for(unsigned ii=0; ii<offsets.size(); ++ii) {
946  int index = globalIndices[offsets[ii]];
947  int idx = fei::binarySearch(index, indices, len);
948  if (idx < 0) {
949  continue;
950  }
951 
952  values[offsets[ii]] = coefs[idx];
953  }
954  }
955 
956  return(0);
957 }
958 
959 std::vector<int>&
960 Reducer::getLocalReducedEqns()
961 {
962  return(localReducedEqns_);
963 }
964 
965 }//namespace fei
966 
MPI_Comm getCommunicator() const
virtual int copyIn(int numValues, const int *indices, const double *values, int vectorIndex=0)=0
void multiply_CSRMat_CSRMat(const CSRMat &A, const CSRMat &B, CSRMat &C, bool storeResultZeros)
Definition: fei_CSRMat.cpp:189
std::vector< int > rowNumbers
virtual int gatherFromOverlap()=0
void multiply_trans_CSRMat_CSVec(const CSRMat &A, const CSVec &x, CSVec &y)
Definition: fei_CSRMat.cpp:148
virtual table_type * getLocalGraph()=0
std::vector< int > packedColumnIndices
std::vector< int > rowOffsets
virtual int addIndices(int row, int len, const int *indices)=0
virtual fei::SharedPtr< fei::VectorSpace > getRowSpace()=0
int binarySearch(const T &item, const T *list, int len)
virtual int sumIn(int numRows, const int *rows, int numCols, const int *cols, const double *const *values, int format=0)=0
virtual int sumIn(int numValues, const int *indices, const double *values, int vectorIndex=0)=0
T * get() const
int getIndices_Owned(std::vector< int > &globalIndices) const
void multiply_trans_CSRMat_CSRMat(const CSRMat &A, const CSRMat &B, CSRMat &C, bool storeResultZeros)
Definition: fei_CSRMat.cpp:268
void multiply_CSRMat_CSVec(const CSRMat &A, const CSVec &x, CSVec &y)
Definition: fei_CSRMat.cpp:102
virtual int copyIn(int numRows, const int *rows, int numCols, const int *cols, const double *const *values, int format=0)=0
void copyToSparseRowGraph(snl_fei::RaggedTable< MAP_TYPE, SET_TYPE > &table, fei::SparseRowGraph &srg)
virtual int copyOut(int numValues, const int *indices, double *values, int vectorIndex=0) const =0