Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Zoltan2_MachineDragonflyRCA.hpp
Go to the documentation of this file.
1 #ifndef _ZOLTAN2_MACHINE_DRAGONFLY_RCALIB_HPP_
2 #define _ZOLTAN2_MACHINE_DRAGONFLY_RCALIB_HPP_
3 
4 #include <Teuchos_Comm.hpp>
5 #include <Teuchos_CommHelpers.hpp>
6 #include <Zoltan2_Machine.hpp>
7 
8 #ifdef HAVE_ZOLTAN2_RCALIB
9 extern "C"{
10 #include <rca_lib.h>
11 }
12 #endif
13 
14 namespace Zoltan2{
15 
72 template <typename pcoord_t, typename part_t>
73 class MachineDragonflyRCA : public Machine <pcoord_t, part_t> {
74 
75 public:
76 
84  MachineDragonflyRCA(const Teuchos::Comm<int> &comm):
85  Machine<pcoord_t,part_t>(comm),
86  transformed_networkDim(3),
87  actual_networkDim(3),
88  transformed_procCoords(NULL),
89  actual_procCoords(NULL),
90  transformed_machine_extent(NULL),
91  actual_machine_extent(NULL),
92  num_unique_groups(0),
93  group_count(NULL),
94  is_transformed(false),
95  pl(NULL) {
96 
97  actual_machine_extent = new int[actual_networkDim];
98  this->getActualMachineExtent(this->actual_machine_extent);
99 
100  // Number of ranks in each Dragonfly network group
101  // (i.e. RCA's X coord == Grp g)
102  group_count = new part_t[actual_machine_extent[0]];
103 
104  memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
105 
106  // Transformed dims = 1 + N_y + N_z
107  transformed_networkDim = 1 + actual_machine_extent[1] +
108  actual_machine_extent[2];
109  transformed_machine_extent = new int[transformed_networkDim];
110 
111  // Allocate memory for processor coords
112  actual_procCoords = new pcoord_t *[actual_networkDim];
113  transformed_procCoords = new pcoord_t *[transformed_networkDim];
114 
115  for (int i = 0; i < actual_networkDim; ++i) {
116  actual_procCoords[i] = new pcoord_t[this->numRanks];
117  memset(actual_procCoords[i], 0,
118  sizeof(pcoord_t) * this->numRanks);
119  }
120 
121  pcoord_t *xyz = new pcoord_t[transformed_networkDim];
123  for (int i = 0; i < actual_networkDim; ++i)
124  actual_procCoords[i][this->myRank] = xyz[i];
125  delete [] xyz;
126 
127  // Gather number of ranks in each Dragonfly network group from
128  // across all ranks
129  part_t * tmp_vec = new part_t[actual_machine_extent[0]];
130  memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
131 
132  Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
133  actual_machine_extent[0],
134  group_count,
135  tmp_vec);
136 
137  // remove zero entries from reduced array
138  num_unique_groups = 0;
139 
140  for (int i = 0; i < actual_machine_extent[0]; ++i) {
141  if (tmp_vec[i] > 0) {
142  ++num_unique_groups;
143  }
144  }
145 
146  // Reset group_count array to new size
147  delete[] group_count;
148  group_count = new part_t[num_unique_groups];
149 
150  int pos = 0;
151  for (int i = 0; i < actual_machine_extent[0]; ++i) {
152  if (tmp_vec[i] > 0) {
153  group_count[pos] = tmp_vec[i];
154  ++pos;
155  }
156  }
157 
158  delete[] tmp_vec;
159 
160  // reduceAll the coordinates of each processor.
161  gatherMachineCoordinates(this->actual_procCoords,
162  this->actual_networkDim, comm);
163  }
164 
165  // No necessary wrap arounds for dragonfly networks. Groups
166  // have wrap around, but group all-to-all connection makes unneccessary.
167  virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
168  return false;
169  }
170 
171 
181  MachineDragonflyRCA(const Teuchos::Comm<int> &comm,
182  const Teuchos::ParameterList &pl_ ):
183  Machine<pcoord_t,part_t>(comm),
184  transformed_networkDim(3),
185  actual_networkDim(3),
186  transformed_procCoords(NULL),
187  actual_procCoords(NULL),
188  transformed_machine_extent(NULL),
189  actual_machine_extent(NULL),
190  num_unique_groups(0),
191  group_count(NULL),
192  is_transformed(false),
193  pl(&pl_)
194  {
195  actual_machine_extent = new int[actual_networkDim];
196  this->getActualMachineExtent(this->actual_machine_extent);
197 
198  // Number of parts in each Group (i.e. RCA's X coord == Grp g)
199  group_count = new part_t[actual_machine_extent[0]];
200 
201  memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
202 
203  // Allocate memory for processor coords
204  actual_procCoords = new pcoord_t *[actual_networkDim];
205  transformed_procCoords = new pcoord_t *[transformed_networkDim];
206 
207  pcoord_t *xyz = new pcoord_t[actual_networkDim];
209 
210  // Gather number of ranks in each Dragonfly network group
211  // from across all ranks
212  part_t * tmp_vec = new part_t[actual_machine_extent[0]];
213  memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
214 
215  Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
216  actual_machine_extent[0],
217  group_count,
218  tmp_vec);
219 
220  // Remove zero entries from reduced array
221  num_unique_groups = 0;
222 
223  for (int i = 0; i < actual_machine_extent[0]; ++i) {
224  if (tmp_vec[i] > 0) {
225  ++num_unique_groups;
226  }
227  }
228 
229  // Reset group_count array to new size
230  delete[] group_count;
231  group_count = new part_t[num_unique_groups];
232 
233  int pos = 0;
234  for (int i = 0; i < actual_machine_extent[0]; ++i) {
235  if (tmp_vec[i] > 0) {
236  group_count[pos] = tmp_vec[i];
237  ++pos;
238  }
239  }
240  delete[] tmp_vec;
241 
242  const Teuchos::ParameterEntry *pe2 =
243  this->pl->getEntryPtr("Machine_Optimization_Level");
244 
245  // Transform with mach opt level
246  if (pe2) {
247  int optimization_level;
248  optimization_level = pe2->getValue<int>(&optimization_level);
249 
250  if (optimization_level > 0) {
251  is_transformed = true;
252 
253  // Transformed dims = 1 + N_y + N_z
254  transformed_networkDim = 1 + actual_machine_extent[1] +
255  actual_machine_extent[2];
256  transformed_machine_extent = new int[transformed_networkDim];
257 
258  transformed_procCoords = new pcoord_t *[transformed_networkDim];
259 
260  // Allocate memory for transformed coordinates
261  for (int i = 0; i < transformed_networkDim; ++i) {
262  transformed_procCoords[i] = new pcoord_t[this->numRanks];
263  memset(transformed_procCoords[i], 0,
264  sizeof(pcoord_t) * this->numRanks);
265  }
266 
267  // Calculate transformed coordinates and machine extents
268  int nx = this->actual_machine_extent[0];
269  int ny = this->actual_machine_extent[1];
270  int nz = this->actual_machine_extent[2];
271 
272  const Teuchos::ParameterEntry *pe_x =
273  this->pl->getEntryPtr("Machine_X_Stretch");
274  const Teuchos::ParameterEntry *pe_y =
275  this->pl->getEntryPtr("Machine_Y_Stretch");
276  const Teuchos::ParameterEntry *pe_z =
277  this->pl->getEntryPtr("Machine_Z_Stretch");
278 
279  // Default X,Y,Z stretches
280  int x_stretch = 3;
281  int y_stretch = 2;
282  int z_stretch = 1;
283 
284  if (pe_x)
285  x_stretch = pe_x->getValue<int>(&x_stretch);
286  if (pe_y)
287  y_stretch = pe_y->getValue<int>(&y_stretch);
288  if (pe_z)
289  z_stretch = pe_z->getValue<int>(&z_stretch);
290 
291  // Transform X coords
292  transformed_procCoords[0][this->myRank] =
293  x_stretch * xyz[0] * ny * nz;
294 
295  // Transform Y coords
296  for (int i = 1; i < 1 + ny; ++i) {
297  // Shift y-coord given a group, xyz[0];
298  transformed_procCoords[i][this->myRank] = 0;
299  // Increment in the dim where y-coord present
300  if (xyz[1] == i - 1)
301  transformed_procCoords[i][this->myRank] = y_stretch;
302  }
303  // Transform Z coords
304  for (int i = 1 + ny; i < transformed_networkDim; ++i) {
305  // Shift z-coord given a group, xyz[0];
306  transformed_procCoords[i][this->myRank] = 0;
307  // Increment in the dim where z-coord present
308  if (xyz[2] == i - (1 + ny))
309  transformed_procCoords[i][this->myRank] = z_stretch;
310  }
311 
312  this->transformed_machine_extent = new int[transformed_networkDim];
313 
314  // Maximum extents in shifted high dim coordinate system
315  this->transformed_machine_extent[0] = x_stretch * (nx - 1) * ny * nz;
316  for (int i = 1; i < 1 + ny; ++i) {
317  this->transformed_machine_extent[i] = y_stretch;
318  }
319  for (int i = 1 + ny; i < transformed_networkDim; ++i) {
320  this->transformed_machine_extent[i] = z_stretch;
321  }
322 
323  // reduceAll the transformed coordinates of each processor.
324  gatherMachineCoordinates(this->transformed_procCoords,
325  this->transformed_networkDim, comm);
326 
327  this->printAllocation();
328  }
329  }
330  // If no coordinate transformation, gather actual coords
331  if (!is_transformed) {
332 
333  for (int i = 0; i < actual_networkDim; ++i) {
334  actual_procCoords[i] = new pcoord_t[this->numRanks];
335  memset(actual_procCoords[i], 0,
336  sizeof(pcoord_t) * this->numRanks);
337  }
338 
339  for (int i = 0; i < actual_networkDim; ++i)
340  actual_procCoords[i][this->myRank] = xyz[i];
341 
342  // reduceAll the actual coordinates of each processor
343  gatherMachineCoordinates(this->actual_procCoords,
344  this->actual_networkDim, comm);
345 
346  this->printAllocation();
347  }
348  delete [] xyz;
349  }
350 
351  // Destructor
353  if (is_transformed) {
354  is_transformed = false;
355  if (this->numRanks > 1) {
356  for (int i = 0; i < transformed_networkDim; ++i) {
357  delete [] transformed_procCoords[i];
358  }
359  }
360  delete [] transformed_machine_extent;
361  }
362  else {
363  if (this->numRanks > 1) {
364  for (int i = 0; i < actual_networkDim; ++i) {
365  delete [] actual_procCoords[i];
366  }
367  }
368  }
369 
370  delete [] actual_procCoords;
371  delete [] transformed_procCoords;
372 
373  delete [] actual_machine_extent;
374  delete [] group_count;
375  }
376 
377  bool hasMachineCoordinates() const { return true; }
378 
379  // Return dimensions of coords, transformed or actual
380  int getMachineDim() const {
381  if (is_transformed)
382  return this->transformed_networkDim;
383  else
384  return this->actual_networkDim;
385  }
386 
387  // Return the transformed maximum machine extents
388  bool getTransformedMachineExtent(int *nxyz) const {
389  if (is_transformed) {
390  for (int dim = 0; dim < transformed_networkDim; ++dim)
391  nxyz[dim] = this->transformed_machine_extent[dim];
392 
393  return true;
394  }
395  else
396  return false;
397  }
398 
399  // Return the actual RCA maximum machine extents
400  bool getActualMachineExtent(int *nxyz) const {
401 #if defined (HAVE_ZOLTAN2_RCALIB)
402  mesh_coord_t mxyz;
403  rca_get_max_dimension(&mxyz);
404 
405  int dim = 0; // Example extents on Cori
406  nxyz[dim++] = mxyz.mesh_x + 1; // X - group [0, ~100]
407  nxyz[dim++] = mxyz.mesh_y + 1; // Y - row within group [0, 5]
408  nxyz[dim++] = mxyz.mesh_z + 1; // Z - col within row [0, 15]
409  return true;
410 #else
411  return false;
412 #endif
413  }
414 
415  // Return machine extents, transformed or actual
416  bool getMachineExtent(int *nxyz) const {
417  if (is_transformed)
418  this->getTransformedMachineExtent(nxyz);
419  else
420  this->getActualMachineExtent(nxyz);
421 
422  return true;
423  }
424 
425  // Return number of groups (RCA X-dim) with allocated nodes
426  part_t getNumUniqueGroups() const override{
427  return this->num_unique_groups;
428  }
429 
430  // Return number of ranks in each group (RCA X-dim) in an allocation
431  bool getGroupCount(part_t *grp_count) const override {
432 
433  if (group_count != NULL) {
434  for (int i = 0; i < num_unique_groups; ++i) {
435  grp_count[i] = this->group_count[i];
436  }
437 
438  return true;
439  }
440  else
441  return false;
442  }
443 
444  // Print allocation coords and extents on rank 0, transformed or actual
446  if (this->myRank == 0) {
447  // Print transformed coordinates and extents
448  if (is_transformed) {
449  for (int i = 0; i < this->numRanks; ++i) {
450  std::cout << "Rank:" << i;
451  for (int j = 0; j < this->transformed_networkDim; ++j) {
452  std::cout << " " << transformed_procCoords[j][i];
453  }
454  std::cout << std::endl;
455  }
456 
457  std::cout << std::endl << "Transformed Machine Extent: ";
458  for (int i = 0; i < this->transformed_networkDim; ++i) {
459  std::cout << " " << this->transformed_machine_extent[i];
460  }
461  std::cout << std::endl;
462  }
463  // Print actual coordinates and extents
464  else {
465  for (int i = 0; i < this->numRanks; ++i) {
466  std::cout << "Rank:" << i;
467  for (int j = 0; j < this->actual_networkDim; ++j) {
468  std::cout << " " << actual_procCoords[j][i];
469  }
470  std::cout << std::endl;
471  }
472 
473  std::cout << std::endl << "Actual Machine Extent: ";
474  for (int i = 0; i < this->actual_networkDim; ++i) {
475  std::cout << " " << this->actual_machine_extent[i];
476  }
477  std::cout << std::endl;
478  }
479  }
480  }
481 
482  // Return transformed coord for this rank
483  bool getMyTransformedMachineCoordinate(pcoord_t *xyz) {
484  if (is_transformed) {
485  for (int i = 0; i < this->transformed_networkDim; ++i) {
486  xyz[i] = transformed_procCoords[i][this->myRank];
487  }
488 
489  return true;
490  }
491  else
492  return false;
493  }
494 
495  // Return actual RCA coord for this rank
496  bool getMyActualMachineCoordinate(pcoord_t *xyz) {
497 #if defined (HAVE_ZOLTAN2_RCALIB)
498  // Cray node info for current node
499  rs_node_t nodeInfo;
500  rca_get_nodeid(&nodeInfo);
501 
502  // Current node ID
503  int NIDs = (int)nodeInfo.rs_node_s._node_id;
504 
505  mesh_coord_t node_coord;
506  int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
507  if (returnval == -1) {
508  return false;
509  }
510 
511  int x = node_coord.mesh_x;
512  int y = node_coord.mesh_y;
513  int z = node_coord.mesh_z;
514 
515  xyz[0] = x;
516  xyz[1] = y;
517  xyz[2] = z;
518 
519  group_count[x]++;
520 
521  return true;
522 #else
523  return false;
524 #endif
525  }
526 
527  // Return machine coordinate for this rank, transformed or actual
528  bool getMyMachineCoordinate(pcoord_t *xyz) {
529  if (is_transformed)
531  else
532  this->getMyActualMachineCoordinate(xyz);
533 
534  return true;
535  }
536 
537  // Return machine coord of given rank, transformed or actual
538  inline bool getMachineCoordinate(const int rank,
539  pcoord_t *xyz) const {
540  if (is_transformed) {
541  for (int i = 0; i < this->transformed_networkDim; ++i) {
542  xyz[i] = transformed_procCoords[i][rank];
543  }
544  }
545  else {
546  for (int i = 0; i < this->actual_networkDim; ++i) {
547  xyz[i] = actual_procCoords[i][rank];
548  }
549  }
550 
551  return true;
552  }
553 
554  bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
555  return false; // cannot yet return from nodename
556  }
557 
558  // Return view of all machine coords, transformed or actual
559  bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
560  if (is_transformed) {
561  allCoords = transformed_procCoords;
562  }
563  else {
564  allCoords = actual_procCoords;
565  }
566 
567  return true;
568  }
569 
570  // Return (approx) hop count from rank1 to rank2. Does not account for
571  // Dragonfly's dynamic routing.
572  virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
573  hops = 0;
574 
575  if (is_transformed) {
576  // Case: ranks in different groups
577  // Does not account for location of group to group connection.
578  // (Most group to group messages will take 5 hops)
579  if (this->transformed_procCoords[0][rank1] !=
580  this->transformed_procCoords[0][rank2])
581  {
582  hops = 5;
583  return true;
584  }
585 
586  // Case: ranks in same group
587  // For each 2 differences in transformed_coordinates then
588  // 1 hop
589  for (int i = 1; i < this->transformed_networkDim; ++i) {
590  if (this->transformed_procCoords[i][rank1] !=
591  this->transformed_procCoords[i][rank2])
592  ++hops;
593  }
594  hops /= 2;
595  }
596  else {
597  // Case: ranks in different groups
598  // Does not account for location of group to group connection.
599  // (Nearly all group to group messages will take 5 hops)
600  if (this->actual_procCoords[0][rank1] !=
601  this->actual_procCoords[0][rank2])
602  {
603  hops = 5;
604  return true;
605  }
606 
607  // Case: ranks in same group
608  // For each difference in actual_coordinates then
609  // 1 hop
610  for (int i = 1; i < actual_networkDim; ++i) {
611  if (this->actual_procCoords[i][rank1] !=
612  this->actual_procCoords[i][rank2])
613  ++hops;
614  }
615  }
616 
617  return true;
618  }
619 
620 private:
621 
622  // # of dimensions in the stored coordinates, transformed or actual
623  int transformed_networkDim;
624  int actual_networkDim;
625 
626  // Machine Coordinates
627  pcoord_t **transformed_procCoords;
628  pcoord_t **actual_procCoords;
629 
630  // Maximum extents for each dimension, transformed or actual
631  part_t *transformed_machine_extent;
632  part_t *actual_machine_extent;
633 
634  // Number of groups (RCA X-dim) with nonzero nodes allocated
635  part_t num_unique_groups;
636  // Distribution of nodes in each group (zero node groups have been trimmed)
637  part_t *group_count;
638 
639  // Are our coordinates transformed?
640  bool is_transformed;
641 
642  const Teuchos::ParameterList *pl;
643 
644  // reduceAll the machine coordinates
645  void gatherMachineCoordinates(pcoord_t **&coords, int netDim,
646  const Teuchos::Comm<int> &comm) {
647  // Reduces and stores all machine coordinates.
648  pcoord_t *tmpVect = new pcoord_t [this->numRanks];
649 
650  for (int i = 0; i < netDim; ++i) {
651  Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
652  this->numRanks,
653  coords[i], tmpVect);
654  pcoord_t *tmp = tmpVect;
655  tmpVect = coords[i];
656  coords[i] = tmp;
657  }
658  delete [] tmpVect;
659  }
660 
661 };
662 
663 } // namespace Zoltan2
664 
665 #endif
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function set hops between rank1 and rank2 return true if coordinates are available ...
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
bool getTransformedMachineExtent(int *nxyz) const
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
MachineClass Base class for representing machine coordinates, networks, etc.
bool getGroupCount(part_t *grp_count) const override
part_t getNumUniqueGroups() const override
getNumUniqueGroups function return the number of unique Dragonfly network groups in provided allocati...
SparseMatrixAdapter_t::part_t part_t
MachineDragonflyRCA(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
MachineDragonflyRCA(const Teuchos::Comm< int > &comm)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
A Dragonfly (e.g. Cori, Trinity, & Theta) Machine Class for task mapping.