Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Zoltan2_MachineDragonflyRCA.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Zoltan2: A package of combinatorial algorithms for scientific computing
4 //
5 // Copyright 2012 NTESS and the Zoltan2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef _ZOLTAN2_MACHINE_DRAGONFLY_RCALIB_HPP_
11 #define _ZOLTAN2_MACHINE_DRAGONFLY_RCALIB_HPP_
12 
13 #include <Teuchos_Comm.hpp>
14 #include <Teuchos_CommHelpers.hpp>
15 #include <Zoltan2_Machine.hpp>
16 
17 #ifdef HAVE_ZOLTAN2_RCALIB
18 extern "C"{
19 #include <rca_lib.h>
20 }
21 #endif
22 
23 namespace Zoltan2{
24 
81 template <typename pcoord_t, typename part_t>
82 class MachineDragonflyRCA : public Machine <pcoord_t, part_t> {
83 
84 public:
85 
93  MachineDragonflyRCA(const Teuchos::Comm<int> &comm):
94  Machine<pcoord_t,part_t>(comm),
95  transformed_networkDim(3),
96  actual_networkDim(3),
97  transformed_procCoords(NULL),
98  actual_procCoords(NULL),
99  transformed_machine_extent(NULL),
100  actual_machine_extent(NULL),
101  num_unique_groups(0),
102  group_count(NULL),
103  is_transformed(false),
104  pl(NULL) {
105 
106  actual_machine_extent = new int[actual_networkDim];
107  this->getActualMachineExtent(this->actual_machine_extent);
108 
109  // Number of ranks in each Dragonfly network group
110  // (i.e. RCA's X coord == Grp g)
111  group_count = new part_t[actual_machine_extent[0]];
112 
113  memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
114 
115  // Transformed dims = 1 + N_y + N_z
116  transformed_networkDim = 1 + actual_machine_extent[1] +
117  actual_machine_extent[2];
118  transformed_machine_extent = new int[transformed_networkDim];
119 
120  // Allocate memory for processor coords
121  actual_procCoords = new pcoord_t *[actual_networkDim];
122  transformed_procCoords = new pcoord_t *[transformed_networkDim];
123 
124  for (int i = 0; i < actual_networkDim; ++i) {
125  actual_procCoords[i] = new pcoord_t[this->numRanks];
126  memset(actual_procCoords[i], 0,
127  sizeof(pcoord_t) * this->numRanks);
128  }
129 
130  pcoord_t *xyz = new pcoord_t[transformed_networkDim];
132  for (int i = 0; i < actual_networkDim; ++i)
133  actual_procCoords[i][this->myRank] = xyz[i];
134  delete [] xyz;
135 
136  // Gather number of ranks in each Dragonfly network group from
137  // across all ranks
138  part_t * tmp_vec = new part_t[actual_machine_extent[0]];
139  memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
140 
141  Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
142  actual_machine_extent[0],
143  group_count,
144  tmp_vec);
145 
146  // remove zero entries from reduced array
147  num_unique_groups = 0;
148 
149  for (int i = 0; i < actual_machine_extent[0]; ++i) {
150  if (tmp_vec[i] > 0) {
151  ++num_unique_groups;
152  }
153  }
154 
155  // Reset group_count array to new size
156  delete[] group_count;
157  group_count = new part_t[num_unique_groups];
158 
159  int pos = 0;
160  for (int i = 0; i < actual_machine_extent[0]; ++i) {
161  if (tmp_vec[i] > 0) {
162  group_count[pos] = tmp_vec[i];
163  ++pos;
164  }
165  }
166 
167  delete[] tmp_vec;
168 
169  // reduceAll the coordinates of each processor.
170  gatherMachineCoordinates(this->actual_procCoords,
171  this->actual_networkDim, comm);
172  }
173 
174  // No necessary wrap arounds for dragonfly networks. Groups
175  // have wrap around, but group all-to-all connection makes unneccessary.
176  virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
177  return false;
178  }
179 
180 
190  MachineDragonflyRCA(const Teuchos::Comm<int> &comm,
191  const Teuchos::ParameterList &pl_ ):
192  Machine<pcoord_t,part_t>(comm),
193  transformed_networkDim(3),
194  actual_networkDim(3),
195  transformed_procCoords(NULL),
196  actual_procCoords(NULL),
197  transformed_machine_extent(NULL),
198  actual_machine_extent(NULL),
199  num_unique_groups(0),
200  group_count(NULL),
201  is_transformed(false),
202  pl(&pl_)
203  {
204  actual_machine_extent = new int[actual_networkDim];
205  this->getActualMachineExtent(this->actual_machine_extent);
206 
207  // Number of parts in each Group (i.e. RCA's X coord == Grp g)
208  group_count = new part_t[actual_machine_extent[0]];
209 
210  memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
211 
212  // Allocate memory for processor coords
213  actual_procCoords = new pcoord_t *[actual_networkDim];
214  transformed_procCoords = new pcoord_t *[transformed_networkDim];
215 
216  pcoord_t *xyz = new pcoord_t[actual_networkDim];
218 
219  // Gather number of ranks in each Dragonfly network group
220  // from across all ranks
221  part_t * tmp_vec = new part_t[actual_machine_extent[0]];
222  memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
223 
224  Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
225  actual_machine_extent[0],
226  group_count,
227  tmp_vec);
228 
229  // Remove zero entries from reduced array
230  num_unique_groups = 0;
231 
232  for (int i = 0; i < actual_machine_extent[0]; ++i) {
233  if (tmp_vec[i] > 0) {
234  ++num_unique_groups;
235  }
236  }
237 
238  // Reset group_count array to new size
239  delete[] group_count;
240  group_count = new part_t[num_unique_groups];
241 
242  int pos = 0;
243  for (int i = 0; i < actual_machine_extent[0]; ++i) {
244  if (tmp_vec[i] > 0) {
245  group_count[pos] = tmp_vec[i];
246  ++pos;
247  }
248  }
249  delete[] tmp_vec;
250 
251  const Teuchos::ParameterEntry *pe2 =
252  this->pl->getEntryPtr("Machine_Optimization_Level");
253 
254  // Transform with mach opt level
255  if (pe2) {
256  int optimization_level;
257  optimization_level = pe2->getValue<int>(&optimization_level);
258 
259  if (optimization_level > 0) {
260  is_transformed = true;
261 
262  // Transformed dims = 1 + N_y + N_z
263  transformed_networkDim = 1 + actual_machine_extent[1] +
264  actual_machine_extent[2];
265  transformed_machine_extent = new int[transformed_networkDim];
266 
267  transformed_procCoords = new pcoord_t *[transformed_networkDim];
268 
269  // Allocate memory for transformed coordinates
270  for (int i = 0; i < transformed_networkDim; ++i) {
271  transformed_procCoords[i] = new pcoord_t[this->numRanks];
272  memset(transformed_procCoords[i], 0,
273  sizeof(pcoord_t) * this->numRanks);
274  }
275 
276  // Calculate transformed coordinates and machine extents
277  int nx = this->actual_machine_extent[0];
278  int ny = this->actual_machine_extent[1];
279  int nz = this->actual_machine_extent[2];
280 
281  const Teuchos::ParameterEntry *pe_x =
282  this->pl->getEntryPtr("Machine_X_Stretch");
283  const Teuchos::ParameterEntry *pe_y =
284  this->pl->getEntryPtr("Machine_Y_Stretch");
285  const Teuchos::ParameterEntry *pe_z =
286  this->pl->getEntryPtr("Machine_Z_Stretch");
287 
288  // Default X,Y,Z stretches
289  int x_stretch = 3;
290  int y_stretch = 2;
291  int z_stretch = 1;
292 
293  if (pe_x)
294  x_stretch = pe_x->getValue<int>(&x_stretch);
295  if (pe_y)
296  y_stretch = pe_y->getValue<int>(&y_stretch);
297  if (pe_z)
298  z_stretch = pe_z->getValue<int>(&z_stretch);
299 
300  // Transform X coords
301  transformed_procCoords[0][this->myRank] =
302  x_stretch * xyz[0] * ny * nz;
303 
304  // Transform Y coords
305  for (int i = 1; i < 1 + ny; ++i) {
306  // Shift y-coord given a group, xyz[0];
307  transformed_procCoords[i][this->myRank] = 0;
308  // Increment in the dim where y-coord present
309  if (xyz[1] == i - 1)
310  transformed_procCoords[i][this->myRank] = y_stretch;
311  }
312  // Transform Z coords
313  for (int i = 1 + ny; i < transformed_networkDim; ++i) {
314  // Shift z-coord given a group, xyz[0];
315  transformed_procCoords[i][this->myRank] = 0;
316  // Increment in the dim where z-coord present
317  if (xyz[2] == i - (1 + ny))
318  transformed_procCoords[i][this->myRank] = z_stretch;
319  }
320 
321  this->transformed_machine_extent = new int[transformed_networkDim];
322 
323  // Maximum extents in shifted high dim coordinate system
324  this->transformed_machine_extent[0] = x_stretch * (nx - 1) * ny * nz;
325  for (int i = 1; i < 1 + ny; ++i) {
326  this->transformed_machine_extent[i] = y_stretch;
327  }
328  for (int i = 1 + ny; i < transformed_networkDim; ++i) {
329  this->transformed_machine_extent[i] = z_stretch;
330  }
331 
332  // reduceAll the transformed coordinates of each processor.
333  gatherMachineCoordinates(this->transformed_procCoords,
334  this->transformed_networkDim, comm);
335 
336  this->printAllocation();
337  }
338  }
339  // If no coordinate transformation, gather actual coords
340  if (!is_transformed) {
341 
342  for (int i = 0; i < actual_networkDim; ++i) {
343  actual_procCoords[i] = new pcoord_t[this->numRanks];
344  memset(actual_procCoords[i], 0,
345  sizeof(pcoord_t) * this->numRanks);
346  }
347 
348  for (int i = 0; i < actual_networkDim; ++i)
349  actual_procCoords[i][this->myRank] = xyz[i];
350 
351  // reduceAll the actual coordinates of each processor
352  gatherMachineCoordinates(this->actual_procCoords,
353  this->actual_networkDim, comm);
354 
355  this->printAllocation();
356  }
357  delete [] xyz;
358  }
359 
360  // Destructor
362  if (is_transformed) {
363  is_transformed = false;
364  if (this->numRanks > 1) {
365  for (int i = 0; i < transformed_networkDim; ++i) {
366  delete [] transformed_procCoords[i];
367  }
368  }
369  delete [] transformed_machine_extent;
370  }
371  else {
372  if (this->numRanks > 1) {
373  for (int i = 0; i < actual_networkDim; ++i) {
374  delete [] actual_procCoords[i];
375  }
376  }
377  }
378 
379  delete [] actual_procCoords;
380  delete [] transformed_procCoords;
381 
382  delete [] actual_machine_extent;
383  delete [] group_count;
384  }
385 
386  bool hasMachineCoordinates() const { return true; }
387 
388  // Return dimensions of coords, transformed or actual
389  int getMachineDim() const {
390  if (is_transformed)
391  return this->transformed_networkDim;
392  else
393  return this->actual_networkDim;
394  }
395 
396  // Return the transformed maximum machine extents
397  bool getTransformedMachineExtent(int *nxyz) const {
398  if (is_transformed) {
399  for (int dim = 0; dim < transformed_networkDim; ++dim)
400  nxyz[dim] = this->transformed_machine_extent[dim];
401 
402  return true;
403  }
404  else
405  return false;
406  }
407 
408  // Return the actual RCA maximum machine extents
409  bool getActualMachineExtent(int *nxyz) const {
410 #if defined (HAVE_ZOLTAN2_RCALIB)
411  mesh_coord_t mxyz;
412  rca_get_max_dimension(&mxyz);
413 
414  int dim = 0; // Example extents on Cori
415  nxyz[dim++] = mxyz.mesh_x + 1; // X - group [0, ~100]
416  nxyz[dim++] = mxyz.mesh_y + 1; // Y - row within group [0, 5]
417  nxyz[dim++] = mxyz.mesh_z + 1; // Z - col within row [0, 15]
418  return true;
419 #else
420  return false;
421 #endif
422  }
423 
424  // Return machine extents, transformed or actual
425  bool getMachineExtent(int *nxyz) const {
426  if (is_transformed)
427  this->getTransformedMachineExtent(nxyz);
428  else
429  this->getActualMachineExtent(nxyz);
430 
431  return true;
432  }
433 
434  // Return number of groups (RCA X-dim) with allocated nodes
435  part_t getNumUniqueGroups() const override{
436  return this->num_unique_groups;
437  }
438 
439  // Return number of ranks in each group (RCA X-dim) in an allocation
440  bool getGroupCount(part_t *grp_count) const override {
441 
442  if (group_count != NULL) {
443  for (int i = 0; i < num_unique_groups; ++i) {
444  grp_count[i] = this->group_count[i];
445  }
446 
447  return true;
448  }
449  else
450  return false;
451  }
452 
453  // Print allocation coords and extents on rank 0, transformed or actual
455  if (this->myRank == 0) {
456  // Print transformed coordinates and extents
457  if (is_transformed) {
458  for (int i = 0; i < this->numRanks; ++i) {
459  std::cout << "Rank:" << i;
460  for (int j = 0; j < this->transformed_networkDim; ++j) {
461  std::cout << " " << transformed_procCoords[j][i];
462  }
463  std::cout << std::endl;
464  }
465 
466  std::cout << std::endl << "Transformed Machine Extent: ";
467  for (int i = 0; i < this->transformed_networkDim; ++i) {
468  std::cout << " " << this->transformed_machine_extent[i];
469  }
470  std::cout << std::endl;
471  }
472  // Print actual coordinates and extents
473  else {
474  for (int i = 0; i < this->numRanks; ++i) {
475  std::cout << "Rank:" << i;
476  for (int j = 0; j < this->actual_networkDim; ++j) {
477  std::cout << " " << actual_procCoords[j][i];
478  }
479  std::cout << std::endl;
480  }
481 
482  std::cout << std::endl << "Actual Machine Extent: ";
483  for (int i = 0; i < this->actual_networkDim; ++i) {
484  std::cout << " " << this->actual_machine_extent[i];
485  }
486  std::cout << std::endl;
487  }
488  }
489  }
490 
491  // Return transformed coord for this rank
492  bool getMyTransformedMachineCoordinate(pcoord_t *xyz) {
493  if (is_transformed) {
494  for (int i = 0; i < this->transformed_networkDim; ++i) {
495  xyz[i] = transformed_procCoords[i][this->myRank];
496  }
497 
498  return true;
499  }
500  else
501  return false;
502  }
503 
504  // Return actual RCA coord for this rank
505  bool getMyActualMachineCoordinate(pcoord_t *xyz) {
506 #if defined (HAVE_ZOLTAN2_RCALIB)
507  // Cray node info for current node
508  rs_node_t nodeInfo;
509  rca_get_nodeid(&nodeInfo);
510 
511  // Current node ID
512  int NIDs = (int)nodeInfo.rs_node_s._node_id;
513 
514  mesh_coord_t node_coord;
515  int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
516  if (returnval == -1) {
517  return false;
518  }
519 
520  int x = node_coord.mesh_x;
521  int y = node_coord.mesh_y;
522  int z = node_coord.mesh_z;
523 
524  xyz[0] = x;
525  xyz[1] = y;
526  xyz[2] = z;
527 
528  group_count[x]++;
529 
530  return true;
531 #else
532  return false;
533 #endif
534  }
535 
536  // Return machine coordinate for this rank, transformed or actual
537  bool getMyMachineCoordinate(pcoord_t *xyz) {
538  if (is_transformed)
540  else
541  this->getMyActualMachineCoordinate(xyz);
542 
543  return true;
544  }
545 
546  // Return machine coord of given rank, transformed or actual
547  inline bool getMachineCoordinate(const int rank,
548  pcoord_t *xyz) const {
549  if (is_transformed) {
550  for (int i = 0; i < this->transformed_networkDim; ++i) {
551  xyz[i] = transformed_procCoords[i][rank];
552  }
553  }
554  else {
555  for (int i = 0; i < this->actual_networkDim; ++i) {
556  xyz[i] = actual_procCoords[i][rank];
557  }
558  }
559 
560  return true;
561  }
562 
563  bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
564  return false; // cannot yet return from nodename
565  }
566 
567  // Return view of all machine coords, transformed or actual
568  bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
569  if (is_transformed) {
570  allCoords = transformed_procCoords;
571  }
572  else {
573  allCoords = actual_procCoords;
574  }
575 
576  return true;
577  }
578 
579  // Return (approx) hop count from rank1 to rank2. Does not account for
580  // Dragonfly's dynamic routing.
581  virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
582  hops = 0;
583 
584  if (is_transformed) {
585  // Case: ranks in different groups
586  // Does not account for location of group to group connection.
587  // (Most group to group messages will take 5 hops)
588  if (this->transformed_procCoords[0][rank1] !=
589  this->transformed_procCoords[0][rank2])
590  {
591  hops = 5;
592  return true;
593  }
594 
595  // Case: ranks in same group
596  // For each 2 differences in transformed_coordinates then
597  // 1 hop
598  for (int i = 1; i < this->transformed_networkDim; ++i) {
599  if (this->transformed_procCoords[i][rank1] !=
600  this->transformed_procCoords[i][rank2])
601  ++hops;
602  }
603  hops /= 2;
604  }
605  else {
606  // Case: ranks in different groups
607  // Does not account for location of group to group connection.
608  // (Nearly all group to group messages will take 5 hops)
609  if (this->actual_procCoords[0][rank1] !=
610  this->actual_procCoords[0][rank2])
611  {
612  hops = 5;
613  return true;
614  }
615 
616  // Case: ranks in same group
617  // For each difference in actual_coordinates then
618  // 1 hop
619  for (int i = 1; i < actual_networkDim; ++i) {
620  if (this->actual_procCoords[i][rank1] !=
621  this->actual_procCoords[i][rank2])
622  ++hops;
623  }
624  }
625 
626  return true;
627  }
628 
629 private:
630 
631  // # of dimensions in the stored coordinates, transformed or actual
632  int transformed_networkDim;
633  int actual_networkDim;
634 
635  // Machine Coordinates
636  pcoord_t **transformed_procCoords;
637  pcoord_t **actual_procCoords;
638 
639  // Maximum extents for each dimension, transformed or actual
640  part_t *transformed_machine_extent;
641  part_t *actual_machine_extent;
642 
643  // Number of groups (RCA X-dim) with nonzero nodes allocated
644  part_t num_unique_groups;
645  // Distribution of nodes in each group (zero node groups have been trimmed)
646  part_t *group_count;
647 
648  // Are our coordinates transformed?
649  bool is_transformed;
650 
651  const Teuchos::ParameterList *pl;
652 
653  // reduceAll the machine coordinates
654  void gatherMachineCoordinates(pcoord_t **&coords, int netDim,
655  const Teuchos::Comm<int> &comm) {
656  // Reduces and stores all machine coordinates.
657  pcoord_t *tmpVect = new pcoord_t [this->numRanks];
658 
659  for (int i = 0; i < netDim; ++i) {
660  Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
661  this->numRanks,
662  coords[i], tmpVect);
663  pcoord_t *tmp = tmpVect;
664  tmpVect = coords[i];
665  coords[i] = tmp;
666  }
667  delete [] tmpVect;
668  }
669 
670 };
671 
672 } // namespace Zoltan2
673 
674 #endif
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops between rank1 and rank2; returns true if coordinates are available ...
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
bool getTransformedMachineExtent(int *nxyz) const
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
MachineClass Base class for representing machine coordinates, networks, etc.
bool getGroupCount(part_t *grp_count) const override
part_t getNumUniqueGroups() const override
getNumUniqueGroups function return the number of unique Dragonfly network groups in provided allocati...
SparseMatrixAdapter_t::part_t part_t
MachineDragonflyRCA(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
MachineDragonflyRCA(const Teuchos::Comm< int > &comm)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
A Dragonfly (e.g. Cori, Trinity, & Theta) Machine Class for task mapping.