Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Zoltan2_MachineDragonflyRCAForTesting.hpp
Go to the documentation of this file.
1 #ifndef _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_
2 #define _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_
3 
4 #include <Teuchos_Comm.hpp>
5 #include <Teuchos_CommHelpers.hpp>
6 #include <Zoltan2_Machine.hpp>
7 
8 #include <cstdlib> /* srand, rand */
9 #include <fstream>
10 #include <string>
11 
12 namespace Zoltan2{
13 
21 template <typename pcoord_t, typename part_t>
22 class MachineDragonflyRCAForTesting : public Machine <pcoord_t, part_t> {
23 
24 public:
33  MachineDragonflyRCAForTesting(const Teuchos::Comm<int> &comm):
34  Machine<pcoord_t,part_t>(comm),
35  transformed_networkDim(3),
36  actual_networkDim(3),
37  transformed_procCoords(NULL),
38  actual_procCoords(NULL),
39  transformed_machine_extent(NULL),
40  actual_machine_extent(NULL),
41  num_unique_groups(0),
42  group_count(NULL),
43  is_transformed(false),
44  pl(NULL) {
45 
46  actual_machine_extent = new int[actual_networkDim];
47  this->getActualMachineExtent(this->actual_machine_extent);
48 
49  // Number of ranks in each Dragonfly network group
50  // (i.e. RCA's X coord == Grp g)
51  group_count = new part_t[actual_machine_extent[0]];
52 
53  memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
54 
55  // Transformed dims = 1 + N_y + N_z
56  transformed_networkDim = 1 + actual_machine_extent[1] +
57  actual_machine_extent[2];
58  transformed_machine_extent = new int[transformed_networkDim];
59 
60  // Allocate memory for processor coords
61  actual_procCoords = new pcoord_t *[actual_networkDim];
62  transformed_procCoords = new pcoord_t *[transformed_networkDim];
63 
64  for (int i = 0; i < actual_networkDim; ++i) {
65  actual_procCoords[i] = new pcoord_t[this->numRanks];
66  memset(actual_procCoords[i], 0,
67  sizeof(pcoord_t) * this->numRanks);
68  }
69 
70  pcoord_t *xyz = new pcoord_t[transformed_networkDim];
72  for (int i = 0; i < actual_networkDim; ++i)
73  actual_procCoords[i][this->myRank] = xyz[i];
74  delete [] xyz;
75 
76  // Gather number of ranks in each Dragonfly network group
77  // from across all ranks
78  part_t *tmp_vec = new part_t[actual_machine_extent[0]];
79  memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
80 
81  Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
82  actual_machine_extent[0],
83  group_count,
84  tmp_vec);
85 
86  // remove zero entries from reduced array
87  num_unique_groups = 0;
88 
89  for (int i = 0; i < actual_machine_extent[0]; ++i) {
90  if (tmp_vec[i] > 0) {
91  ++num_unique_groups;
92  }
93  }
94 
95  // Reset group_count array to new size
96  delete[] group_count;
97  group_count = new part_t[num_unique_groups];
98 
99  int pos = 0;
100  for (int i = 0; i < actual_machine_extent[0]; ++i) {
101  if (tmp_vec[i] > 0) {
102  group_count[pos] = tmp_vec[i];
103  ++pos;
104  }
105  }
106 
107  delete[] tmp_vec;
108 
109  // reduceAll the coordinates of each processor.
110  gatherMachineCoordinates(this->actual_procCoords,
111  this->actual_networkDim, comm);
112  }
113 
114  // No necessary wrap arounds for dragonfly networks. Groups
115  // have wrap around, but group all-to-all connection makes unneccessary.
116  virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
117  return false;
118  }
119 
120 
130  MachineDragonflyRCAForTesting(const Teuchos::Comm<int> &comm,
131  const Teuchos::ParameterList &pl_ ):
132  Machine<pcoord_t,part_t>(comm),
133  transformed_networkDim(3),
134  actual_networkDim(3),
135  transformed_procCoords(NULL),
136  actual_procCoords(NULL),
137  transformed_machine_extent(NULL),
138  actual_machine_extent(NULL),
139  num_unique_groups(0),
140  group_count(NULL),
141  is_transformed(false),
142  pl(&pl_) {
143 
144  actual_machine_extent = new int[actual_networkDim];
145  this->getActualMachineExtent(this->actual_machine_extent);
146 
147  // Number of parts in each Dragonfly network group
148  // (i.e. RCA's X coord == Grp g)
149  group_count = new part_t[actual_machine_extent[0]];
150 
151  memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
152 
153  // Allocate memory for processor coords
154  actual_procCoords = new pcoord_t *[actual_networkDim];
155  transformed_procCoords = new pcoord_t *[transformed_networkDim];
156 
157  pcoord_t *xyz = new pcoord_t[actual_networkDim];
159 
160  // Gather number of ranks in each Dragonfly network group
161  // from across all ranks
162  part_t *tmp_vec = new part_t[actual_machine_extent[0]];
163  memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
164 
165  Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
166  actual_machine_extent[0],
167  group_count,
168  tmp_vec);
169 
170  // remove zero entries from reduced vector
171  num_unique_groups = 0;
172 
173  for (int i = 0; i < actual_machine_extent[0]; ++i) {
174  if (tmp_vec[i] > 0) {
175  ++num_unique_groups;
176  }
177  }
178 
179  // Reset group_count array to new size (# of nonzero groups)
180  delete[] group_count;
181  group_count = new part_t[num_unique_groups];
182 
183  int pos = 0;
184  for (int i = 0; i < actual_machine_extent[0]; ++i)
185  {
186  if (tmp_vec[i] > 0) {
187  group_count[pos] = tmp_vec[i];
188  ++pos;
189  }
190  }
191  delete[] tmp_vec;
192 
193  const Teuchos::ParameterEntry *pe2 =
194  this->pl->getEntryPtr("Machine_Optimization_Level");
195 
196  // Transform with mach opt level
197  if (pe2) {
198  int optimization_level;
199  optimization_level = pe2->getValue<int>(&optimization_level);
200 
201  if (optimization_level > 0) {
202  is_transformed = true;
203 
204  // Transformed dims = 1 + N_y + N_z
205  transformed_networkDim = 1 + actual_machine_extent[1] +
206  actual_machine_extent[2];
207  transformed_machine_extent = new int[transformed_networkDim];
208 
209  transformed_procCoords = new pcoord_t *[transformed_networkDim];
210 
211  // Allocate memory for transformed coordinates
212  for (int i = 0; i < transformed_networkDim; ++i) {
213  transformed_procCoords[i] = new pcoord_t[this->numRanks];
214  memset(transformed_procCoords[i], 0,
215  sizeof(pcoord_t) * this->numRanks);
216  }
217 
218  // Calculate transformed coordinates and machine extents
219  int nx = this->actual_machine_extent[0];
220  int ny = this->actual_machine_extent[1];
221  int nz = this->actual_machine_extent[2];
222 
223  const Teuchos::ParameterEntry *pe_x =
224  this->pl->getEntryPtr("Machine_X_Stretch");
225  const Teuchos::ParameterEntry *pe_y =
226  this->pl->getEntryPtr("Machine_Y_Stretch");
227  const Teuchos::ParameterEntry *pe_z =
228  this->pl->getEntryPtr("Machine_Z_Stretch");
229 
230  // Default X,Y,Z stretches
231  int x_stretch = 3;
232  int y_stretch = 2;
233  int z_stretch = 1;
234 
235  if (pe_x)
236  x_stretch = pe_x->getValue<int>(&x_stretch);
237  if (pe_y)
238  y_stretch = pe_y->getValue<int>(&y_stretch);
239  if (pe_x)
240  z_stretch = pe_z->getValue<int>(&z_stretch);
241 
242  // Transform X coords
243  transformed_procCoords[0][this->myRank] =
244  x_stretch * xyz[0] * ny * nz;
245 
246  // Transform Y coords
247  for (int i = 1; i < 1 + ny; ++i) {
248  // Shift y-coord given a group, xyz[0];
249  transformed_procCoords[i][this->myRank] = 0;
250  // Increment in the dim where y-coord present
251  if (xyz[1] == i - 1) {
252  transformed_procCoords[i][this->myRank] = y_stretch;
253  }
254  }
255  // Transform Z coords
256  for (int i = 1 + ny; i < transformed_networkDim; ++i) {
257  // Shift z-coord given a group, xyz[0];
258  transformed_procCoords[i][this->myRank] = 0;
259  // Increment in the dim where z-coord present
260  if (xyz[2] == i - (1 + ny))
261  transformed_procCoords[i][this->myRank] = z_stretch;
262  }
263 
264  this->transformed_machine_extent = new int[transformed_networkDim];
265 
266  // Maximum extents in shifted high dim coordinate system
267  this->transformed_machine_extent[0] = x_stretch * (nx - 1) * ny * nz;
268  for (int i = 1; i < 1 + ny; ++i) {
269  this->transformed_machine_extent[i] = y_stretch;
270  }
271  for (int i = 1 + ny; i < transformed_networkDim; ++i) {
272  this->transformed_machine_extent[i] = z_stretch;
273  }
274 
275  // reduceAll the transformed coordinates of each processor.
276  gatherMachineCoordinates(this->transformed_procCoords,
277  this->transformed_networkDim, comm);
278 
279  this->printAllocation();
280  }
281  }
282  // If no coordinate transformation, gather actual coords
283  if (!is_transformed) {
284 
285  for (int i = 0; i < actual_networkDim; ++i) {
286  actual_procCoords[i] = new pcoord_t[this->numRanks];
287  memset(actual_procCoords[i], 0,
288  sizeof(pcoord_t) * this->numRanks);
289  }
290 
291  for (int i = 0; i < actual_networkDim; ++i)
292  actual_procCoords[i][this->myRank] = xyz[i];
293 
294  // reduceAll the actual coordinates of each processor
295  gatherMachineCoordinates(this->actual_procCoords,
296  this->actual_networkDim, comm);
297 
298  this->printAllocation();
299  }
300  delete [] xyz;
301  }
302 
303  // Destructor
305  if (is_transformed) {
306  is_transformed = false;
307  if (this->numRanks > 1) {
308  for (int i = 0; i < transformed_networkDim; ++i) {
309  delete [] transformed_procCoords[i];
310  }
311  }
312  delete [] transformed_machine_extent;
313  }
314  else {
315  if (this->numRanks > 1) {
316  for (int i = 0; i < actual_networkDim; ++i) {
317  delete [] actual_procCoords[i];
318  }
319  }
320  }
321 
322  delete [] actual_procCoords;
323  delete [] transformed_procCoords;
324 
325  delete [] actual_machine_extent;
326  delete [] group_count;
327  }
328 
329  bool hasMachineCoordinates() const { return true; }
330 
331  // Return dimensions of coords, transformed or actual
332  int getMachineDim() const {
333  if (is_transformed)
334  return this->transformed_networkDim;
335  else
336  return this->actual_networkDim;
337  }
338 
339  // Return the transformed maximum machine extents
340  bool getTransformedMachineExtent(int *nxyz) const {
341  if (is_transformed) {
342  for (int dim = 0; dim < transformed_networkDim; ++dim)
343  nxyz[dim] = this->transformed_machine_extent[dim];
344 
345  return true;
346  }
347  else
348  return false;
349  }
350 
351  // Return the fake "RCA" machine extents for testing
352  bool getActualMachineExtent(int *nxyz) const {
353 /*
354 #if defined (HAVE_ZOLTAN2_RCALIB)
355  mesh_coord_t mxyz;
356  rca_get_max_dimension(&mxyz);
357 
358  int dim = 0;
359  nxyz[dim++] = mxyz.mesh_x + 1; // X - group [0, ~100]
360  nxyz[dim++] = mxyz.mesh_y + 1; // Y - row within group [0, 5]
361  nxyz[dim++] = mxyz.mesh_z + 1; // Z - col within row [0, 15]
362  return true;
363 #else
364  return false;
365 #endif
366 */
367 
368  nxyz[0] = 11; // X - group
369  nxyz[1] = 6; // Y - row within group
370  nxyz[2] = 16; // Z - col within group
371 
372  // Needed for test/unit_test/Machine.cpp PASS
373 // nxyz[0] = 4;
374 // nxyz[1] = 8;
375 // nxyz[2] = 12;
376 
377  return true;
378  }
379 
380  // Return machine extents, transformed or actual
381  bool getMachineExtent(int *nxyz) const {
382  if (is_transformed)
383  this->getTransformedMachineExtent(nxyz);
384  else
385  this->getActualMachineExtent(nxyz);
386 
387  return true;
388  }
389 
390  // Return number of groups (RCA X-dim) with allocated nodes
391  part_t getNumUniqueGroups() const override{
392  return this->num_unique_groups;
393  }
394 
395  // Return number of ranks in each group (RCA X-dim) in an allocation
396  bool getGroupCount(part_t *grp_count) const override {
397 
398  if (group_count != NULL) {
399  for (int i = 0; i < num_unique_groups; ++i) {
400  grp_count[i] = this->group_count[i];
401  }
402 
403  return true;
404  }
405  else
406  return false;
407  }
408 
409  // Print allocation coords and extents on rank 0, transformed or actual
411  if (this->myRank >= 0) {
412  // Print transformed coordinates and extents
413  if (is_transformed) {
414  for (int i = 0; i < this->numRanks; ++i) {
415  std::cout << "Rank:" << i << " ";
416  for (int j = 0; j < this->transformed_networkDim; ++j) {
417  std::cout << " " << this->transformed_procCoords[j][i];
418  }
419  std::cout << std::endl;
420  }
421 
422  std::cout << std::endl << "Transformed Machine Extent: ";
423  for (int i = 0; i < this->transformed_networkDim; ++i) {
424  std::cout << " " << this->transformed_machine_extent[i];
425  }
426  std::cout << std::endl;
427  }
428  // Print actual coordinates and extents
429  else {
430  for (int i = 0; i < this->numRanks; ++i) {
431  std::cout << "Rank:" << i;
432  for (int j = 0; j < this->actual_networkDim; ++j) {
433  std::cout << " " << actual_procCoords[j][i];
434  }
435  std::cout << std::endl;
436  }
437 
438  std::cout << std::endl << "Actual Machine Extent: ";
439  for (int i = 0; i < this->actual_networkDim; ++i) {
440  std::cout << " " << this->actual_machine_extent[i];
441  }
442  std::cout << std::endl;
443  }
444  }
445  }
446 
447  // Return transformed coord for this rank
448  bool getMyTransformedMachineCoordinate(pcoord_t *xyz) {
449  if (is_transformed) {
450  for (int i = 0; i < this->transformed_networkDim; ++i) {
451  xyz[i] = transformed_procCoords[i][this->myRank];
452  }
453 
454  return true;
455  }
456  else
457  return false;
458  }
459 
460  // Return the fake "RCA" coord for this rank for testing
461  bool getMyActualMachineCoordinate(pcoord_t *xyz) {
462 /*
463 #if defined (HAVE_ZOLTAN2_RCALIB)
464  // Cray node info for current node
465  rs_node_t nodeInfo;
466  rca_get_nodeid(&nodeInfo);
467 
468  // Current node ID
469  int NIDs = (int)nodeInfo.rs_node_s._node_id;
470 
471  mesh_coord_t node_coord;
472  int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
473  if (returnval == -1) {
474  return false;
475  }
476  xyz[0] = node_coord.mesh_x;
477  xyz[1] = node_coord.mesh_y;
478  xyz[2] = node_coord.mesh_z;
479  return true;
480 #else
481  return false;
482 #endif
483 */
484  srand(this->myRank);
485 
486  int x = rand() % 11;
487  int y = rand() % 6;
488  int z = rand() % 16;
489 
490  xyz[0] = x;
491  xyz[1] = y;
492  xyz[2] = z;
493 
494  // Needed for test/unit_test/Machine.cpp PASS
495 // xyz[0] = this->myRank;
496 // xyz[1] = this->numRanks;
497 // xyz[2] = this->numRanks + 1;
498 
499  group_count[x]++;
500 
501  return true;
502  }
503 
504  // Return machine coordinate for this rank, transformed or actual
505  bool getMyMachineCoordinate(pcoord_t *xyz) {
506  if (is_transformed)
508  else
509  this->getMyActualMachineCoordinate(xyz);
510 
511  return true;
512  }
513 
514  // Return machine coord of given rank, transformed or actual
515  inline bool getMachineCoordinate(const int rank,
516  pcoord_t *xyz) const {
517  if (is_transformed) {
518  for (int i = 0; i < this->transformed_networkDim; ++i) {
519  xyz[i] = transformed_procCoords[i][rank];
520  }
521  }
522  else {
523  for (int i = 0; i < this->actual_networkDim; ++i) {
524  xyz[i] = actual_procCoords[i][rank];
525  }
526  }
527 
528  return true;
529  }
530 
531  bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
532  return false; // cannot yet return from nodename
533  }
534 
535  // Return view of all machine coords, transformed or actual
536  bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
537  if (is_transformed) {
538  allCoords = transformed_procCoords;
539  }
540  else {
541  allCoords = actual_procCoords;
542  }
543 
544  return true;
545  }
546 
547  // Return (approx) hop count from rank1 to rank2. Does not account for
548  // Dragonfly's dynamic routing.
549  virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) {
550  hops = 0;
551 
552  if (rank1 == rank2)
553  return true;
554  if (rank1 >= this->numRanks || rank2 >= this->numRanks) {
555  std::cerr << "Rank outside bounds for the machine ranks";
556  exit(1);
557  }
558 
559  if (this->is_transformed) {
560  // Case: ranks in different groups (i.e. different RCA x-coords)
561  // Does not account for location of group to group connection.
562  // (Most group to group messages will take 5 hops)
563  if (this->transformed_procCoords[0][rank1] !=
564  this->transformed_procCoords[0][rank2])
565  {
566  hops = 5;
567 
568  return true;
569  }
570 
571  // Case: ranks in same group
572  // For each 2 differences in transformed_coordinates then
573  // 1 hop
574  for (int i = 1; i < this->transformed_networkDim; ++i) {
575  if (this->transformed_procCoords[i][rank1] !=
576  this->transformed_procCoords[i][rank2])
577  ++hops;
578  }
579  hops /= 2;
580  }
581  else {
582  // Case: ranks in different groups
583  // Does not account for location of group to group connection.
584  // (Nearly all group to group messages will take 5 hops)
585  if (this->actual_procCoords[0][rank1] !=
586  this->actual_procCoords[0][rank2])
587  {
588  hops = 5;
589  return true;
590  }
591 
592  // Case: ranks in same group
593  // For each difference in actual_coordinates then
594  // 1 hop
595  for (int i = 1; i < this->actual_networkDim; ++i) {
596  if (this->actual_procCoords[i][rank1] !=
597  this->actual_procCoords[i][rank2])
598  ++hops;
599  }
600  }
601 
602  return true;
603  }
604 
605 private:
606 
607  // # of dimensions in the stored coordinates, transformed or actual
608  int transformed_networkDim;
609  int actual_networkDim;
610 
611  // Machine Coordinates
612  pcoord_t **transformed_procCoords;
613  pcoord_t **actual_procCoords;
614 
615  // Maximum extents for each dimension, transformed or actual
616  part_t *transformed_machine_extent;
617  part_t *actual_machine_extent;
618 
619  // Number of groups (RCA X-dim) with nonzero nodes allocated
620  part_t num_unique_groups;
621  // Distribution of nodes in each group (zero node groups have been trimmed)
622  part_t *group_count;
623 
624  // Are out coordinates transformed?
625  bool is_transformed;
626 
627  const Teuchos::ParameterList *pl;
628 
629 
630  // reduceAll the machine coordinates
631  void gatherMachineCoordinates(pcoord_t **&coords, int netDim,
632  const Teuchos::Comm<int> &comm) {
633  // Reduces and stores all machine coordinates.
634  pcoord_t *tmpVect = new pcoord_t [this->numRanks];
635 
636  for (int i = 0; i < netDim; ++i) {
637  Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
638  this->numRanks,
639  coords[i], tmpVect);
640  pcoord_t *tmp = tmpVect;
641  tmpVect = coords[i];
642  coords[i] = tmp;
643  }
644  delete [] tmpVect;
645  }
646 
647 };
648 
649 } // namespace Zoltan2
650 
651 #endif
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops)
getHopCount function set hops between rank1 and rank2 return true if coordinates are available ...
MachineDragonflyRCAForTesting(const Teuchos::Comm< int > &comm)
Constructor: Dragonfly (e.g. Cori & Trinity) RCA network machine description.
A Dragonfly (e.g. Cori, Trinity, Theta) Machine Class for testing only. A more realistic machine shou...
MachineDragonflyRCAForTesting(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
MachineClass Base class for representing machine coordinates, networks, etc.
SparseMatrixAdapter_t::part_t part_t
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
part_t getNumUniqueGroups() const override
getNumUniqueGroups function return the number of unique Dragonfly network groups in provided allocati...