Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Zoltan2_MachineDragonflyRCAForTesting.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Zoltan2: A package of combinatorial algorithms for scientific computing
4 //
5 // Copyright 2012 NTESS and the Zoltan2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_
11 #define _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_
12 
13 #include <Teuchos_Comm.hpp>
14 #include <Teuchos_CommHelpers.hpp>
15 #include <Zoltan2_Machine.hpp>
16 
17 #include <cstdlib> /* srand, rand */
18 #include <fstream>
19 #include <string>
20 
21 namespace Zoltan2{
22 
30 template <typename pcoord_t, typename part_t>
31 class MachineDragonflyRCAForTesting : public Machine <pcoord_t, part_t> {
32 
33 public:
42  MachineDragonflyRCAForTesting(const Teuchos::Comm<int> &comm):
43  Machine<pcoord_t,part_t>(comm),
44  transformed_networkDim(3),
45  actual_networkDim(3),
46  transformed_procCoords(NULL),
47  actual_procCoords(NULL),
48  transformed_machine_extent(NULL),
49  actual_machine_extent(NULL),
50  num_unique_groups(0),
51  group_count(NULL),
52  is_transformed(false),
53  pl(NULL) {
54 
55  actual_machine_extent = new int[actual_networkDim];
56  this->getActualMachineExtent(this->actual_machine_extent);
57 
58  // Number of ranks in each Dragonfly network group
59  // (i.e. RCA's X coord == Grp g)
60  group_count = new part_t[actual_machine_extent[0]];
61 
62  memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
63 
64  // Transformed dims = 1 + N_y + N_z
65  transformed_networkDim = 1 + actual_machine_extent[1] +
66  actual_machine_extent[2];
67  transformed_machine_extent = new int[transformed_networkDim];
68 
69  // Allocate memory for processor coords
70  actual_procCoords = new pcoord_t *[actual_networkDim];
71  transformed_procCoords = new pcoord_t *[transformed_networkDim];
72 
73  for (int i = 0; i < actual_networkDim; ++i) {
74  actual_procCoords[i] = new pcoord_t[this->numRanks];
75  memset(actual_procCoords[i], 0,
76  sizeof(pcoord_t) * this->numRanks);
77  }
78 
79  pcoord_t *xyz = new pcoord_t[transformed_networkDim];
81  for (int i = 0; i < actual_networkDim; ++i)
82  actual_procCoords[i][this->myRank] = xyz[i];
83  delete [] xyz;
84 
85  // Gather number of ranks in each Dragonfly network group
86  // from across all ranks
87  part_t *tmp_vec = new part_t[actual_machine_extent[0]];
88  memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
89 
90  Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
91  actual_machine_extent[0],
92  group_count,
93  tmp_vec);
94 
95  // remove zero entries from reduced array
96  num_unique_groups = 0;
97 
98  for (int i = 0; i < actual_machine_extent[0]; ++i) {
99  if (tmp_vec[i] > 0) {
100  ++num_unique_groups;
101  }
102  }
103 
104  // Reset group_count array to new size
105  delete[] group_count;
106  group_count = new part_t[num_unique_groups];
107 
108  int pos = 0;
109  for (int i = 0; i < actual_machine_extent[0]; ++i) {
110  if (tmp_vec[i] > 0) {
111  group_count[pos] = tmp_vec[i];
112  ++pos;
113  }
114  }
115 
116  delete[] tmp_vec;
117 
118  // reduceAll the coordinates of each processor.
119  gatherMachineCoordinates(this->actual_procCoords,
120  this->actual_networkDim, comm);
121  }
122 
123  // No necessary wrap arounds for dragonfly networks. Groups
124  // have wrap around, but group all-to-all connection makes unneccessary.
125  virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
126  return false;
127  }
128 
129 
139  MachineDragonflyRCAForTesting(const Teuchos::Comm<int> &comm,
140  const Teuchos::ParameterList &pl_ ):
141  Machine<pcoord_t,part_t>(comm),
142  transformed_networkDim(3),
143  actual_networkDim(3),
144  transformed_procCoords(NULL),
145  actual_procCoords(NULL),
146  transformed_machine_extent(NULL),
147  actual_machine_extent(NULL),
148  num_unique_groups(0),
149  group_count(NULL),
150  is_transformed(false),
151  pl(&pl_) {
152 
153  actual_machine_extent = new int[actual_networkDim];
154  this->getActualMachineExtent(this->actual_machine_extent);
155 
156  // Number of parts in each Dragonfly network group
157  // (i.e. RCA's X coord == Grp g)
158  group_count = new part_t[actual_machine_extent[0]];
159 
160  memset(group_count, 0, sizeof(part_t) * actual_machine_extent[0]);
161 
162  // Allocate memory for processor coords
163  actual_procCoords = new pcoord_t *[actual_networkDim];
164  transformed_procCoords = new pcoord_t *[transformed_networkDim];
165 
166  pcoord_t *xyz = new pcoord_t[actual_networkDim];
168 
169  // Gather number of ranks in each Dragonfly network group
170  // from across all ranks
171  part_t *tmp_vec = new part_t[actual_machine_extent[0]];
172  memset(tmp_vec, 0, sizeof(part_t) * actual_machine_extent[0]);
173 
174  Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
175  actual_machine_extent[0],
176  group_count,
177  tmp_vec);
178 
179  // remove zero entries from reduced vector
180  num_unique_groups = 0;
181 
182  for (int i = 0; i < actual_machine_extent[0]; ++i) {
183  if (tmp_vec[i] > 0) {
184  ++num_unique_groups;
185  }
186  }
187 
188  // Reset group_count array to new size (# of nonzero groups)
189  delete[] group_count;
190  group_count = new part_t[num_unique_groups];
191 
192  int pos = 0;
193  for (int i = 0; i < actual_machine_extent[0]; ++i)
194  {
195  if (tmp_vec[i] > 0) {
196  group_count[pos] = tmp_vec[i];
197  ++pos;
198  }
199  }
200  delete[] tmp_vec;
201 
202  const Teuchos::ParameterEntry *pe2 =
203  this->pl->getEntryPtr("Machine_Optimization_Level");
204 
205  // Transform with mach opt level
206  if (pe2) {
207  int optimization_level;
208  optimization_level = pe2->getValue<int>(&optimization_level);
209 
210  if (optimization_level > 0) {
211  is_transformed = true;
212 
213  // Transformed dims = 1 + N_y + N_z
214  transformed_networkDim = 1 + actual_machine_extent[1] +
215  actual_machine_extent[2];
216  transformed_machine_extent = new int[transformed_networkDim];
217 
218  transformed_procCoords = new pcoord_t *[transformed_networkDim];
219 
220  // Allocate memory for transformed coordinates
221  for (int i = 0; i < transformed_networkDim; ++i) {
222  transformed_procCoords[i] = new pcoord_t[this->numRanks];
223  memset(transformed_procCoords[i], 0,
224  sizeof(pcoord_t) * this->numRanks);
225  }
226 
227  // Calculate transformed coordinates and machine extents
228  int nx = this->actual_machine_extent[0];
229  int ny = this->actual_machine_extent[1];
230  int nz = this->actual_machine_extent[2];
231 
232  const Teuchos::ParameterEntry *pe_x =
233  this->pl->getEntryPtr("Machine_X_Stretch");
234  const Teuchos::ParameterEntry *pe_y =
235  this->pl->getEntryPtr("Machine_Y_Stretch");
236  const Teuchos::ParameterEntry *pe_z =
237  this->pl->getEntryPtr("Machine_Z_Stretch");
238 
239  // Default X,Y,Z stretches
240  int x_stretch = 3;
241  int y_stretch = 2;
242  int z_stretch = 1;
243 
244  if (pe_x)
245  x_stretch = pe_x->getValue<int>(&x_stretch);
246  if (pe_y)
247  y_stretch = pe_y->getValue<int>(&y_stretch);
248  if (pe_x)
249  z_stretch = pe_z->getValue<int>(&z_stretch);
250 
251  // Transform X coords
252  transformed_procCoords[0][this->myRank] =
253  x_stretch * xyz[0] * ny * nz;
254 
255  // Transform Y coords
256  for (int i = 1; i < 1 + ny; ++i) {
257  // Shift y-coord given a group, xyz[0];
258  transformed_procCoords[i][this->myRank] = 0;
259  // Increment in the dim where y-coord present
260  if (xyz[1] == i - 1) {
261  transformed_procCoords[i][this->myRank] = y_stretch;
262  }
263  }
264  // Transform Z coords
265  for (int i = 1 + ny; i < transformed_networkDim; ++i) {
266  // Shift z-coord given a group, xyz[0];
267  transformed_procCoords[i][this->myRank] = 0;
268  // Increment in the dim where z-coord present
269  if (xyz[2] == i - (1 + ny))
270  transformed_procCoords[i][this->myRank] = z_stretch;
271  }
272 
273  this->transformed_machine_extent = new int[transformed_networkDim];
274 
275  // Maximum extents in shifted high dim coordinate system
276  this->transformed_machine_extent[0] = x_stretch * (nx - 1) * ny * nz;
277  for (int i = 1; i < 1 + ny; ++i) {
278  this->transformed_machine_extent[i] = y_stretch;
279  }
280  for (int i = 1 + ny; i < transformed_networkDim; ++i) {
281  this->transformed_machine_extent[i] = z_stretch;
282  }
283 
284  // reduceAll the transformed coordinates of each processor.
285  gatherMachineCoordinates(this->transformed_procCoords,
286  this->transformed_networkDim, comm);
287 
288  this->printAllocation();
289  }
290  }
291  // If no coordinate transformation, gather actual coords
292  if (!is_transformed) {
293 
294  for (int i = 0; i < actual_networkDim; ++i) {
295  actual_procCoords[i] = new pcoord_t[this->numRanks];
296  memset(actual_procCoords[i], 0,
297  sizeof(pcoord_t) * this->numRanks);
298  }
299 
300  for (int i = 0; i < actual_networkDim; ++i)
301  actual_procCoords[i][this->myRank] = xyz[i];
302 
303  // reduceAll the actual coordinates of each processor
304  gatherMachineCoordinates(this->actual_procCoords,
305  this->actual_networkDim, comm);
306 
307  this->printAllocation();
308  }
309  delete [] xyz;
310  }
311 
312  // Destructor
314  if (is_transformed) {
315  is_transformed = false;
316  if (this->numRanks > 1) {
317  for (int i = 0; i < transformed_networkDim; ++i) {
318  delete [] transformed_procCoords[i];
319  }
320  }
321  delete [] transformed_machine_extent;
322  }
323  else {
324  if (this->numRanks > 1) {
325  for (int i = 0; i < actual_networkDim; ++i) {
326  delete [] actual_procCoords[i];
327  }
328  }
329  }
330 
331  delete [] actual_procCoords;
332  delete [] transformed_procCoords;
333 
334  delete [] actual_machine_extent;
335  delete [] group_count;
336  }
337 
338  bool hasMachineCoordinates() const { return true; }
339 
340  // Return dimensions of coords, transformed or actual
341  int getMachineDim() const {
342  if (is_transformed)
343  return this->transformed_networkDim;
344  else
345  return this->actual_networkDim;
346  }
347 
348  // Return the transformed maximum machine extents
349  bool getTransformedMachineExtent(int *nxyz) const {
350  if (is_transformed) {
351  for (int dim = 0; dim < transformed_networkDim; ++dim)
352  nxyz[dim] = this->transformed_machine_extent[dim];
353 
354  return true;
355  }
356  else
357  return false;
358  }
359 
360  // Return the fake "RCA" machine extents for testing
361  bool getActualMachineExtent(int *nxyz) const {
362 /*
363 #if defined (HAVE_ZOLTAN2_RCALIB)
364  mesh_coord_t mxyz;
365  rca_get_max_dimension(&mxyz);
366 
367  int dim = 0;
368  nxyz[dim++] = mxyz.mesh_x + 1; // X - group [0, ~100]
369  nxyz[dim++] = mxyz.mesh_y + 1; // Y - row within group [0, 5]
370  nxyz[dim++] = mxyz.mesh_z + 1; // Z - col within row [0, 15]
371  return true;
372 #else
373  return false;
374 #endif
375 */
376 
377  nxyz[0] = 11; // X - group
378  nxyz[1] = 6; // Y - row within group
379  nxyz[2] = 16; // Z - col within group
380 
381  // Needed for test/unit_test/Machine.cpp PASS
382 // nxyz[0] = 4;
383 // nxyz[1] = 8;
384 // nxyz[2] = 12;
385 
386  return true;
387  }
388 
389  // Return machine extents, transformed or actual
390  bool getMachineExtent(int *nxyz) const {
391  if (is_transformed)
392  this->getTransformedMachineExtent(nxyz);
393  else
394  this->getActualMachineExtent(nxyz);
395 
396  return true;
397  }
398 
399  // Return number of groups (RCA X-dim) with allocated nodes
400  part_t getNumUniqueGroups() const override{
401  return this->num_unique_groups;
402  }
403 
404  // Return number of ranks in each group (RCA X-dim) in an allocation
405  bool getGroupCount(part_t *grp_count) const override {
406 
407  if (group_count != NULL) {
408  for (int i = 0; i < num_unique_groups; ++i) {
409  grp_count[i] = this->group_count[i];
410  }
411 
412  return true;
413  }
414  else
415  return false;
416  }
417 
418  // Print allocation coords and extents on rank 0, transformed or actual
420  if (this->myRank >= 0) {
421  // Print transformed coordinates and extents
422  if (is_transformed) {
423  for (int i = 0; i < this->numRanks; ++i) {
424  std::cout << "Rank:" << i << " ";
425  for (int j = 0; j < this->transformed_networkDim; ++j) {
426  std::cout << " " << this->transformed_procCoords[j][i];
427  }
428  std::cout << std::endl;
429  }
430 
431  std::cout << std::endl << "Transformed Machine Extent: ";
432  for (int i = 0; i < this->transformed_networkDim; ++i) {
433  std::cout << " " << this->transformed_machine_extent[i];
434  }
435  std::cout << std::endl;
436  }
437  // Print actual coordinates and extents
438  else {
439  for (int i = 0; i < this->numRanks; ++i) {
440  std::cout << "Rank:" << i;
441  for (int j = 0; j < this->actual_networkDim; ++j) {
442  std::cout << " " << actual_procCoords[j][i];
443  }
444  std::cout << std::endl;
445  }
446 
447  std::cout << std::endl << "Actual Machine Extent: ";
448  for (int i = 0; i < this->actual_networkDim; ++i) {
449  std::cout << " " << this->actual_machine_extent[i];
450  }
451  std::cout << std::endl;
452  }
453  }
454  }
455 
456  // Return transformed coord for this rank
457  bool getMyTransformedMachineCoordinate(pcoord_t *xyz) {
458  if (is_transformed) {
459  for (int i = 0; i < this->transformed_networkDim; ++i) {
460  xyz[i] = transformed_procCoords[i][this->myRank];
461  }
462 
463  return true;
464  }
465  else
466  return false;
467  }
468 
469  // Return the fake "RCA" coord for this rank for testing
470  bool getMyActualMachineCoordinate(pcoord_t *xyz) {
471 /*
472 #if defined (HAVE_ZOLTAN2_RCALIB)
473  // Cray node info for current node
474  rs_node_t nodeInfo;
475  rca_get_nodeid(&nodeInfo);
476 
477  // Current node ID
478  int NIDs = (int)nodeInfo.rs_node_s._node_id;
479 
480  mesh_coord_t node_coord;
481  int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
482  if (returnval == -1) {
483  return false;
484  }
485  xyz[0] = node_coord.mesh_x;
486  xyz[1] = node_coord.mesh_y;
487  xyz[2] = node_coord.mesh_z;
488  return true;
489 #else
490  return false;
491 #endif
492 */
493  srand(this->myRank);
494 
495  int x = rand() % 11;
496  int y = rand() % 6;
497  int z = rand() % 16;
498 
499  xyz[0] = x;
500  xyz[1] = y;
501  xyz[2] = z;
502 
503  // Needed for test/unit_test/Machine.cpp PASS
504 // xyz[0] = this->myRank;
505 // xyz[1] = this->numRanks;
506 // xyz[2] = this->numRanks + 1;
507 
508  group_count[x]++;
509 
510  return true;
511  }
512 
513  // Return machine coordinate for this rank, transformed or actual
514  bool getMyMachineCoordinate(pcoord_t *xyz) {
515  if (is_transformed)
517  else
518  this->getMyActualMachineCoordinate(xyz);
519 
520  return true;
521  }
522 
523  // Return machine coord of given rank, transformed or actual
524  inline bool getMachineCoordinate(const int rank,
525  pcoord_t *xyz) const {
526  if (is_transformed) {
527  for (int i = 0; i < this->transformed_networkDim; ++i) {
528  xyz[i] = transformed_procCoords[i][rank];
529  }
530  }
531  else {
532  for (int i = 0; i < this->actual_networkDim; ++i) {
533  xyz[i] = actual_procCoords[i][rank];
534  }
535  }
536 
537  return true;
538  }
539 
540  bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
541  return false; // cannot yet return from nodename
542  }
543 
544  // Return view of all machine coords, transformed or actual
545  bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
546  if (is_transformed) {
547  allCoords = transformed_procCoords;
548  }
549  else {
550  allCoords = actual_procCoords;
551  }
552 
553  return true;
554  }
555 
556  // Return (approx) hop count from rank1 to rank2. Does not account for
557  // Dragonfly's dynamic routing.
558  virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
559  hops = 0;
560 
561  if (rank1 == rank2)
562  return true;
563  if (rank1 >= this->numRanks || rank2 >= this->numRanks) {
564  std::cerr << "Rank outside bounds for the machine ranks";
565  exit(1);
566  }
567 
568  if (this->is_transformed) {
569  // Case: ranks in different groups (i.e. different RCA x-coords)
570  // Does not account for location of group to group connection.
571  // (Most group to group messages will take 5 hops)
572  if (this->transformed_procCoords[0][rank1] !=
573  this->transformed_procCoords[0][rank2])
574  {
575  hops = 5;
576 
577  return true;
578  }
579 
580  // Case: ranks in same group
581  // For each 2 differences in transformed_coordinates then
582  // 1 hop
583  for (int i = 1; i < this->transformed_networkDim; ++i) {
584  if (this->transformed_procCoords[i][rank1] !=
585  this->transformed_procCoords[i][rank2])
586  ++hops;
587  }
588  hops /= 2;
589  }
590  else {
591  // Case: ranks in different groups
592  // Does not account for location of group to group connection.
593  // (Nearly all group to group messages will take 5 hops)
594  if (this->actual_procCoords[0][rank1] !=
595  this->actual_procCoords[0][rank2])
596  {
597  hops = 5;
598  return true;
599  }
600 
601  // Case: ranks in same group
602  // For each difference in actual_coordinates then
603  // 1 hop
604  for (int i = 1; i < this->actual_networkDim; ++i) {
605  if (this->actual_procCoords[i][rank1] !=
606  this->actual_procCoords[i][rank2])
607  ++hops;
608  }
609  }
610 
611  return true;
612  }
613 
614 private:
615 
616  // # of dimensions in the stored coordinates, transformed or actual
617  int transformed_networkDim;
618  int actual_networkDim;
619 
620  // Machine Coordinates
621  pcoord_t **transformed_procCoords;
622  pcoord_t **actual_procCoords;
623 
624  // Maximum extents for each dimension, transformed or actual
625  part_t *transformed_machine_extent;
626  part_t *actual_machine_extent;
627 
628  // Number of groups (RCA X-dim) with nonzero nodes allocated
629  part_t num_unique_groups;
630  // Distribution of nodes in each group (zero node groups have been trimmed)
631  part_t *group_count;
632 
633  // Are out coordinates transformed?
634  bool is_transformed;
635 
636  const Teuchos::ParameterList *pl;
637 
638 
639  // reduceAll the machine coordinates
640  void gatherMachineCoordinates(pcoord_t **&coords, int netDim,
641  const Teuchos::Comm<int> &comm) {
642  // Reduces and stores all machine coordinates.
643  pcoord_t *tmpVect = new pcoord_t [this->numRanks];
644 
645  for (int i = 0; i < netDim; ++i) {
646  Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
647  this->numRanks,
648  coords[i], tmpVect);
649  pcoord_t *tmp = tmpVect;
650  tmpVect = coords[i];
651  coords[i] = tmp;
652  }
653  delete [] tmpVect;
654  }
655 
656 };
657 
658 } // namespace Zoltan2
659 
660 #endif
MachineDragonflyRCAForTesting(const Teuchos::Comm< int > &comm)
Constructor: Dragonfly (e.g. Cori & Trinity) RCA network machine description.
A Dragonfly (e.g. Cori, Trinity, Theta) Machine Class for testing only. A more realistic machine shou...
MachineDragonflyRCAForTesting(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
MachineClass Base class for representing machine coordinates, networks, etc.
SparseMatrixAdapter_t::part_t part_t
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function set hops between rank1 and rank2 return true if coordinates are available ...
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
part_t getNumUniqueGroups() const override
getNumUniqueGroups function return the number of unique Dragonfly network groups in provided allocati...