Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Zoltan2_MachineTorusRCA.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Zoltan2: A package of combinatorial algorithms for scientific computing
4 //
5 // Copyright 2012 NTESS and the Zoltan2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef _ZOLTAN2_MACHINE_TORUS_RCALIB_HPP_
11 #define _ZOLTAN2_MACHINE_TORUS_RCALIB_HPP_
12 
13 #include <Teuchos_Comm.hpp>
14 #include <Teuchos_CommHelpers.hpp>
15 #include <Zoltan2_Machine.hpp>
16 
17 #ifdef HAVE_ZOLTAN2_RCALIB
18 extern "C"{
19 #include <rca_lib.h>
20 }
21 #endif
22 
23 
24 namespace Zoltan2{
25 
29 template <typename pcoord_t, typename part_t>
30 class MachineTorusRCA : public Machine <pcoord_t, part_t> {
31 
32 public:
37  MachineTorusRCA(const Teuchos::Comm<int> &comm):
38  Machine<pcoord_t,part_t>(comm),
39  networkDim(3),
40  actual_networkDim(3),
41  procCoords(NULL),
42  actual_procCoords(NULL),
43  machine_extent(NULL),
44  actual_machine_extent(NULL),
45  is_transformed(false),
46  pl(NULL) {
47 
48  actual_machine_extent = machine_extent = new int[networkDim];
49  this->getRealMachineExtent(this->machine_extent);
50  actual_machine_extent = machine_extent;
51 
52  //allocate memory for processor coordinates.
53  actual_procCoords = procCoords = new pcoord_t *[networkDim];
54  for (int i = 0; i < networkDim; ++i) {
55  procCoords[i] = new pcoord_t[this->numRanks];
56  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
57  }
58 
59  //obtain the coordinate of the processor.
60  pcoord_t *xyz = new pcoord_t[networkDim];
62  for (int i = 0; i < networkDim; i++)
63  procCoords[i][this->myRank] = xyz[i];
64  delete [] xyz;
65 
66  //reduceAll the coordinates of each processor.
67  gatherMachineCoordinates(comm);
68  }
69 
70  virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
71  int dim = 0;
72  int transformed_network_dim = networkDim;
73  if (dim < transformed_network_dim)
74  wrap_around[dim++] = true;
75  if (dim < transformed_network_dim)
76  wrap_around[dim++] = true;
77  if (dim < transformed_network_dim)
78  wrap_around[dim++] = true;
79  return true;
80  }
81 
82  MachineTorusRCA(const Teuchos::Comm<int> &comm,
83  const Teuchos::ParameterList &pl_):
84  Machine<pcoord_t,part_t>(comm),
85  networkDim(3),
86  actual_networkDim(3),
87  procCoords(NULL),
88  actual_procCoords(NULL),
89  machine_extent(NULL),
90  actual_machine_extent(NULL),
91  is_transformed(false),
92  pl(&pl_) {
93 
94  actual_machine_extent = machine_extent = new int[networkDim];
95  this->getRealMachineExtent(this->machine_extent);
96  actual_machine_extent = machine_extent;
97 
98  //allocate memory for processor coordinates.
99  actual_procCoords = procCoords = new pcoord_t *[networkDim];
100  for (int i = 0; i < networkDim; ++i) {
101  procCoords[i] = new pcoord_t[this->numRanks];
102  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
103  }
104  //obtain the coordinate of the processor.
105  pcoord_t *xyz = new pcoord_t[networkDim];
107  for (int i = 0; i < networkDim; i++)
108  procCoords[i][this->myRank] = xyz[i];
109  delete [] xyz;
110 
111 
112  //reduceAll the coordinates of each processor.
113  gatherMachineCoordinates(comm);
114 
115  const Teuchos::ParameterEntry *pe2 =
116  this->pl->getEntryPtr("Machine_Optimization_Level");
117 // this->printAllocation();
118 
119  if (pe2) {
120  int optimization_level;
121  optimization_level = pe2->getValue<int>(&optimization_level);
122 
123  if (optimization_level == 1) {
124  is_transformed = true;
125  this->networkDim = 3;
126  procCoords = new pcoord_t * [networkDim];
127  for(int i = 0; i < networkDim; ++i) {
128  procCoords[i] = new pcoord_t[this->numRanks] ;
129  //this->proc_coords[permutation[i]];
130  }
131  for (int i = 0; i < this->numRanks; ++i) {
132  procCoords[0][i] = this->actual_procCoords[0][i] * 8;
133  int yordinal = this->actual_procCoords[1][i];
134  procCoords[1][i] = yordinal/2 * (16 + 8) + (yordinal %2) * 8;
135  int zordinal = this->actual_procCoords[2][i];
136  procCoords[2][i] = zordinal * 5 + (zordinal / 8) * 3;
137  }
138  int mx = this->machine_extent[0];
139  int my = this->machine_extent[1];
140  int mz = this->machine_extent[2];
141 
142 
143  this->machine_extent = new int[networkDim];
144  this->machine_extent[0] = mx * 8;
145  this->machine_extent[1] = my/2 * (16 + 8) + (my %2) * 8;
146  this->machine_extent[2] = mz * 5 + (mz / 8) * 3;
147  if(this->myRank == 0)
148  std::cout << "Transforming the coordinates" << std::endl;
149 // this->printAllocation();
150  }
151  else if(optimization_level >= 3) {
152  is_transformed = true;
153  this->networkDim = 6;
154  procCoords = new pcoord_t * [networkDim];
155  for(int i = 0; i < networkDim; ++i) {
156  procCoords[i] = new pcoord_t[this->numRanks] ;
157 // this->proc_coords[permutation[i]];
158  }
159 
160 // this->machine_extent[0] = this->actual_machine_extent
161  this->machine_extent = new int[networkDim];
162 
163  this->machine_extent[0] =
164  ceil (int (this->actual_machine_extent[0]) / 2.0) * 64 ;
165  this->machine_extent[3] = 2 * 8 ;
166  this->machine_extent[1] =
167  ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
168  this->machine_extent[4] = 2 * 8;
169  this->machine_extent[2] =
170  ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
171  this->machine_extent[5] = 8 * 5;
172 
173  for (int k = 0; k < this->numRanks ; k++) {
174  // This part is for titan.
175  // But it holds for other 3D torus machines such as Bluewaters.
176 
177  // Bandwitdh along
178  // X = 75
179  // Y = 37.5 or 75 --- everyother has 37.5
180  // --- Y[0-1] =75 but Y[1-2]=37.5
181  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
182 
183  // Along X we make groups of 2. Then scale the distance with 64.
184  // First dimension is represents x/2
185  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 2) * 64;
186  // Then the 3rd dimension is x%2. distance is scaled with 8,
187  // reversely proportional with bw=75
188  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 2) * 8 ;
189 
190  // Along Y. Every other one has the slowest link. So we want
191  // distances between Y/2 huge.
192  // We scale Y/2 with 2400 so that we make sure that it is the
193  // first one we divie.
194  procCoords[1][k] =
195  (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
196  // The other one is scaled with 8 as in X.
197  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
198 
199  // We make groups of 8 along Z. Then distances between these
200  // groups are scaled with 160.
201  // So that it is more than 2x distance than the distance with
202  // X grouping.
203  // That is we scale the groups of Zs with 160. Groups of X with 64.
204  // Zs has 8 processors connecting them, while X has only one. We
205  // want to divide along
206  // Z twice before dividing along X.
207  procCoords[2][k] =
208  ((int (this->actual_procCoords[2][k])) / 8) * 160;
209  // In the second group everything is scaled with 5, as bw=120
210  procCoords[5][k] =
211  ((int (this->actual_procCoords[2][k])) % 8) * 5;
212  }
213  }
214  else if(optimization_level == 2) {
215  // This is as above case. but we make groups of 3 along X instead.
216  is_transformed = true;
217  this->networkDim = 6;
218  procCoords = new pcoord_t * [networkDim];
219  for(int i = 0; i < networkDim; ++i) {
220  procCoords[i] = new pcoord_t[this->numRanks] ;
221 // this->proc_coords[permutation[i]];
222  }
223 
224 // this->machine_extent[0] = this->actual_machine_extent
225  this->machine_extent = new int[networkDim];
226 
227  this->machine_extent[0] =
228  ceil(int (this->actual_machine_extent[0]) / 3.0) * 128 ;
229  this->machine_extent[3] = 3 * 8 ;
230  this->machine_extent[1] =
231  ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
232  this->machine_extent[4] = 2 * 8;
233  this->machine_extent[2] =
234  ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
235  this->machine_extent[5] = 8 * 5;
236 
237 
238  for (int k = 0; k < this->numRanks ; k++) {
239  // This part is for titan.
240  // But it holds for other 3D torus machines such as Bluewaters.
241 
242  // Bandwitdh along
243  // X = 75
244  // Y = 37.5 or 75 --- everyother has 37.5
245  // --- Y[0-1] =75 but Y[1-2]=37.5
246  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
247 
248  // In this case we make groups of 3. along X.
249  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 3) * 128;
250  // Then the 3rd dimension is x%2. distance is scaled with 8,
251  // reversely proportional with bw=75
252  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 3) * 8 ;
253 
254  // Along Y. Every other one has the slowest link. So we want
255  // distances between Y/2 huge.
256  // We scale Y/2 with 2400 so that we make sure that it is the
257  // first one we divie.
258  procCoords[1][k] =
259  (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
260  // The other one is scaled with 8 as in X.
261  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
262 
263 
264  procCoords[2][k] =
265  ((int (this->actual_procCoords[2][k])) / 8) * 160;
266  // In the second group everything is scaled with 5, as bw=120
267  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
268  }
269  }
270  }
271  }
272 
273 
274 
275 
276  virtual ~MachineTorusRCA() {
277  if (is_transformed) {
278  is_transformed = false;
279  for (int i = 0; i < actual_networkDim; i++) {
280  delete [] actual_procCoords[i];
281  }
282  delete [] actual_procCoords;
283  delete [] actual_machine_extent;
284  }
285  for (int i = 0; i < networkDim; i++) {
286  delete [] procCoords[i];
287  }
288  delete [] procCoords;
289  delete [] machine_extent;
290  }
291 
292  bool hasMachineCoordinates() const { return true; }
293 
294  int getMachineDim() const { return this->networkDim; }
295  int getRealMachineDim() const { return this->actual_networkDim; }
296 
297  bool getMachineExtent(int *nxyz) const {
298  if (is_transformed) {
299  return false;
300  }
301  else {
302  int dim = 0;
303  nxyz[dim++] = this->machine_extent[0]; // X
304  nxyz[dim++] = this->machine_extent[1]; // Y
305  nxyz[dim++] = this->machine_extent[2]; // Z
306  return true;
307  }
308  }
309 
310  bool getRealMachineExtent(int *nxyz) const {
311 #if defined (HAVE_ZOLTAN2_RCALIB)
312  mesh_coord_t mxyz;
313  rca_get_max_dimension(&mxyz);
314  int dim = 0;
315  nxyz[dim++] = mxyz.mesh_x + 1; // X
316  nxyz[dim++] = mxyz.mesh_y + 1; // Y
317  nxyz[dim++] = mxyz.mesh_z + 1; // Z
318  return true;
319 #else
320  return false;
321 #endif
322  }
323 
324 
326  if(this->myRank == 0) {
327  for (int i = 0; i < this->numRanks; ++i) {
328  std::cout << "Rank:" << i
329  << " " << procCoords[0][i]
330  << " " << procCoords[1][i]
331  << " " << procCoords[2][i] << std::endl;
332  }
333  std::cout << "Machine Extent:"
334  << " " << this->machine_extent[0]
335  << " " << this->machine_extent[1]
336  << " " << this->machine_extent[2] << std::endl;
337  }
338  }
339 
340  bool getMyMachineCoordinate(pcoord_t *xyz) {
341  for (int i = 0; i < this->networkDim; ++i) {
342  xyz[i] = procCoords[i][this->myRank];
343  }
344  return true;
345  }
346 
347  bool getMyActualMachineCoordinate(pcoord_t *xyz) {
348 #if defined (HAVE_ZOLTAN2_RCALIB)
349  rs_node_t nodeInfo; /* Cray node info for node running this function */
350  rca_get_nodeid(&nodeInfo);
351  int NIDs = (int)nodeInfo.rs_node_s._node_id; /* its node ID */
352 
353  mesh_coord_t node_coord;
354  int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
355  if (returnval == -1) {
356  return false;
357  }
358  xyz[0] = node_coord.mesh_x;
359  xyz[1] = node_coord.mesh_y;
360  xyz[2] = node_coord.mesh_z;
361  return true;
362 #else
363  return false;
364 #endif
365  }
366 
367  inline bool getMachineCoordinate(const int rank,
368  pcoord_t *xyz) const {
369  for (int i = 0; i < this->networkDim; ++i) {
370  xyz[i] = procCoords[i][rank];
371  }
372  return true;
373  }
374 
375 
376  bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
377  return false; // cannot yet return from nodename
378  }
379 
380  bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
381  allCoords = procCoords;
382  return true;
383  }
384 
385  virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
386  hops = 0;
387  for (int i = 0; i < networkDim; ++i) {
388  pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
389  if (distance < 0)
390  distance = -distance;
391  if (machine_extent[i] - distance < distance)
392  distance = machine_extent[i] - distance;
393  hops += distance;
394  }
395  return true;
396  }
397 
398 
399 private:
400 
// Number of coordinate dimensions currently exposed through procCoords:
// 3, or 6 after the constructor applied a six-dimensional transformation.
401  int networkDim;
// Dimension of the untransformed coordinates; both constructors set it to 3.
402  int actual_networkDim;
403 
// Coordinate table indexed as procCoords[dimension][rank].
404  pcoord_t **procCoords;
// Untransformed coordinate table. It aliases procCoords unless
// is_transformed is set, in which case it is a separate allocation.
405  pcoord_t **actual_procCoords;
406 
// Extent per dimension matching procCoords.
// NOTE(review): the constructors allocate this with new int[] and pass it to
// a function taking int*, so this presumably only compiles when part_t is
// int -- confirm.
407  part_t *machine_extent;
// Untransformed extent; aliases machine_extent unless is_transformed is set.
408  part_t *actual_machine_extent;
// True when the constructor replaced the coordinates and extents with
// transformed copies; the destructor then also frees the actual_* arrays.
409  bool is_transformed;
410 
411 
// Parameter list passed to the two-argument constructor, NULL otherwise.
// Never deleted by this class.
412  const Teuchos::ParameterList *pl;
413 
414 /*
415  bool delete_transformed_coords;
416  int transformed_network_dim;
417  pcoord_t **transformed_coordinates;
418 */
419 
420  void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
421  // reduces and stores all machine coordinates.
422  pcoord_t *tmpVect = new pcoord_t [this->numRanks];
423 
424  for (int i = 0; i < networkDim; i++) {
425  Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
426  this->numRanks,
427  procCoords[i], tmpVect);
428  pcoord_t *tmp = tmpVect;
429  tmpVect = procCoords[i];
430  procCoords[i] = tmp;
431  }
432  delete [] tmpVect;
433  }
434 
435 };
436 
437 } // namespace Zoltan2
438 #endif
bool getRealMachineExtent(int *nxyz) const
MachineTorusRCA(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
MachineClass Base class for representing machine coordinates, networks, etc.
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
MachineTorusRCA(const Teuchos::Comm< int > &comm)
Constructor: An RCA torus network machine description.
bool getMachineExtent(int *nxyz) const
SparseMatrixAdapter_t::part_t part_t
bool getMyMachineCoordinate(pcoord_t *xyz)
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops to the hop count between rank1 and rank2; returns true if coordinates are available ...
An RCA Machine class on Torus Networks.
bool getMyActualMachineCoordinate(pcoord_t *xyz)