Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Zoltan2_MachineTorusRCA.hpp
Go to the documentation of this file.
1 #ifndef _ZOLTAN2_MACHINE_TORUS_RCALIB_HPP_
2 #define _ZOLTAN2_MACHINE_TORUS_RCALIB_HPP_
3 
4 #include <Teuchos_Comm.hpp>
5 #include <Teuchos_CommHelpers.hpp>
6 #include <Zoltan2_Machine.hpp>
7 
8 #ifdef HAVE_ZOLTAN2_RCALIB
9 extern "C"{
10 #include <rca_lib.h>
11 }
12 #endif
13 
14 
15 namespace Zoltan2{
16 
20 template <typename pcoord_t, typename part_t>
21 class MachineTorusRCA : public Machine <pcoord_t, part_t> {
22 
23 public:
28  MachineTorusRCA(const Teuchos::Comm<int> &comm):
29  Machine<pcoord_t,part_t>(comm),
30  networkDim(3),
31  actual_networkDim(3),
32  procCoords(NULL),
33  actual_procCoords(NULL),
34  machine_extent(NULL),
35  actual_machine_extent(NULL),
36  is_transformed(false),
37  pl(NULL) {
38 
39  actual_machine_extent = machine_extent = new int[networkDim];
40  this->getRealMachineExtent(this->machine_extent);
41  actual_machine_extent = machine_extent;
42 
43  //allocate memory for processor coordinates.
44  actual_procCoords = procCoords = new pcoord_t *[networkDim];
45  for (int i = 0; i < networkDim; ++i) {
46  procCoords[i] = new pcoord_t[this->numRanks];
47  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
48  }
49 
50  //obtain the coordinate of the processor.
51  pcoord_t *xyz = new pcoord_t[networkDim];
53  for (int i = 0; i < networkDim; i++)
54  procCoords[i][this->myRank] = xyz[i];
55  delete [] xyz;
56 
57  //reduceAll the coordinates of each processor.
58  gatherMachineCoordinates(comm);
59  }
60 
61  virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
62  int dim = 0;
63  int transformed_network_dim = networkDim;
64  if (dim < transformed_network_dim)
65  wrap_around[dim++] = true;
66  if (dim < transformed_network_dim)
67  wrap_around[dim++] = true;
68  if (dim < transformed_network_dim)
69  wrap_around[dim++] = true;
70  return true;
71  }
72 
73  MachineTorusRCA(const Teuchos::Comm<int> &comm,
74  const Teuchos::ParameterList &pl_):
75  Machine<pcoord_t,part_t>(comm),
76  networkDim(3),
77  actual_networkDim(3),
78  procCoords(NULL),
79  actual_procCoords(NULL),
80  machine_extent(NULL),
81  actual_machine_extent(NULL),
82  is_transformed(false),
83  pl(&pl_) {
84 
85  actual_machine_extent = machine_extent = new int[networkDim];
86  this->getRealMachineExtent(this->machine_extent);
87  actual_machine_extent = machine_extent;
88 
89  //allocate memory for processor coordinates.
90  actual_procCoords = procCoords = new pcoord_t *[networkDim];
91  for (int i = 0; i < networkDim; ++i) {
92  procCoords[i] = new pcoord_t[this->numRanks];
93  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
94  }
95  //obtain the coordinate of the processor.
96  pcoord_t *xyz = new pcoord_t[networkDim];
98  for (int i = 0; i < networkDim; i++)
99  procCoords[i][this->myRank] = xyz[i];
100  delete [] xyz;
101 
102 
103  //reduceAll the coordinates of each processor.
104  gatherMachineCoordinates(comm);
105 
106  const Teuchos::ParameterEntry *pe2 =
107  this->pl->getEntryPtr("Machine_Optimization_Level");
108 // this->printAllocation();
109 
110  if (pe2) {
111  int optimization_level;
112  optimization_level = pe2->getValue<int>(&optimization_level);
113 
114  if (optimization_level == 1) {
115  is_transformed = true;
116  this->networkDim = 3;
117  procCoords = new pcoord_t * [networkDim];
118  for(int i = 0; i < networkDim; ++i) {
119  procCoords[i] = new pcoord_t[this->numRanks] ;
120  //this->proc_coords[permutation[i]];
121  }
122  for (int i = 0; i < this->numRanks; ++i) {
123  procCoords[0][i] = this->actual_procCoords[0][i] * 8;
124  int yordinal = this->actual_procCoords[1][i];
125  procCoords[1][i] = yordinal/2 * (16 + 8) + (yordinal %2) * 8;
126  int zordinal = this->actual_procCoords[2][i];
127  procCoords[2][i] = zordinal * 5 + (zordinal / 8) * 3;
128  }
129  int mx = this->machine_extent[0];
130  int my = this->machine_extent[1];
131  int mz = this->machine_extent[2];
132 
133 
134  this->machine_extent = new int[networkDim];
135  this->machine_extent[0] = mx * 8;
136  this->machine_extent[1] = my/2 * (16 + 8) + (my %2) * 8;
137  this->machine_extent[2] = mz * 5 + (mz / 8) * 3;
138  if(this->myRank == 0)
139  std::cout << "Transforming the coordinates" << std::endl;
140 // this->printAllocation();
141  }
142  else if(optimization_level >= 3) {
143  is_transformed = true;
144  this->networkDim = 6;
145  procCoords = new pcoord_t * [networkDim];
146  for(int i = 0; i < networkDim; ++i) {
147  procCoords[i] = new pcoord_t[this->numRanks] ;
148 // this->proc_coords[permutation[i]];
149  }
150 
151 // this->machine_extent[0] = this->actual_machine_extent
152  this->machine_extent = new int[networkDim];
153 
154  this->machine_extent[0] =
155  ceil (int (this->actual_machine_extent[0]) / 2.0) * 64 ;
156  this->machine_extent[3] = 2 * 8 ;
157  this->machine_extent[1] =
158  ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
159  this->machine_extent[4] = 2 * 8;
160  this->machine_extent[2] =
161  ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
162  this->machine_extent[5] = 8 * 5;
163 
164  for (int k = 0; k < this->numRanks ; k++) {
165  // This part is for titan.
166  // But it holds for other 3D torus machines such as Bluewaters.
167 
168  // Bandwitdh along
169  // X = 75
170  // Y = 37.5 or 75 --- everyother has 37.5
171  // --- Y[0-1] =75 but Y[1-2]=37.5
172  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
173 
174  // Along X we make groups of 2. Then scale the distance with 64.
175  // First dimension is represents x/2
176  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 2) * 64;
177  // Then the 3rd dimension is x%2. distance is scaled with 8,
178  // reversely proportional with bw=75
179  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 2) * 8 ;
180 
181  // Along Y. Every other one has the slowest link. So we want
182  // distances between Y/2 huge.
183  // We scale Y/2 with 2400 so that we make sure that it is the
184  // first one we divie.
185  procCoords[1][k] =
186  (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
187  // The other one is scaled with 8 as in X.
188  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
189 
190  // We make groups of 8 along Z. Then distances between these
191  // groups are scaled with 160.
192  // So that it is more than 2x distance than the distance with
193  // X grouping.
194  // That is we scale the groups of Zs with 160. Groups of X with 64.
195  // Zs has 8 processors connecting them, while X has only one. We
196  // want to divide along
197  // Z twice before dividing along X.
198  procCoords[2][k] =
199  ((int (this->actual_procCoords[2][k])) / 8) * 160;
200  // In the second group everything is scaled with 5, as bw=120
201  procCoords[5][k] =
202  ((int (this->actual_procCoords[2][k])) % 8) * 5;
203  }
204  }
205  else if(optimization_level == 2) {
206  // This is as above case. but we make groups of 3 along X instead.
207  is_transformed = true;
208  this->networkDim = 6;
209  procCoords = new pcoord_t * [networkDim];
210  for(int i = 0; i < networkDim; ++i) {
211  procCoords[i] = new pcoord_t[this->numRanks] ;
212 // this->proc_coords[permutation[i]];
213  }
214 
215 // this->machine_extent[0] = this->actual_machine_extent
216  this->machine_extent = new int[networkDim];
217 
218  this->machine_extent[0] =
219  ceil(int (this->actual_machine_extent[0]) / 3.0) * 128 ;
220  this->machine_extent[3] = 3 * 8 ;
221  this->machine_extent[1] =
222  ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
223  this->machine_extent[4] = 2 * 8;
224  this->machine_extent[2] =
225  ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
226  this->machine_extent[5] = 8 * 5;
227 
228 
229  for (int k = 0; k < this->numRanks ; k++) {
230  // This part is for titan.
231  // But it holds for other 3D torus machines such as Bluewaters.
232 
233  // Bandwitdh along
234  // X = 75
235  // Y = 37.5 or 75 --- everyother has 37.5
236  // --- Y[0-1] =75 but Y[1-2]=37.5
237  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
238 
239  // In this case we make groups of 3. along X.
240  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 3) * 128;
241  // Then the 3rd dimension is x%2. distance is scaled with 8,
242  // reversely proportional with bw=75
243  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 3) * 8 ;
244 
245  // Along Y. Every other one has the slowest link. So we want
246  // distances between Y/2 huge.
247  // We scale Y/2 with 2400 so that we make sure that it is the
248  // first one we divie.
249  procCoords[1][k] =
250  (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
251  // The other one is scaled with 8 as in X.
252  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
253 
254 
255  procCoords[2][k] =
256  ((int (this->actual_procCoords[2][k])) / 8) * 160;
257  // In the second group everything is scaled with 5, as bw=120
258  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
259  }
260  }
261  }
262  }
263 
264 
265 
266 
267  virtual ~MachineTorusRCA() {
268  if (is_transformed) {
269  is_transformed = false;
270  for (int i = 0; i < actual_networkDim; i++) {
271  delete [] actual_procCoords[i];
272  }
273  delete [] actual_procCoords;
274  delete [] actual_machine_extent;
275  }
276  for (int i = 0; i < networkDim; i++) {
277  delete [] procCoords[i];
278  }
279  delete [] procCoords;
280  delete [] machine_extent;
281  }
282 
283  bool hasMachineCoordinates() const { return true; }
284 
285  int getMachineDim() const { return this->networkDim; }
286  int getRealMachineDim() const { return this->actual_networkDim; }
287 
288  bool getMachineExtent(int *nxyz) const {
289  if (is_transformed) {
290  return false;
291  }
292  else {
293  int dim = 0;
294  nxyz[dim++] = this->machine_extent[0]; // X
295  nxyz[dim++] = this->machine_extent[1]; // Y
296  nxyz[dim++] = this->machine_extent[2]; // Z
297  return true;
298  }
299  }
300 
301  bool getRealMachineExtent(int *nxyz) const {
302 #if defined (HAVE_ZOLTAN2_RCALIB)
303  mesh_coord_t mxyz;
304  rca_get_max_dimension(&mxyz);
305  int dim = 0;
306  nxyz[dim++] = mxyz.mesh_x + 1; // X
307  nxyz[dim++] = mxyz.mesh_y + 1; // Y
308  nxyz[dim++] = mxyz.mesh_z + 1; // Z
309  return true;
310 #else
311  return false;
312 #endif
313  }
314 
315 
317  if(this->myRank == 0) {
318  for (int i = 0; i < this->numRanks; ++i) {
319  std::cout << "Rank:" << i
320  << " " << procCoords[0][i]
321  << " " << procCoords[1][i]
322  << " " << procCoords[2][i] << std::endl;
323  }
324  std::cout << "Machine Extent:"
325  << " " << this->machine_extent[0]
326  << " " << this->machine_extent[1]
327  << " " << this->machine_extent[2] << std::endl;
328  }
329  }
330 
331  bool getMyMachineCoordinate(pcoord_t *xyz) {
332  for (int i = 0; i < this->networkDim; ++i) {
333  xyz[i] = procCoords[i][this->myRank];
334  }
335  return true;
336  }
337 
338  bool getMyActualMachineCoordinate(pcoord_t *xyz) {
339 #if defined (HAVE_ZOLTAN2_RCALIB)
340  rs_node_t nodeInfo; /* Cray node info for node running this function */
341  rca_get_nodeid(&nodeInfo);
342  int NIDs = (int)nodeInfo.rs_node_s._node_id; /* its node ID */
343 
344  mesh_coord_t node_coord;
345  int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
346  if (returnval == -1) {
347  return false;
348  }
349  xyz[0] = node_coord.mesh_x;
350  xyz[1] = node_coord.mesh_y;
351  xyz[2] = node_coord.mesh_z;
352  return true;
353 #else
354  return false;
355 #endif
356  }
357 
358  inline bool getMachineCoordinate(const int rank,
359  pcoord_t *xyz) const {
360  for (int i = 0; i < this->networkDim; ++i) {
361  xyz[i] = procCoords[i][rank];
362  }
363  return true;
364  }
365 
366 
367  bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
368  return false; // cannot yet return from nodename
369  }
370 
371  bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
372  allCoords = procCoords;
373  return true;
374  }
375 
376  virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
377  hops = 0;
378  for (int i = 0; i < networkDim; ++i) {
379  pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
380  if (distance < 0)
381  distance = -distance;
382  if (machine_extent[i] - distance < distance)
383  distance = machine_extent[i] - distance;
384  hops += distance;
385  }
386  return true;
387  }
388 
389 
390 private:
391 
// Dimension of the coordinates in procCoords / machine_extent
// (set to 3, or to 6 by the 6D transformations in the constructor).
392  int networkDim;
// Dimension of the untransformed coordinates (initialized to 3).
393  int actual_networkDim;
394 
// Working coordinates, indexed [dimension][rank].
395  pcoord_t **procCoords;
// Untransformed coordinates; same array as procCoords unless is_transformed.
396  pcoord_t **actual_procCoords;
397 
// Working extent per dimension.
398  part_t *machine_extent;
// Untransformed extent; same array as machine_extent unless is_transformed.
399  part_t *actual_machine_extent;
// True once the constructor replaced the working arrays with transformed
// copies; the destructor then frees the actual_* arrays separately.
400  bool is_transformed;
401 
402 
// Parameter list given to the constructor (NULL for the one-argument form);
// not deleted by this class.
403  const Teuchos::ParameterList *pl;
404 
405 /*
406  bool delete_transformed_coords;
407  int transformed_network_dim;
408  pcoord_t **transformed_coordinates;
409 */
410 
411  void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
412  // reduces and stores all machine coordinates.
413  pcoord_t *tmpVect = new pcoord_t [this->numRanks];
414 
415  for (int i = 0; i < networkDim; i++) {
416  Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
417  this->numRanks,
418  procCoords[i], tmpVect);
419  pcoord_t *tmp = tmpVect;
420  tmpVect = procCoords[i];
421  procCoords[i] = tmp;
422  }
423  delete [] tmpVect;
424  }
425 
426 };
427 
428 } // namespace Zoltan2
429 #endif
bool getRealMachineExtent(int *nxyz) const
MachineTorusRCA(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
MachineClass Base class for representing machine coordinates, networks, etc.
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
MachineTorusRCA(const Teuchos::Comm< int > &comm)
Constructor: A Cray RCA torus network machine description.
bool getMachineExtent(int *nxyz) const
SparseMatrixAdapter_t::part_t part_t
bool getMyMachineCoordinate(pcoord_t *xyz)
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops between rank1 and rank2; returns true if coordinates are available ...
An RCA Machine class on Torus Networks.
bool getMyActualMachineCoordinate(pcoord_t *xyz)