Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Zoltan2_MachineTorusRCAForTesting.hpp
Go to the documentation of this file.
1 #ifndef _ZOLTAN2_MACHINE_TORUS_RCALIBTEST_HPP_
2 #define _ZOLTAN2_MACHINE_TORUS_RCALIBTEST_HPP_
3 
4 #include <Teuchos_Comm.hpp>
5 #include <Teuchos_CommHelpers.hpp>
6 #include <Zoltan2_Machine.hpp>
7 
8 #include <cstdlib> /* srand, rand */
9 #include <fstream>
10 #include <string>
11 
12 namespace Zoltan2{
13 
18 template <typename pcoord_t, typename part_t>
19 class MachineTorusRCAForTesting : public Machine <pcoord_t, part_t> {
20 
21 public:
26  MachineTorusRCAForTesting(const Teuchos::Comm<int> &comm):
27  Machine<pcoord_t,part_t>(comm),
28  networkDim(3), actual_networkDim(3),
29  procCoords(NULL), actual_procCoords(NULL),
30  machine_extent(NULL),actual_machine_extent(NULL),
31  is_transformed(false), pl(NULL)
32  {
33  actual_machine_extent = machine_extent = new int[networkDim];
34  this->getRealMachineExtent(this->machine_extent);
35  actual_machine_extent = machine_extent;
36 
37  // Allocate memory for processor coordinates.
38  actual_procCoords = procCoords = new pcoord_t *[networkDim];
39  for (int i = 0; i < networkDim; ++i) {
40  procCoords[i] = new pcoord_t[this->numRanks];
41  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
42  }
43 
44  // Obtain the coordinate of the processor.
45  pcoord_t *xyz = new pcoord_t[networkDim];
47  for (int i = 0; i < networkDim; i++)
48  procCoords[i][this->myRank] = xyz[i];
49  delete [] xyz;
50 
51 
52  // reduceAll the coordinates of each processor.
53  gatherMachineCoordinates(comm);
54  }
55 
56  virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
57  int dim = 0;
58  int transformed_network_dim = networkDim;
59 
60  if (dim < transformed_network_dim)
61  wrap_around[dim++] = true;
62  if (dim < transformed_network_dim)
63  wrap_around[dim++] = true;
64  if (dim < transformed_network_dim)
65  wrap_around[dim++] = true;
66  return true;
67  }
68 
69  MachineTorusRCAForTesting(const Teuchos::Comm<int> &comm,
70  const Teuchos::ParameterList &pl_):
71  Machine<pcoord_t,part_t>(comm),
72  networkDim(3), actual_networkDim(3),
73  procCoords(NULL), actual_procCoords(NULL),
74  machine_extent(NULL),actual_machine_extent(NULL),
75  is_transformed(false), pl(&pl_)
76  {
77 
78  actual_machine_extent = machine_extent = new int[networkDim];
79  this->getRealMachineExtent(this->machine_extent);
80  actual_machine_extent = machine_extent;
81 
82  // Allocate memory for processor coordinates.
83  actual_procCoords = procCoords = new pcoord_t *[networkDim];
84 
85 
86  const Teuchos::ParameterEntry *pe1 =
87  this->pl->getEntryPtr("Input_RCA_Machine_Coords");
88  if (pe1) {
89  std::string input_coord_file;
90  input_coord_file = pe1->getValue<std::string>(&input_coord_file);
91  if (input_coord_file != "") {
92 
93  if (this->myRank == 0) {
94  std::vector < std::vector <pcoord_t> > proc_coords(networkDim);
95  std::fstream machine_coord_file(input_coord_file.c_str());
96 
97  part_t i = 0;
98  pcoord_t a,b, c;
99  machine_coord_file >> a >> b >> c;
100  while(!machine_coord_file.eof()) {
101  proc_coords[0].push_back(a);
102  proc_coords[1].push_back(b);
103  proc_coords[2].push_back(c);
104  ++i;
105  machine_coord_file >> a >> b >> c;
106  }
107 
108  machine_coord_file.close();
109  std::cout << "Rewriting numprocs from:"
110  << this->numRanks << " to:" << i << std::endl;
111  this->numRanks = i;
112 
113  for(int ii = 0; ii < networkDim; ++ii) {
114  procCoords[ii] = new pcoord_t[this->numRanks];
115  for (int j = 0; j < this->numRanks; ++j) {
116  procCoords[ii][j] = proc_coords[ii][j];
117  }
118  }
119  }
120  comm.broadcast(0, sizeof(int), (char *) &(this->numRanks));
121 
122  if (this->myRank != 0) {
123  for (int i = 0; i < networkDim; ++i) {
124  procCoords[i] = new pcoord_t[this->numRanks];
125  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
126  }
127  }
128  }
129  }
130  else {
131  for (int i = 0; i < networkDim; ++i) {
132  procCoords[i] = new pcoord_t[this->numRanks];
133  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
134  }
135  // Obtain the coordinate of the processor.
136  pcoord_t *xyz = new pcoord_t[networkDim];
138  for (int i = 0; i < networkDim; i++)
139  procCoords[i][this->myRank] = xyz[i];
140  delete [] xyz;
141  }
142 
143  // reduceAll the coordinates of each processor.
144  gatherMachineCoordinates(comm);
145 
146  const Teuchos::ParameterEntry *pe2 =
147  this->pl->getEntryPtr("Machine_Optimization_Level");
148 // this->printAllocation();
149  if (pe2) {
150  int optimization_level;
151  optimization_level = pe2->getValue<int>(&optimization_level);
152 
153  if (optimization_level == 1) {
154  is_transformed = true;
155  this->networkDim = 3;
156  procCoords = new pcoord_t * [networkDim];
157  for(int i = 0; i < networkDim; ++i) {
158  procCoords[i] = new pcoord_t[this->numRanks] ;
159 // this->proc_coords[permutation[i]];
160  }
161  for (int i = 0; i < this->numRanks; ++i) {
162  procCoords[0][i] = this->actual_procCoords[0][i] * 8;
163  int yordinal = this->actual_procCoords[1][i];
164  procCoords[1][i] = yordinal/2 * (16 + 8) + (yordinal %2) * 8;
165  int zordinal = this->actual_procCoords[2][i];
166  procCoords[2][i] = zordinal * 5 + (zordinal / 8) * 3;
167  }
168  int mx = this->machine_extent[0];
169  int my = this->machine_extent[1];
170  int mz = this->machine_extent[2];
171 
172 
173  this->machine_extent = new int[networkDim];
174  this->machine_extent[0] = mx * 8;
175  this->machine_extent[1] = my/2 * (16 + 8) + (my %2) * 8;
176  this->machine_extent[2] = mz * 5 + (mz / 8) * 3;
177  if(this->myRank == 0)
178  std::cout << "Transforming the coordinates" << std::endl;
179 // this->printAllocation();
180  }
181  else if(optimization_level >= 3) {
182  is_transformed = true;
183  this->networkDim = 6;
184  procCoords = new pcoord_t * [networkDim];
185  for(int i = 0; i < networkDim; ++i) {
186  procCoords[i] = new pcoord_t[this->numRanks] ;
187 // this->proc_coords[permutation[i]];
188  }
189 
190 // this->machine_extent[0] = this->actual_machine_extent
191  this->machine_extent = new int[networkDim];
192 
193  this->machine_extent[0] =
194  ceil (int (this->actual_machine_extent[0]) / 2.0) * 64 ;
195  this->machine_extent[3] = 2 * 8 ;
196  this->machine_extent[1] =
197  ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
198  this->machine_extent[4] = 2 * 8;
199  this->machine_extent[2] =
200  ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
201  this->machine_extent[5] = 8 * 5;
202 
203  for (int k = 0; k < this->numRanks ; k++) {
204  // This part is for titan.
205  // But it holds for other 3D torus machines such as Bluewaters.
206 
207  // Bandwitdh along
208  // X = 75
209  // Y = 37.5 or 75 --- everyother has 37.5
210  // --- Y[0-1] =75 but Y[1-2]=37.5
211  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
212 
213  // Along X we make groups of 2. Then scale the distance with 64.
214  // First dimension is represents x/2
215  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 2) * 64;
216  // Then the 3rd dimension is x%2. distance is scaled with 8,
217  // reversely proportional with bw=75
218  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 2) * 8 ;
219 
220  // Along Y. Every other one has the slowest link. So we want
221  // distances between Y/2 huge.
222  // We scale Y/2 with 2400 so that we make sure that it is the
223  // first one we divie.
224  procCoords[1][k] =
225  (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
226  // The other one is scaled with 8 as in X.
227  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
228 
229  // We make groups of 8 along Z. Then distances between these
230  // groups are scaled with 160.
231  // So that it is more than 2x distance than the distance with X
232  // grouping.
233  // That is we scale the groups of Zs with 160. Groups of X with 64.
234  // Zs has 8 processors connecting them, while X has only one. We
235  // want to divide along Z twice before dividing along X.
236  procCoords[2][k] =
237  ((int (this->actual_procCoords[2][k])) / 8) * 160;
238  // In the second group everything is scaled with 5, as bw=120
239  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
240  }
241  }
242  else if(optimization_level == 2) {
243  // This is as above case. but we make groups of 3 along X instead.
244  is_transformed = true;
245  this->networkDim = 6;
246  procCoords = new pcoord_t * [networkDim];
247  for(int i = 0; i < networkDim; ++i) {
248  procCoords[i] = new pcoord_t[this->numRanks] ;
249 // this->proc_coords[permutation[i]];
250  }
251 
252 // this->machine_extent[0] = this->actual_machine_extent
253  this->machine_extent = new int[networkDim];
254 
255  this->machine_extent[0] =
256  ceil(int (this->actual_machine_extent[0]) / 3.0) * 128 ;
257  this->machine_extent[3] = 3 * 8 ;
258  this->machine_extent[1] =
259  ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
260  this->machine_extent[4] = 2 * 8;
261  this->machine_extent[2] =
262  ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
263  this->machine_extent[5] = 8 * 5;
264 
265 
266  for (int k = 0; k < this->numRanks ; k++) {
267  // This part is for titan.
268  // But it holds for other 3D torus machines such as Bluewaters.
269 
270  // Bandwitdh along
271  // X = 75
272  // Y = 37.5 or 75 --- everyother has 37.5
273  // --- Y[0-1] =75 but Y[1-2]=37.5
274  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
275 
276  // In this case we make groups of 3. along X.
277  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 3) * 128;
278  // Then the 3rd dimension is x%2. distance is scaled with 8,
279  // reversely proportional with bw=75
280  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 3) * 8 ;
281 
282  // Along Y. Every other one has the slowest link. So we want
283  // distances between Y/2 huge.
284  // We scale Y/2 with 2400 so that we make sure that it is the
285  // first one we divie.
286  procCoords[1][k] =
287  (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
288  // The other one is scaled with 8 as in X.
289  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
290 
291 
292  procCoords[2][k] =
293  ((int (this->actual_procCoords[2][k])) / 8) * 160;
294  // In the second group everything is scaled with 5, as bw=120
295  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
296  }
297  }
298  }
299  }
300 
302  if (is_transformed) {
303  is_transformed = false;
304  for (int i = 0; i < actual_networkDim; i++) {
305  delete [] actual_procCoords[i];
306  }
307  delete [] actual_procCoords;
308  delete [] actual_machine_extent;
309  }
310  for (int i = 0; i < networkDim; i++) {
311  delete [] procCoords[i];
312  }
313  delete [] procCoords;
314  delete [] machine_extent;
315  }
316 
317  bool hasMachineCoordinates() const { return true; }
318 
319  int getMachineDim() const { return this->networkDim; }
320  int getRealMachineDim() const { return this->actual_networkDim; }
321 
322  bool getMachineExtent(int *nxyz) const {
323  if (is_transformed) {
324  return false;
325  }
326  else {
327  int dim = 0;
328  nxyz[dim++] = this->machine_extent[0]; //x
329  nxyz[dim++] = this->machine_extent[1]; //y
330  nxyz[dim++] = this->machine_extent[2]; //z
331  return true;
332  }
333  }
334 
335  bool getRealMachineExtent(int *nxyz) const {
336  int dim = 0;
337  nxyz[dim++] = 25; //x
338  nxyz[dim++] = 16; //y
339  nxyz[dim++] = 24; //z
340  return true;
341  }
342 
343 
345  if(this->myRank == 0) {
346  for (int i = 0; i < this->numRanks; ++i) {
347  std::cout << "Rank:" << i
348  << " " << procCoords[0][i]
349  << " " << procCoords[1][i]
350  << " " << procCoords[2][i] << std::endl;
351  }
352  std::cout << "Machine Extent:"
353  << " " << this->machine_extent[0]
354  << " " << this->machine_extent[1]
355  << " " << this->machine_extent[2] << std::endl;
356  }
357  }
358 
359  bool getMyMachineCoordinate(pcoord_t *xyz) {
360  for (int i = 0; i < this->networkDim; ++i) {
361  xyz[i] = procCoords[i][this->myRank];
362  }
363  return true;
364  }
365 
366  bool getMyActualMachineCoordinate(pcoord_t *xyz) {
367  xyz[0] = rand() % 25;
368  xyz[1] = rand() % 16;
369  xyz[2] = rand() % 24;
370  return true;
371  }
372 
373  inline bool getMachineCoordinate(const int rank,
374  pcoord_t *xyz) const {
375  for (int i = 0; i < this->networkDim; ++i) {
376  xyz[i] = procCoords[i][rank];
377  }
378  return true;
379  }
380 
381 
382  bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
383  return false; // cannot yet return from nodename
384  }
385 
386  bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
387  allCoords = procCoords;
388  return true;
389  }
390 
391  virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
392  hops = 0;
393  for (int i = 0; i < networkDim; ++i) {
394  pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
395  if (distance < 0)
396  distance = -distance;
397  if (machine_extent[i] - distance < distance)
398  distance = machine_extent[i] - distance;
399  hops += distance;
400  }
401  return true;
402  }
403 
404 
405 private:
406 
407  int networkDim;
408  int actual_networkDim;
409 
410  pcoord_t **procCoords;
411  pcoord_t **actual_procCoords;
412 
413  part_t *machine_extent;
414  part_t *actual_machine_extent;
415  bool is_transformed;
416 
417 
418  const Teuchos::ParameterList *pl;
419 
420 /*
421  bool delete_transformed_coords;
422  int transformed_network_dim;
423  pcoord_t **transformed_coordinates;
424 */
425 
426  void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
427  // reduces and stores all machine coordinates.
428  pcoord_t *tmpVect = new pcoord_t [this->numRanks];
429 
430  for (int i = 0; i < networkDim; i++) {
431  Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
432  this->numRanks,
433  procCoords[i], tmpVect);
434  pcoord_t *tmp = tmpVect;
435  tmpVect = procCoords[i];
436  procCoords[i] = tmp;
437  }
438  delete [] tmpVect;
439  }
440 
441 };
442 
443 } // namespace Zoltan2
444 #endif
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function set hops between rank1 and rank2 return true if coordinates are available ...
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
MachineClass Base class for representing machine coordinates, networks, etc.
SparseMatrixAdapter_t::part_t part_t
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
MachineTorusRCAForTesting(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
An RCA Machine Class (Torus Networks) for testing only. A more realistic machine should be used for ta...
MachineTorusRCAForTesting(const Teuchos::Comm< int > &comm)
Constructor: An RCA (torus network) machine description for testing.