Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Zoltan2_MachineTorusRCAForTesting.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Zoltan2: A package of combinatorial algorithms for scientific computing
4 //
5 // Copyright 2012 NTESS and the Zoltan2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef _ZOLTAN2_MACHINE_TORUS_RCALIBTEST_HPP_
11 #define _ZOLTAN2_MACHINE_TORUS_RCALIBTEST_HPP_
12 
#include <Teuchos_Comm.hpp>
#include <Teuchos_CommHelpers.hpp>
#include <Zoltan2_Machine.hpp>

#include <cmath>   /* ceil */
#include <cstdlib> /* srand, rand */
#include <cstring> /* memset */
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
20 
21 namespace Zoltan2{
22 
27 template <typename pcoord_t, typename part_t>
28 class MachineTorusRCAForTesting : public Machine <pcoord_t, part_t> {
29 
30 public:
35  MachineTorusRCAForTesting(const Teuchos::Comm<int> &comm):
36  Machine<pcoord_t,part_t>(comm),
37  networkDim(3), actual_networkDim(3),
38  procCoords(NULL), actual_procCoords(NULL),
39  machine_extent(NULL),actual_machine_extent(NULL),
40  is_transformed(false), pl(NULL)
41  {
42  actual_machine_extent = machine_extent = new int[networkDim];
43  this->getRealMachineExtent(this->machine_extent);
44  actual_machine_extent = machine_extent;
45 
46  // Allocate memory for processor coordinates.
47  actual_procCoords = procCoords = new pcoord_t *[networkDim];
48  for (int i = 0; i < networkDim; ++i) {
49  procCoords[i] = new pcoord_t[this->numRanks];
50  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
51  }
52 
53  // Obtain the coordinate of the processor.
54  pcoord_t *xyz = new pcoord_t[networkDim];
56  for (int i = 0; i < networkDim; i++)
57  procCoords[i][this->myRank] = xyz[i];
58  delete [] xyz;
59 
60 
61  // reduceAll the coordinates of each processor.
62  gatherMachineCoordinates(comm);
63  }
64 
65  virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
66  int dim = 0;
67  int transformed_network_dim = networkDim;
68 
69  if (dim < transformed_network_dim)
70  wrap_around[dim++] = true;
71  if (dim < transformed_network_dim)
72  wrap_around[dim++] = true;
73  if (dim < transformed_network_dim)
74  wrap_around[dim++] = true;
75  return true;
76  }
77 
78  MachineTorusRCAForTesting(const Teuchos::Comm<int> &comm,
79  const Teuchos::ParameterList &pl_):
80  Machine<pcoord_t,part_t>(comm),
81  networkDim(3), actual_networkDim(3),
82  procCoords(NULL), actual_procCoords(NULL),
83  machine_extent(NULL),actual_machine_extent(NULL),
84  is_transformed(false), pl(&pl_)
85  {
86 
87  actual_machine_extent = machine_extent = new int[networkDim];
88  this->getRealMachineExtent(this->machine_extent);
89  actual_machine_extent = machine_extent;
90 
91  // Allocate memory for processor coordinates.
92  actual_procCoords = procCoords = new pcoord_t *[networkDim];
93 
94 
95  const Teuchos::ParameterEntry *pe1 =
96  this->pl->getEntryPtr("Input_RCA_Machine_Coords");
97  if (pe1) {
98  std::string input_coord_file;
99  input_coord_file = pe1->getValue<std::string>(&input_coord_file);
100  if (input_coord_file != "") {
101 
102  if (this->myRank == 0) {
103  std::vector < std::vector <pcoord_t> > proc_coords(networkDim);
104  std::fstream machine_coord_file(input_coord_file.c_str());
105 
106  part_t i = 0;
107  pcoord_t a,b, c;
108  machine_coord_file >> a >> b >> c;
109  while(!machine_coord_file.eof()) {
110  proc_coords[0].push_back(a);
111  proc_coords[1].push_back(b);
112  proc_coords[2].push_back(c);
113  ++i;
114  machine_coord_file >> a >> b >> c;
115  }
116 
117  machine_coord_file.close();
118  std::cout << "Rewriting numprocs from:"
119  << this->numRanks << " to:" << i << std::endl;
120  this->numRanks = i;
121 
122  for(int ii = 0; ii < networkDim; ++ii) {
123  procCoords[ii] = new pcoord_t[this->numRanks];
124  for (int j = 0; j < this->numRanks; ++j) {
125  procCoords[ii][j] = proc_coords[ii][j];
126  }
127  }
128  }
129  comm.broadcast(0, sizeof(int), (char *) &(this->numRanks));
130 
131  if (this->myRank != 0) {
132  for (int i = 0; i < networkDim; ++i) {
133  procCoords[i] = new pcoord_t[this->numRanks];
134  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
135  }
136  }
137  }
138  }
139  else {
140  for (int i = 0; i < networkDim; ++i) {
141  procCoords[i] = new pcoord_t[this->numRanks];
142  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
143  }
144  // Obtain the coordinate of the processor.
145  pcoord_t *xyz = new pcoord_t[networkDim];
147  for (int i = 0; i < networkDim; i++)
148  procCoords[i][this->myRank] = xyz[i];
149  delete [] xyz;
150  }
151 
152  // reduceAll the coordinates of each processor.
153  gatherMachineCoordinates(comm);
154 
155  const Teuchos::ParameterEntry *pe2 =
156  this->pl->getEntryPtr("Machine_Optimization_Level");
157 // this->printAllocation();
158  if (pe2) {
159  int optimization_level;
160  optimization_level = pe2->getValue<int>(&optimization_level);
161 
162  if (optimization_level == 1) {
163  is_transformed = true;
164  this->networkDim = 3;
165  procCoords = new pcoord_t * [networkDim];
166  for(int i = 0; i < networkDim; ++i) {
167  procCoords[i] = new pcoord_t[this->numRanks] ;
168 // this->proc_coords[permutation[i]];
169  }
170  for (int i = 0; i < this->numRanks; ++i) {
171  procCoords[0][i] = this->actual_procCoords[0][i] * 8;
172  int yordinal = this->actual_procCoords[1][i];
173  procCoords[1][i] = yordinal/2 * (16 + 8) + (yordinal %2) * 8;
174  int zordinal = this->actual_procCoords[2][i];
175  procCoords[2][i] = zordinal * 5 + (zordinal / 8) * 3;
176  }
177  int mx = this->machine_extent[0];
178  int my = this->machine_extent[1];
179  int mz = this->machine_extent[2];
180 
181 
182  this->machine_extent = new int[networkDim];
183  this->machine_extent[0] = mx * 8;
184  this->machine_extent[1] = my/2 * (16 + 8) + (my %2) * 8;
185  this->machine_extent[2] = mz * 5 + (mz / 8) * 3;
186  if(this->myRank == 0)
187  std::cout << "Transforming the coordinates" << std::endl;
188 // this->printAllocation();
189  }
190  else if(optimization_level >= 3) {
191  is_transformed = true;
192  this->networkDim = 6;
193  procCoords = new pcoord_t * [networkDim];
194  for(int i = 0; i < networkDim; ++i) {
195  procCoords[i] = new pcoord_t[this->numRanks] ;
196 // this->proc_coords[permutation[i]];
197  }
198 
199 // this->machine_extent[0] = this->actual_machine_extent
200  this->machine_extent = new int[networkDim];
201 
202  this->machine_extent[0] =
203  ceil (int (this->actual_machine_extent[0]) / 2.0) * 64 ;
204  this->machine_extent[3] = 2 * 8 ;
205  this->machine_extent[1] =
206  ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
207  this->machine_extent[4] = 2 * 8;
208  this->machine_extent[2] =
209  ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
210  this->machine_extent[5] = 8 * 5;
211 
212  for (int k = 0; k < this->numRanks ; k++) {
213  // This part is for titan.
214  // But it holds for other 3D torus machines such as Bluewaters.
215 
216  // Bandwitdh along
217  // X = 75
218  // Y = 37.5 or 75 --- everyother has 37.5
219  // --- Y[0-1] =75 but Y[1-2]=37.5
220  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
221 
222  // Along X we make groups of 2. Then scale the distance with 64.
223  // First dimension is represents x/2
224  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 2) * 64;
225  // Then the 3rd dimension is x%2. distance is scaled with 8,
226  // reversely proportional with bw=75
227  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 2) * 8 ;
228 
229  // Along Y. Every other one has the slowest link. So we want
230  // distances between Y/2 huge.
231  // We scale Y/2 with 2400 so that we make sure that it is the
232  // first one we divie.
233  procCoords[1][k] =
234  (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
235  // The other one is scaled with 8 as in X.
236  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
237 
238  // We make groups of 8 along Z. Then distances between these
239  // groups are scaled with 160.
240  // So that it is more than 2x distance than the distance with X
241  // grouping.
242  // That is we scale the groups of Zs with 160. Groups of X with 64.
243  // Zs has 8 processors connecting them, while X has only one. We
244  // want to divide along Z twice before dividing along X.
245  procCoords[2][k] =
246  ((int (this->actual_procCoords[2][k])) / 8) * 160;
247  // In the second group everything is scaled with 5, as bw=120
248  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
249  }
250  }
251  else if(optimization_level == 2) {
252  // This is as above case. but we make groups of 3 along X instead.
253  is_transformed = true;
254  this->networkDim = 6;
255  procCoords = new pcoord_t * [networkDim];
256  for(int i = 0; i < networkDim; ++i) {
257  procCoords[i] = new pcoord_t[this->numRanks] ;
258 // this->proc_coords[permutation[i]];
259  }
260 
261 // this->machine_extent[0] = this->actual_machine_extent
262  this->machine_extent = new int[networkDim];
263 
264  this->machine_extent[0] =
265  ceil(int (this->actual_machine_extent[0]) / 3.0) * 128 ;
266  this->machine_extent[3] = 3 * 8 ;
267  this->machine_extent[1] =
268  ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
269  this->machine_extent[4] = 2 * 8;
270  this->machine_extent[2] =
271  ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
272  this->machine_extent[5] = 8 * 5;
273 
274 
275  for (int k = 0; k < this->numRanks ; k++) {
276  // This part is for titan.
277  // But it holds for other 3D torus machines such as Bluewaters.
278 
279  // Bandwitdh along
280  // X = 75
281  // Y = 37.5 or 75 --- everyother has 37.5
282  // --- Y[0-1] =75 but Y[1-2]=37.5
283  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
284 
285  // In this case we make groups of 3. along X.
286  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 3) * 128;
287  // Then the 3rd dimension is x%2. distance is scaled with 8,
288  // reversely proportional with bw=75
289  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 3) * 8 ;
290 
291  // Along Y. Every other one has the slowest link. So we want
292  // distances between Y/2 huge.
293  // We scale Y/2 with 2400 so that we make sure that it is the
294  // first one we divie.
295  procCoords[1][k] =
296  (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
297  // The other one is scaled with 8 as in X.
298  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
299 
300 
301  procCoords[2][k] =
302  ((int (this->actual_procCoords[2][k])) / 8) * 160;
303  // In the second group everything is scaled with 5, as bw=120
304  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
305  }
306  }
307  }
308  }
309 
311  if (is_transformed) {
312  is_transformed = false;
313  for (int i = 0; i < actual_networkDim; i++) {
314  delete [] actual_procCoords[i];
315  }
316  delete [] actual_procCoords;
317  delete [] actual_machine_extent;
318  }
319  for (int i = 0; i < networkDim; i++) {
320  delete [] procCoords[i];
321  }
322  delete [] procCoords;
323  delete [] machine_extent;
324  }
325 
326  bool hasMachineCoordinates() const { return true; }
327 
328  int getMachineDim() const { return this->networkDim; }
329  int getRealMachineDim() const { return this->actual_networkDim; }
330 
331  bool getMachineExtent(int *nxyz) const {
332  if (is_transformed) {
333  return false;
334  }
335  else {
336  int dim = 0;
337  nxyz[dim++] = this->machine_extent[0]; //x
338  nxyz[dim++] = this->machine_extent[1]; //y
339  nxyz[dim++] = this->machine_extent[2]; //z
340  return true;
341  }
342  }
343 
344  bool getRealMachineExtent(int *nxyz) const {
345  int dim = 0;
346  nxyz[dim++] = 25; //x
347  nxyz[dim++] = 16; //y
348  nxyz[dim++] = 24; //z
349  return true;
350  }
351 
352 
354  if(this->myRank == 0) {
355  for (int i = 0; i < this->numRanks; ++i) {
356  std::cout << "Rank:" << i
357  << " " << procCoords[0][i]
358  << " " << procCoords[1][i]
359  << " " << procCoords[2][i] << std::endl;
360  }
361  std::cout << "Machine Extent:"
362  << " " << this->machine_extent[0]
363  << " " << this->machine_extent[1]
364  << " " << this->machine_extent[2] << std::endl;
365  }
366  }
367 
368  bool getMyMachineCoordinate(pcoord_t *xyz) {
369  for (int i = 0; i < this->networkDim; ++i) {
370  xyz[i] = procCoords[i][this->myRank];
371  }
372  return true;
373  }
374 
375  bool getMyActualMachineCoordinate(pcoord_t *xyz) {
376  xyz[0] = rand() % 25;
377  xyz[1] = rand() % 16;
378  xyz[2] = rand() % 24;
379  return true;
380  }
381 
382  inline bool getMachineCoordinate(const int rank,
383  pcoord_t *xyz) const {
384  for (int i = 0; i < this->networkDim; ++i) {
385  xyz[i] = procCoords[i][rank];
386  }
387  return true;
388  }
389 
390 
391  bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
392  return false; // cannot yet return from nodename
393  }
394 
395  bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
396  allCoords = procCoords;
397  return true;
398  }
399 
400  virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
401  hops = 0;
402  for (int i = 0; i < networkDim; ++i) {
403  pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
404  if (distance < 0)
405  distance = -distance;
406  if (machine_extent[i] - distance < distance)
407  distance = machine_extent[i] - distance;
408  hops += distance;
409  }
410  return true;
411  }
412 
413 
414 private:
415 
416  int networkDim;
417  int actual_networkDim;
418 
419  pcoord_t **procCoords;
420  pcoord_t **actual_procCoords;
421 
422  part_t *machine_extent;
423  part_t *actual_machine_extent;
424  bool is_transformed;
425 
426 
427  const Teuchos::ParameterList *pl;
428 
429 /*
430  bool delete_transformed_coords;
431  int transformed_network_dim;
432  pcoord_t **transformed_coordinates;
433 */
434 
435  void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
436  // reduces and stores all machine coordinates.
437  pcoord_t *tmpVect = new pcoord_t [this->numRanks];
438 
439  for (int i = 0; i < networkDim; i++) {
440  Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
441  this->numRanks,
442  procCoords[i], tmpVect);
443  pcoord_t *tmp = tmpVect;
444  tmpVect = procCoords[i];
445  procCoords[i] = tmp;
446  }
447  delete [] tmpVect;
448  }
449 
450 };
451 
452 } // namespace Zoltan2
453 #endif
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops between rank1 and rank2 and returns true if coordinates are available ...
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
MachineClass Base class for representing machine coordinates, networks, etc.
SparseMatrixAdapter_t::part_t part_t
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
MachineTorusRCAForTesting(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
An RCA Machine Class (Torus Networks) for testing only. A more realistic machine should be used for ta...
MachineTorusRCAForTesting(const Teuchos::Comm< int > &comm)
Constructor: A BlueGeneQ network machine description.