Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Zoltan2_MachineRCAForTest.hpp
Go to the documentation of this file.
1 #ifndef _ZOLTAN2_MACHINE_RCALIBTEST_HPP_
2 #define _ZOLTAN2_MACHINE_RCALIBTEST_HPP_
3 
#include <Teuchos_Comm.hpp>
#include <Teuchos_CommHelpers.hpp>
#include <Zoltan2_Machine.hpp>
#include <cmath>     /* ceil */
#include <cstdlib>   /* srand, rand */
#include <cstring>   /* memset */
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
10 
11 namespace Zoltan2{
12 
17 template <typename pcoord_t, typename part_t>
18 class MachineRCATest : public Machine <pcoord_t, part_t> {
19 
20 public:
25  MachineRCATest(const Teuchos::Comm<int> &comm):
26  Machine<pcoord_t,part_t>(comm),
27  networkDim(3), actual_networkDim(3),
28  procCoords(NULL), actual_procCoords(NULL),
29  machine_extent(NULL),actual_machine_extent(NULL),
30  is_transformed(false), pl(NULL)
31  {
32  actual_machine_extent = machine_extent = new int[networkDim];
33  this->getRealMachineExtent(this->machine_extent);
34  actual_machine_extent = machine_extent;
35 
36  //allocate memory for processor coordinates.
37  actual_procCoords = procCoords = new pcoord_t *[networkDim];
38  for (int i = 0; i < networkDim; ++i){
39  procCoords[i] = new pcoord_t[this->numRanks];
40  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
41  }
42 
43  //obtain the coordinate of the processor.
44  pcoord_t *xyz = new pcoord_t[networkDim];
46  for (int i = 0; i < networkDim; i++)
47  procCoords[i][this->myRank] = xyz[i];
48  delete [] xyz;
49 
50 
51  //reduceAll the coordinates of each processor.
52  gatherMachineCoordinates(comm);
53  }
54 
55  virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
56  int dim = 0;
57  int transformed_network_dim = networkDim;
58  if (dim < transformed_network_dim)
59  wrap_around[dim++] = true;
60  if (dim < transformed_network_dim)
61  wrap_around[dim++] = true;
62  if (dim < transformed_network_dim)
63  wrap_around[dim++] = true;
64  return true;
65  }
66 
67  MachineRCATest(const Teuchos::Comm<int> &comm, const Teuchos::ParameterList &pl_ ):
68  Machine<pcoord_t,part_t>(comm),
69  networkDim(3), actual_networkDim(3),
70  procCoords(NULL), actual_procCoords(NULL),
71  machine_extent(NULL),actual_machine_extent(NULL),
72  is_transformed(false), pl(&pl_)
73  {
74 
75  actual_machine_extent = machine_extent = new int[networkDim];
76  this->getRealMachineExtent(this->machine_extent);
77  actual_machine_extent = machine_extent;
78 
79  //allocate memory for processor coordinates.
80  actual_procCoords = procCoords = new pcoord_t *[networkDim];
81 
82 
83  const Teuchos::ParameterEntry *pe1 = this->pl->getEntryPtr("Input_RCA_Machine_Coords");
84  if (pe1){
85  std::string input_coord_file;
86  input_coord_file = pe1->getValue<std::string>(&input_coord_file);
87  if (input_coord_file != ""){
88 
89  if (this->myRank == 0){
90  std::vector < std::vector <pcoord_t> > proc_coords(networkDim);
91  std::fstream machine_coord_file(input_coord_file.c_str());
92 
93  part_t i = 0;
94  pcoord_t a,b, c;
95  machine_coord_file >> a >> b >> c;
96  while(!machine_coord_file.eof()){
97  proc_coords[0].push_back(a);
98  proc_coords[1].push_back(b);
99  proc_coords[2].push_back(c);
100  ++i;
101  machine_coord_file >> a >> b >> c;
102  }
103 
104  machine_coord_file.close();
105  std::cout << "Rewriting numprocs from:" << this->numRanks << " to:" << i << std::endl;
106  this->numRanks = i;
107 
108  for(int ii = 0; ii < networkDim; ++ii){
109  procCoords[ii] = new pcoord_t[this->numRanks];
110  for (int j = 0; j < this->numRanks; ++j){
111  procCoords[ii][j] = proc_coords[ii][j];
112  }
113  }
114  }
115  comm.broadcast(0, sizeof(int), (char *) &(this->numRanks));
116 
117  if (this->myRank != 0){
118  for (int i = 0; i < networkDim; ++i){
119  procCoords[i] = new pcoord_t[this->numRanks];
120  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
121  }
122  }
123  }
124  }
125  else {
126  for (int i = 0; i < networkDim; ++i){
127  procCoords[i] = new pcoord_t[this->numRanks];
128  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
129  }
130  //obtain the coordinate of the processor.
131  pcoord_t *xyz = new pcoord_t[networkDim];
133  for (int i = 0; i < networkDim; i++)
134  procCoords[i][this->myRank] = xyz[i];
135  delete [] xyz;
136  }
137 
138  //reduceAll the coordinates of each processor.
139  gatherMachineCoordinates(comm);
140 
141  const Teuchos::ParameterEntry *pe2 = this->pl->getEntryPtr("Machine_Optimization_Level");
142  //this->printAllocation();
143  if (pe2){
144  int optimization_level;
145  optimization_level = pe2->getValue<int>(&optimization_level);
146 
147  if (optimization_level == 1){
148  is_transformed = true;
149  this->networkDim = 3;
150  procCoords = new pcoord_t * [networkDim];
151  for(int i = 0; i < networkDim; ++i){
152  procCoords[i] = new pcoord_t[this->numRanks] ;//this->proc_coords[permutation[i]];
153  }
154  for (int i = 0; i < this->numRanks; ++i){
155  procCoords[0][i] = this->actual_procCoords[0][i] * 8;
156  int yordinal = this->actual_procCoords[1][i];
157  procCoords[1][i] = yordinal/2 * (16 + 8) + (yordinal %2) * 8;
158  int zordinal = this->actual_procCoords[2][i];
159  procCoords[2][i] = zordinal * 5 + (zordinal / 8) * 3;
160  }
161  int mx = this->machine_extent[0];
162  int my = this->machine_extent[1];
163  int mz = this->machine_extent[2];
164 
165 
166  this->machine_extent = new int[networkDim];
167  this->machine_extent[0] = mx * 8;
168  this->machine_extent[1] = my/2 * (16 + 8) + (my %2) * 8;
169  this->machine_extent[2] = mz * 5 + (mz / 8) * 3;
170  if(this->myRank == 0) std::cout << "Transforming the coordinates" << std::endl;
171  //this->printAllocation();
172  }
173  else if(optimization_level >= 3){
174  is_transformed = true;
175  this->networkDim = 6;
176  procCoords = new pcoord_t * [networkDim];
177  for(int i = 0; i < networkDim; ++i){
178  procCoords[i] = new pcoord_t[this->numRanks] ;//this->proc_coords[permutation[i]];
179  }
180 
181  //this->machine_extent[0] = this->actual_machine_extent
182  this->machine_extent = new int[networkDim];
183 
184  this->machine_extent[0] = ceil (int (this->actual_machine_extent[0]) / 2.0) * 64 ;
185  this->machine_extent[3] = 2 * 8 ;
186  this->machine_extent[1] = ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
187  this->machine_extent[4] = 2 * 8;
188  this->machine_extent[2] = ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
189  this->machine_extent[5] = 8 * 5;
190 
191  for (int k = 0; k < this->numRanks ; k++){
192  //This part is for titan.
193  //But it holds for other 3D torus machines such as Bluewaters.
194 
195  //Bandwitdh along
196  // X = 75
197  // Y = 37.5 or 75 --- everyother has 37.5 --- Y[0-1] =75 but Y[1-2]=37.5
198  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
199 
200  //Along X we make groups of 2. Then scale the distance with 64.
201  //First dimension is represents x/2
202  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 2) * 64;
203  //Then the 3rd dimension is x%2. distance is scaled with 8, reversely proportional with bw=75
204  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 2) * 8 ;
205 
206  //Along Y. Every other one has the slowest link. So we want distances between Y/2 huge.
207  //We scale Y/2 with 2400 so that we make sure that it is the first one we divie.
208  procCoords[1][k] = (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
209  //The other one is scaled with 8 as in X.
210  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
211 
212  //We make groups of 8 along Z. Then distances between these groups are scaled with 160.
213  //So that it is more than 2x distance than the distance with X grouping.
214  //That is we scale the groups of Zs with 160. Groups of X with 64.
215  //Zs has 8 processors connecting them, while X has only one. We want to divide along
216  //Z twice before dividing along X.
217  procCoords[2][k] = ((int (this->actual_procCoords[2][k])) / 8) * 160;
218  //In the second group everything is scaled with 5, as bw=120
219  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
220  }
221  }
222  else if(optimization_level == 2){
223  //This is as above case. but we make groups of 3 along X instead.
224  is_transformed = true;
225  this->networkDim = 6;
226  procCoords = new pcoord_t * [networkDim];
227  for(int i = 0; i < networkDim; ++i){
228  procCoords[i] = new pcoord_t[this->numRanks] ;//this->proc_coords[permutation[i]];
229  }
230 
231  //this->machine_extent[0] = this->actual_machine_extent
232  this->machine_extent = new int[networkDim];
233 
234  this->machine_extent[0] = ceil(int (this->actual_machine_extent[0]) / 3.0) * 128 ;
235  this->machine_extent[3] = 3 * 8 ;
236  this->machine_extent[1] = ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
237  this->machine_extent[4] = 2 * 8;
238  this->machine_extent[2] = ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
239  this->machine_extent[5] = 8 * 5;
240 
241 
242  for (int k = 0; k < this->numRanks ; k++){
243  //This part is for titan.
244  //But it holds for other 3D torus machines such as Bluewaters.
245 
246  //Bandwitdh along
247  // X = 75
248  // Y = 37.5 or 75 --- everyother has 37.5 --- Y[0-1] =75 but Y[1-2]=37.5
249  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
250 
251  //In this case we make groups of 3. along X.
252  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 3) * 128;
253  //Then the 3rd dimension is x%2. distance is scaled with 8, reversely proportional with bw=75
254  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 3) * 8 ;
255 
256  //Along Y. Every other one has the slowest link. So we want distances between Y/2 huge.
257  //We scale Y/2 with 2400 so that we make sure that it is the first one we divie.
258  procCoords[1][k] = (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
259  //The other one is scaled with 8 as in X.
260  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
261 
262 
263  procCoords[2][k] = ((int (this->actual_procCoords[2][k])) / 8) * 160;
264  //In the second group everything is scaled with 5, as bw=120
265  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
266  }
267  }
268  }
269  }
270 
271 
272 
273 
274  virtual ~MachineRCATest() {
275  if (is_transformed){
276  is_transformed = false;
277  for (int i = 0; i < actual_networkDim; i++){
278  delete [] actual_procCoords[i];
279  }
280  delete [] actual_procCoords;
281  delete [] actual_machine_extent;
282  }
283  for (int i = 0; i < networkDim; i++){
284  delete [] procCoords[i];
285  }
286  delete [] procCoords;
287  delete [] machine_extent;
288  }
289 
290  bool hasMachineCoordinates() const { return true; }
291 
292  int getMachineDim() const { return this->networkDim;/*transformed_network_dim;*/ }
293  int getRealMachineDim() const { return this->actual_networkDim;/*transformed_network_dim;*/ }
294 
295  bool getMachineExtent(int *nxyz) const {
296  if (is_transformed){
297  return false;
298  }
299  else {
300  int dim = 0;
301  nxyz[dim++] = this->machine_extent[0]; //x
302  nxyz[dim++] = this->machine_extent[1]; //y
303  nxyz[dim++] = this->machine_extent[2]; //z
304  return true;
305  }
306  }
307 
308  bool getRealMachineExtent(int *nxyz) const {
309  int dim = 0;
310  nxyz[dim++] = 25; //x
311  nxyz[dim++] = 16; //y
312  nxyz[dim++] = 24; //z
313  return true;
314  }
315 
316 
318  if(this->myRank == 0){
319  for (int i = 0; i < this->numRanks; ++i){
320  std::cout << "Rank:" << i << " " << procCoords[0][i] << " " << procCoords[1][i] << " " << procCoords[2][i] << std::endl;
321  }
322  std::cout << "Machine Extent:" << " " << this->machine_extent[0] << " " << this->machine_extent[1] << " " << this->machine_extent[2] << std::endl;
323  }
324  }
325 
326  bool getMyMachineCoordinate(pcoord_t *xyz) {
327  for (int i = 0; i < this->networkDim; ++i){
328  xyz[i] = procCoords[i][this->myRank];
329  }
330  return true;
331  }
332 
333  bool getMyActualMachineCoordinate(pcoord_t *xyz) {
334  xyz[0] = rand() % 25;
335  xyz[1] = rand() % 16;
336  xyz[2] = rand() % 24;
337  return true;
338  }
339 
340  inline bool getMachineCoordinate(const int rank,
341  pcoord_t *xyz) const {
342  for (int i = 0; i < this->networkDim; ++i){
343  xyz[i] = procCoords[i][rank];
344  }
345  return true;
346  }
347 
348 
349  bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
350  return false; // cannot yet return from nodename
351  }
352 
353  bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
354  allCoords = procCoords;
355  return true;
356  }
357 
358  virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops){
359  hops = 0;
360  for (int i = 0; i < networkDim; ++i){
361  pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
362  if (distance < 0 ) distance = -distance;
363  if (machine_extent[i] - distance < distance) distance = machine_extent[i] - distance;
364  hops += distance;
365  }
366  return true;
367  }
368 
369 
370 private:
371 
372  int networkDim;
373  int actual_networkDim;
374 
375  pcoord_t **procCoords;
376  pcoord_t **actual_procCoords;
377 
378  part_t *machine_extent;
379  part_t *actual_machine_extent;
380  bool is_transformed;
381 
382 
383  const Teuchos::ParameterList *pl;
384  //bool delete_tranformed_coords;
385 
386 /*
387  bool delete_transformed_coords;
388  int transformed_network_dim;
389  pcoord_t **transformed_coordinates;
390 */
391  void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
392  // reduces and stores all machine coordinates.
393  pcoord_t *tmpVect = new pcoord_t [this->numRanks];
394 
395  for (int i = 0; i < networkDim; i++) {
396  Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
397  this->numRanks, procCoords[i], tmpVect);
398  pcoord_t *tmp = tmpVect;
399  tmpVect = procCoords[i];
400  procCoords[i] = tmp;
401  }
402  delete [] tmpVect;
403  }
404 
405 };
406 }
407 #endif
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
bool getMyMachineCoordinate(pcoord_t *xyz)
bool getMachineExtent(int *nxyz) const
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
A Machine Class for testing only. A more realistic machine should be used for task mapping...
MachineClass Base class for representing machine coordinates, networks, etc.
bool getMyActualMachineCoordinate(pcoord_t *xyz)
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
MachineRCATest(const Teuchos::Comm< int > &comm)
Constructor: A BlueGeneQ network machine description.
SparseMatrixAdapter_t::part_t part_t
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops)
bool getRealMachineExtent(int *nxyz) const
MachineRCATest(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)