Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Zoltan2_MachineRCA.hpp
Go to the documentation of this file.
1 #ifndef _ZOLTAN2_MACHINE_RCALIB_HPP_
2 #define _ZOLTAN2_MACHINE_RCALIB_HPP_
3 
4 #include <Teuchos_Comm.hpp>
5 #include <Teuchos_CommHelpers.hpp>
6 #include <Zoltan2_Machine.hpp>
7 
8 #ifdef HAVE_ZOLTAN2_RCALIB
9 extern "C"{
10 #include <rca_lib.h>
11 }
12 #endif
13 
14 
15 namespace Zoltan2{
16 
21 template <typename pcoord_t, typename part_t>
22 class MachineRCA : public Machine <pcoord_t, part_t> {
23 
24 public:
29  MachineRCA(const Teuchos::Comm<int> &comm):
30  Machine<pcoord_t,part_t>(comm),
31  networkDim(3), actual_networkDim(3),
32  procCoords(NULL), actual_procCoords(NULL),
33  machine_extent(NULL),actual_machine_extent(NULL),
34  is_transformed(false), pl(NULL)
35  {
36  actual_machine_extent = machine_extent = new int[networkDim];
37  this->getRealMachineExtent(this->machine_extent);
38  actual_machine_extent = machine_extent;
39 
40  //allocate memory for processor coordinates.
41  actual_procCoords = procCoords = new pcoord_t *[networkDim];
42  for (int i = 0; i < networkDim; ++i){
43  procCoords[i] = new pcoord_t[this->numRanks];
44  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
45  }
46 
47  //obtain the coordinate of the processor.
48  pcoord_t *xyz = new pcoord_t[networkDim];
50  for (int i = 0; i < networkDim; i++)
51  procCoords[i][this->myRank] = xyz[i];
52  delete [] xyz;
53 
54 
55  //reduceAll the coordinates of each processor.
56  gatherMachineCoordinates(comm);
57  }
58 
59  virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
60  int dim = 0;
61  int transformed_network_dim = networkDim;
62  if (dim < transformed_network_dim)
63  wrap_around[dim++] = true;
64  if (dim < transformed_network_dim)
65  wrap_around[dim++] = true;
66  if (dim < transformed_network_dim)
67  wrap_around[dim++] = true;
68  return true;
69  }
70 
71  MachineRCA(const Teuchos::Comm<int> &comm, const Teuchos::ParameterList &pl_ ):
72  Machine<pcoord_t,part_t>(comm),
73  networkDim(3), actual_networkDim(3),
74  procCoords(NULL), actual_procCoords(NULL),
75  machine_extent(NULL),actual_machine_extent(NULL),
76  is_transformed(false), pl(&pl_)
77  {
78 
79  actual_machine_extent = machine_extent = new int[networkDim];
80  this->getRealMachineExtent(this->machine_extent);
81  actual_machine_extent = machine_extent;
82 
83  //allocate memory for processor coordinates.
84  actual_procCoords = procCoords = new pcoord_t *[networkDim];
85  for (int i = 0; i < networkDim; ++i){
86  procCoords[i] = new pcoord_t[this->numRanks];
87  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
88  }
89  //obtain the coordinate of the processor.
90  pcoord_t *xyz = new pcoord_t[networkDim];
92  for (int i = 0; i < networkDim; i++)
93  procCoords[i][this->myRank] = xyz[i];
94  delete [] xyz;
95 
96 
97  //reduceAll the coordinates of each processor.
98  gatherMachineCoordinates(comm);
99 
100  const Teuchos::ParameterEntry *pe2 = this->pl->getEntryPtr("Machine_Optimization_Level");
101  //this->printAllocation();
102  if (pe2){
103  int optimization_level;
104  optimization_level = pe2->getValue<int>(&optimization_level);
105 
106  if (optimization_level == 1){
107  is_transformed = true;
108  this->networkDim = 3;
109  procCoords = new pcoord_t * [networkDim];
110  for(int i = 0; i < networkDim; ++i){
111  procCoords[i] = new pcoord_t[this->numRanks] ;//this->proc_coords[permutation[i]];
112  }
113  for (int i = 0; i < this->numRanks; ++i){
114  procCoords[0][i] = this->actual_procCoords[0][i] * 8;
115  int yordinal = this->actual_procCoords[1][i];
116  procCoords[1][i] = yordinal/2 * (16 + 8) + (yordinal %2) * 8;
117  int zordinal = this->actual_procCoords[2][i];
118  procCoords[2][i] = zordinal * 5 + (zordinal / 8) * 3;
119  }
120  int mx = this->machine_extent[0];
121  int my = this->machine_extent[1];
122  int mz = this->machine_extent[2];
123 
124 
125  this->machine_extent = new int[networkDim];
126  this->machine_extent[0] = mx * 8;
127  this->machine_extent[1] = my/2 * (16 + 8) + (my %2) * 8;
128  this->machine_extent[2] = mz * 5 + (mz / 8) * 3;
129  if(this->myRank == 0) std::cout << "Transforming the coordinates" << std::endl;
130  //this->printAllocation();
131  }
132  else if(optimization_level >= 3){
133  is_transformed = true;
134  this->networkDim = 6;
135  procCoords = new pcoord_t * [networkDim];
136  for(int i = 0; i < networkDim; ++i){
137  procCoords[i] = new pcoord_t[this->numRanks] ;//this->proc_coords[permutation[i]];
138  }
139 
140  //this->machine_extent[0] = this->actual_machine_extent
141  this->machine_extent = new int[networkDim];
142 
143  this->machine_extent[0] = ceil (int (this->actual_machine_extent[0]) / 2.0) * 64 ;
144  this->machine_extent[3] = 2 * 8 ;
145  this->machine_extent[1] = ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
146  this->machine_extent[4] = 2 * 8;
147  this->machine_extent[2] = ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
148  this->machine_extent[5] = 8 * 5;
149 
150  for (int k = 0; k < this->numRanks ; k++){
151  //This part is for titan.
152  //But it holds for other 3D torus machines such as Bluewaters.
153 
154  //Bandwitdh along
155  // X = 75
156  // Y = 37.5 or 75 --- everyother has 37.5 --- Y[0-1] =75 but Y[1-2]=37.5
157  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
158 
159  //Along X we make groups of 2. Then scale the distance with 64.
160  //First dimension is represents x/2
161  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 2) * 64;
162  //Then the 3rd dimension is x%2. distance is scaled with 8, reversely proportional with bw=75
163  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 2) * 8 ;
164 
165  //Along Y. Every other one has the slowest link. So we want distances between Y/2 huge.
166  //We scale Y/2 with 2400 so that we make sure that it is the first one we divie.
167  procCoords[1][k] = (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
168  //The other one is scaled with 8 as in X.
169  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
170 
171  //We make groups of 8 along Z. Then distances between these groups are scaled with 160.
172  //So that it is more than 2x distance than the distance with X grouping.
173  //That is we scale the groups of Zs with 160. Groups of X with 64.
174  //Zs has 8 processors connecting them, while X has only one. We want to divide along
175  //Z twice before dividing along X.
176  procCoords[2][k] = ((int (this->actual_procCoords[2][k])) / 8) * 160;
177  //In the second group everything is scaled with 5, as bw=120
178  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
179  }
180  }
181  else if(optimization_level == 2){
182  //This is as above case. but we make groups of 3 along X instead.
183  is_transformed = true;
184  this->networkDim = 6;
185  procCoords = new pcoord_t * [networkDim];
186  for(int i = 0; i < networkDim; ++i){
187  procCoords[i] = new pcoord_t[this->numRanks] ;//this->proc_coords[permutation[i]];
188  }
189 
190  //this->machine_extent[0] = this->actual_machine_extent
191  this->machine_extent = new int[networkDim];
192 
193  this->machine_extent[0] = ceil(int (this->actual_machine_extent[0]) / 3.0) * 128 ;
194  this->machine_extent[3] = 3 * 8 ;
195  this->machine_extent[1] = ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
196  this->machine_extent[4] = 2 * 8;
197  this->machine_extent[2] = ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
198  this->machine_extent[5] = 8 * 5;
199 
200 
201  for (int k = 0; k < this->numRanks ; k++){
202  //This part is for titan.
203  //But it holds for other 3D torus machines such as Bluewaters.
204 
205  //Bandwitdh along
206  // X = 75
207  // Y = 37.5 or 75 --- everyother has 37.5 --- Y[0-1] =75 but Y[1-2]=37.5
208  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
209 
210  //In this case we make groups of 3. along X.
211  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 3) * 128;
212  //Then the 3rd dimension is x%2. distance is scaled with 8, reversely proportional with bw=75
213  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 3) * 8 ;
214 
215  //Along Y. Every other one has the slowest link. So we want distances between Y/2 huge.
216  //We scale Y/2 with 2400 so that we make sure that it is the first one we divie.
217  procCoords[1][k] = (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
218  //The other one is scaled with 8 as in X.
219  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
220 
221 
222  procCoords[2][k] = ((int (this->actual_procCoords[2][k])) / 8) * 160;
223  //In the second group everything is scaled with 5, as bw=120
224  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
225  }
226  }
227  }
228  }
229 
230 
231 
232 
233  virtual ~MachineRCA() {
234  if (is_transformed){
235  is_transformed = false;
236  for (int i = 0; i < actual_networkDim; i++){
237  delete [] actual_procCoords[i];
238  }
239  delete [] actual_procCoords;
240  delete [] actual_machine_extent;
241  }
242  for (int i = 0; i < networkDim; i++){
243  delete [] procCoords[i];
244  }
245  delete [] procCoords;
246  delete [] machine_extent;
247  }
248 
249  bool hasMachineCoordinates() const { return true; }
250 
251  int getMachineDim() const { return this->networkDim;/*transformed_network_dim;*/ }
252  int getRealMachineDim() const { return this->actual_networkDim;/*transformed_network_dim;*/ }
253 
254  bool getMachineExtent(int *nxyz) const {
255  if (is_transformed){
256  return false;
257  }
258  else {
259  int dim = 0;
260  nxyz[dim++] = this->machine_extent[0]; //x
261  nxyz[dim++] = this->machine_extent[1]; //y
262  nxyz[dim++] = this->machine_extent[2]; //z
263  return true;
264  }
265  }
266 
267  bool getRealMachineExtent(int *nxyz) const {
268 #if defined (HAVE_ZOLTAN2_RCALIB)
269  mesh_coord_t mxyz;
270  rca_get_max_dimension(&mxyz);
271  int dim = 0;
272  nxyz[dim++] = mxyz.mesh_x + 1; //x
273  nxyz[dim++] = mxyz.mesh_y + 1; //y
274  nxyz[dim++] = mxyz.mesh_z + 1; //z
275  return true;
276 #else
277  return false;
278 #endif
279  }
280 
281 
283  if(this->myRank == 0){
284  for (int i = 0; i < this->numRanks; ++i){
285  std::cout << "Rank:" << i << " " << procCoords[0][i] << " " << procCoords[1][i] << " " << procCoords[2][i] << std::endl;
286  }
287  std::cout << "Machine Extent:" << " " << this->machine_extent[0] << " " << this->machine_extent[1] << " " << this->machine_extent[2] << std::endl;
288  }
289  }
290 
291  bool getMyMachineCoordinate(pcoord_t *xyz) {
292  for (int i = 0; i < this->networkDim; ++i){
293  xyz[i] = procCoords[i][this->myRank];
294  }
295  return true;
296  }
297 
298  bool getMyActualMachineCoordinate(pcoord_t *xyz) {
299 #if defined (HAVE_ZOLTAN2_RCALIB)
300  rs_node_t nodeInfo; /* Cray node info for node running this function */
301  rca_get_nodeid(&nodeInfo);
302  int NIDs = (int)nodeInfo.rs_node_s._node_id; /* its node ID */
303 
304  mesh_coord_t node_coord;
305  int returnval = rca_get_meshcoord((uint16_t)NIDs, &node_coord);
306  if (returnval == -1){
307  return false;
308  }
309  xyz[0] = node_coord.mesh_x;
310  xyz[1] = node_coord.mesh_y;
311  xyz[2] = node_coord.mesh_z;
312  return true;
313 #else
314  return false;
315 #endif
316  }
317 
318  inline bool getMachineCoordinate(const int rank,
319  pcoord_t *xyz) const {
320  for (int i = 0; i < this->networkDim; ++i){
321  xyz[i] = procCoords[i][rank];
322  }
323  return true;
324  }
325 
326 
327  bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
328  return false; // cannot yet return from nodename
329  }
330 
331  bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
332  allCoords = procCoords;
333  return true;
334  }
335 
336  virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops){
337  hops = 0;
338  for (int i = 0; i < networkDim; ++i){
339  pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
340  if (distance < 0 ) distance = -distance;
341  if (machine_extent[i] - distance < distance) distance = machine_extent[i] - distance;
342  hops += distance;
343  }
344  return true;
345  }
346 
347 
348 private:
349 
350  int networkDim; // dimension of procCoords / machine_extent as exposed: 3, or 6 after a level >= 2 transformation
351  int actual_networkDim; // dimension of the untransformed coordinates; the constructors set it to 3
352 
353  pcoord_t **procCoords; // coordinate table indexed [dim][rank]; holds transformed values when is_transformed
354  pcoord_t **actual_procCoords; // untransformed table; same pointer as procCoords unless is_transformed
355 
356  part_t *machine_extent; // per-dimension extent matching procCoords (allocated as int[] in the constructors)
357  part_t *actual_machine_extent; // untransformed extents; same pointer as machine_extent unless is_transformed
358  bool is_transformed; // true once a Machine_Optimization_Level >= 1 transformation was applied
359 
360 
361  const Teuchos::ParameterList *pl; // non-owning; address of the list passed to the constructor, NULL for the one-argument constructor
362  //bool delete_tranformed_coords;
363 
364 /*
365  bool delete_transformed_coords;
366  int transformed_network_dim;
367  pcoord_t **transformed_coordinates;
368 */
369  void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
370  // reduces and stores all machine coordinates.
371  pcoord_t *tmpVect = new pcoord_t [this->numRanks];
372 
373  for (int i = 0; i < networkDim; i++) {
374  Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
375  this->numRanks, procCoords[i], tmpVect);
376  pcoord_t *tmp = tmpVect;
377  tmpVect = procCoords[i];
378  procCoords[i] = tmp;
379  }
380  delete [] tmpVect;
381  }
382 
383 };
384 }
385 #endif
bool getMachineExtent(int *nxyz) const
A Machine Class for testing only. A more realistic machine should be used for task mapping...
bool getRealMachineExtent(int *nxyz) const
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops)
MachineRCA(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
MachineClass Base class for representing machine coordinates, networks, etc.
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
bool getMyActualMachineCoordinate(pcoord_t *xyz)
MachineRCA(const Teuchos::Comm< int > &comm)
Constructor: A BlueGeneQ network machine description.
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
SparseMatrixAdapter_t::part_t part_t
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
bool getMyMachineCoordinate(pcoord_t *xyz)
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
bool hasMachineCoordinates() const