10 #ifndef _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_
11 #define _ZOLTAN2_MACHINE_DRAGONFLY_RCALIBTEST_HPP_
13 #include <Teuchos_Comm.hpp>
14 #include <Teuchos_CommHelpers.hpp>
30 template <
typename pcoord_t,
typename part_t>
44 transformed_networkDim(3),
46 transformed_procCoords(NULL),
47 actual_procCoords(NULL),
48 transformed_machine_extent(NULL),
49 actual_machine_extent(NULL),
52 is_transformed(false),
55 actual_machine_extent =
new int[actual_networkDim];
60 group_count =
new part_t[actual_machine_extent[0]];
62 memset(group_count, 0,
sizeof(
part_t) * actual_machine_extent[0]);
65 transformed_networkDim = 1 + actual_machine_extent[1] +
66 actual_machine_extent[2];
67 transformed_machine_extent =
new int[transformed_networkDim];
70 actual_procCoords =
new pcoord_t *[actual_networkDim];
71 transformed_procCoords =
new pcoord_t *[transformed_networkDim];
73 for (
int i = 0; i < actual_networkDim; ++i) {
74 actual_procCoords[i] =
new pcoord_t[this->
numRanks];
75 memset(actual_procCoords[i], 0,
79 pcoord_t *xyz =
new pcoord_t[transformed_networkDim];
81 for (
int i = 0; i < actual_networkDim; ++i)
82 actual_procCoords[i][this->
myRank] = xyz[i];
88 memset(tmp_vec, 0,
sizeof(
part_t) * actual_machine_extent[0]);
90 Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
91 actual_machine_extent[0],
96 num_unique_groups = 0;
98 for (
int i = 0; i < actual_machine_extent[0]; ++i) {
105 delete[] group_count;
106 group_count =
new part_t[num_unique_groups];
109 for (
int i = 0; i < actual_machine_extent[0]; ++i) {
110 if (tmp_vec[i] > 0) {
111 group_count[pos] = tmp_vec[i];
119 gatherMachineCoordinates(this->actual_procCoords,
120 this->actual_networkDim, comm);
140 const Teuchos::ParameterList &pl_ ):
142 transformed_networkDim(3),
143 actual_networkDim(3),
144 transformed_procCoords(NULL),
145 actual_procCoords(NULL),
146 transformed_machine_extent(NULL),
147 actual_machine_extent(NULL),
148 num_unique_groups(0),
150 is_transformed(false),
153 actual_machine_extent =
new int[actual_networkDim];
158 group_count =
new part_t[actual_machine_extent[0]];
160 memset(group_count, 0,
sizeof(
part_t) * actual_machine_extent[0]);
163 actual_procCoords =
new pcoord_t *[actual_networkDim];
164 transformed_procCoords =
new pcoord_t *[transformed_networkDim];
166 pcoord_t *xyz =
new pcoord_t[actual_networkDim];
172 memset(tmp_vec, 0,
sizeof(
part_t) * actual_machine_extent[0]);
174 Teuchos::reduceAll<int, part_t>(comm, Teuchos::REDUCE_SUM,
175 actual_machine_extent[0],
180 num_unique_groups = 0;
182 for (
int i = 0; i < actual_machine_extent[0]; ++i) {
183 if (tmp_vec[i] > 0) {
189 delete[] group_count;
190 group_count =
new part_t[num_unique_groups];
193 for (
int i = 0; i < actual_machine_extent[0]; ++i)
195 if (tmp_vec[i] > 0) {
196 group_count[pos] = tmp_vec[i];
202 const Teuchos::ParameterEntry *pe2 =
203 this->pl->getEntryPtr(
"Machine_Optimization_Level");
207 int optimization_level;
208 optimization_level = pe2->getValue<
int>(&optimization_level);
210 if (optimization_level > 0) {
211 is_transformed =
true;
214 transformed_networkDim = 1 + actual_machine_extent[1] +
215 actual_machine_extent[2];
216 transformed_machine_extent =
new int[transformed_networkDim];
218 transformed_procCoords =
new pcoord_t *[transformed_networkDim];
221 for (
int i = 0; i < transformed_networkDim; ++i) {
222 transformed_procCoords[i] =
new pcoord_t[this->
numRanks];
223 memset(transformed_procCoords[i], 0,
228 int nx = this->actual_machine_extent[0];
229 int ny = this->actual_machine_extent[1];
230 int nz = this->actual_machine_extent[2];
232 const Teuchos::ParameterEntry *pe_x =
233 this->pl->getEntryPtr(
"Machine_X_Stretch");
234 const Teuchos::ParameterEntry *pe_y =
235 this->pl->getEntryPtr(
"Machine_Y_Stretch");
236 const Teuchos::ParameterEntry *pe_z =
237 this->pl->getEntryPtr(
"Machine_Z_Stretch");
245 x_stretch = pe_x->getValue<
int>(&x_stretch);
247 y_stretch = pe_y->getValue<
int>(&y_stretch);
249 z_stretch = pe_z->getValue<
int>(&z_stretch);
252 transformed_procCoords[0][this->
myRank] =
253 x_stretch * xyz[0] * ny * nz;
256 for (
int i = 1; i < 1 + ny; ++i) {
258 transformed_procCoords[i][this->
myRank] = 0;
260 if (xyz[1] == i - 1) {
261 transformed_procCoords[i][this->
myRank] = y_stretch;
265 for (
int i = 1 + ny; i < transformed_networkDim; ++i) {
267 transformed_procCoords[i][this->
myRank] = 0;
269 if (xyz[2] == i - (1 + ny))
270 transformed_procCoords[i][this->
myRank] = z_stretch;
273 this->transformed_machine_extent =
new int[transformed_networkDim];
276 this->transformed_machine_extent[0] = x_stretch * (nx - 1) * ny * nz;
277 for (
int i = 1; i < 1 + ny; ++i) {
278 this->transformed_machine_extent[i] = y_stretch;
280 for (
int i = 1 + ny; i < transformed_networkDim; ++i) {
281 this->transformed_machine_extent[i] = z_stretch;
285 gatherMachineCoordinates(this->transformed_procCoords,
286 this->transformed_networkDim, comm);
292 if (!is_transformed) {
294 for (
int i = 0; i < actual_networkDim; ++i) {
295 actual_procCoords[i] =
new pcoord_t[this->
numRanks];
296 memset(actual_procCoords[i], 0,
300 for (
int i = 0; i < actual_networkDim; ++i)
301 actual_procCoords[i][this->
myRank] = xyz[i];
304 gatherMachineCoordinates(this->actual_procCoords,
305 this->actual_networkDim, comm);
314 if (is_transformed) {
315 is_transformed =
false;
317 for (
int i = 0; i < transformed_networkDim; ++i) {
318 delete [] transformed_procCoords[i];
321 delete [] transformed_machine_extent;
325 for (
int i = 0; i < actual_networkDim; ++i) {
326 delete [] actual_procCoords[i];
331 delete [] actual_procCoords;
332 delete [] transformed_procCoords;
334 delete [] actual_machine_extent;
335 delete [] group_count;
343 return this->transformed_networkDim;
345 return this->actual_networkDim;
350 if (is_transformed) {
351 for (
int dim = 0; dim < transformed_networkDim; ++dim)
352 nxyz[dim] = this->transformed_machine_extent[dim];
401 return this->num_unique_groups;
407 if (group_count != NULL) {
408 for (
int i = 0; i < num_unique_groups; ++i) {
409 grp_count[i] = this->group_count[i];
422 if (is_transformed) {
423 for (
int i = 0; i < this->
numRanks; ++i) {
424 std::cout <<
"Rank:" << i <<
" ";
425 for (
int j = 0; j < this->transformed_networkDim; ++j) {
426 std::cout <<
" " << this->transformed_procCoords[j][i];
428 std::cout << std::endl;
431 std::cout << std::endl <<
"Transformed Machine Extent: ";
432 for (
int i = 0; i < this->transformed_networkDim; ++i) {
433 std::cout <<
" " << this->transformed_machine_extent[i];
435 std::cout << std::endl;
439 for (
int i = 0; i < this->
numRanks; ++i) {
440 std::cout <<
"Rank:" << i;
441 for (
int j = 0; j < this->actual_networkDim; ++j) {
442 std::cout <<
" " << actual_procCoords[j][i];
444 std::cout << std::endl;
447 std::cout << std::endl <<
"Actual Machine Extent: ";
448 for (
int i = 0; i < this->actual_networkDim; ++i) {
449 std::cout <<
" " << this->actual_machine_extent[i];
451 std::cout << std::endl;
458 if (is_transformed) {
459 for (
int i = 0; i < this->transformed_networkDim; ++i) {
460 xyz[i] = transformed_procCoords[i][this->
myRank];
525 pcoord_t *xyz)
const {
526 if (is_transformed) {
527 for (
int i = 0; i < this->transformed_networkDim; ++i) {
528 xyz[i] = transformed_procCoords[i][rank];
532 for (
int i = 0; i < this->actual_networkDim; ++i) {
533 xyz[i] = actual_procCoords[i][rank];
546 if (is_transformed) {
547 allCoords = transformed_procCoords;
550 allCoords = actual_procCoords;
558 virtual bool getHopCount(
int rank1,
int rank2, pcoord_t &hops)
const override {
564 std::cerr <<
"Rank outside bounds for the machine ranks";
568 if (this->is_transformed) {
572 if (this->transformed_procCoords[0][rank1] !=
573 this->transformed_procCoords[0][rank2])
583 for (
int i = 1; i < this->transformed_networkDim; ++i) {
584 if (this->transformed_procCoords[i][rank1] !=
585 this->transformed_procCoords[i][rank2])
594 if (this->actual_procCoords[0][rank1] !=
595 this->actual_procCoords[0][rank2])
604 for (
int i = 1; i < this->actual_networkDim; ++i) {
605 if (this->actual_procCoords[i][rank1] !=
606 this->actual_procCoords[i][rank2])
617 int transformed_networkDim;
618 int actual_networkDim;
621 pcoord_t **transformed_procCoords;
622 pcoord_t **actual_procCoords;
625 part_t *transformed_machine_extent;
626 part_t *actual_machine_extent;
636 const Teuchos::ParameterList *pl;
640 void gatherMachineCoordinates(pcoord_t **&coords,
int netDim,
641 const Teuchos::Comm<int> &comm) {
643 pcoord_t *tmpVect =
new pcoord_t [this->
numRanks];
645 for (
int i = 0; i < netDim; ++i) {
646 Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
649 pcoord_t *tmp = tmpVect;
MachineDragonflyRCAForTesting(const Teuchos::Comm< int > &comm)
Constructor: Dragonfly (e.g. Cori & Trinity) RCA network machine description.
bool getMyActualMachineCoordinate(pcoord_t *xyz)
A Dragonfly (e.g. Cori, Trinity, Theta) Machine Class for testing only. A more realistic machine shou...
MachineDragonflyRCAForTesting(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
Constructor: Dragonfly (e.g. Cori & Trinity) network machine description.
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
MachineClass Base class for representing machine coordinates, networks, etc.
SparseMatrixAdapter_t::part_t part_t
int getMachineDim() const
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount function: sets hops between rank1 and rank2 and returns true if coordinates are available ...
virtual ~MachineDragonflyRCAForTesting()
bool getMyMachineCoordinate(pcoord_t *xyz)
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
bool hasMachineCoordinates() const
bool getGroupCount(part_t *grp_count) const override
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
bool getMyTransformedMachineCoordinate(pcoord_t *xyz)
bool getActualMachineExtent(int *nxyz) const
part_t getNumUniqueGroups() const override
getNumUniqueGroups function: returns the number of unique Dragonfly network groups in provided allocati...
bool getMachineExtent(int *nxyz) const
bool getTransformedMachineExtent(int *nxyz) const