Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
TaskMappingSimulate.cpp
Go to the documentation of this file.
1 
5 
6 #include <string>
7 
8 #include <Teuchos_RCP.hpp>
9 #include <Teuchos_Array.hpp>
10 #include <Teuchos_ParameterList.hpp>
11 #include "Teuchos_XMLParameterListHelpers.hpp"
12 
13 #include "Tpetra_MultiVector.hpp"
14 #include <Tpetra_CrsGraph.hpp>
15 #include <Tpetra_Map.hpp>
16 
19 #include <Zoltan2_TimerManager.hpp>
24 
25 /*
26 typedef int test_lno_t;
27 typedef long long test_gno_t;
28 typedef double test_scalar_t;
29 */
33 
34 typedef Tpetra::CrsGraph<test_lno_t, test_gno_t, znode_t> mytest_tcrsGraph_t;
35 typedef Tpetra::MultiVector<test_scalar_t, test_lno_t, test_gno_t, znode_t> mytest_tMVector_t;
37 typedef Tpetra::Map<>::node_type mytest_znode_t;
38 typedef Tpetra::Map<test_lno_t, test_gno_t, mytest_znode_t> mytest_map_t;
40 
45 };
46 
50 };
51 
52 RCP<mytest_tcrsGraph_t> create_tpetra_input_matrix(
53  char *input_binary_graph,
54  Teuchos::RCP<const Teuchos::Comm<int> > tcomm,
55  test_gno_t & myTasks,
56  std::vector <int> &task_communication_xadj_,
57  std::vector <int> &task_communication_adj_,
58  std::vector <double> &task_communication_adjw_){
59 
60  int rank = tcomm->getRank();
61  using namespace Teuchos;
62 
63  myTasks = 0;
64  test_lno_t myEdges = 0;
65 
66 
67  if (rank == 0){
68  FILE *f2 = fopen(input_binary_graph, "rb");
69  int num_vertices = 0;
70  int num_edges = 0;
71  fread(&num_vertices,sizeof(int),1,f2); // write 10 bytes to our buffer
72  fread(&num_edges, sizeof(int),1,f2); // write 10 bytes to our buffer
73 
74  myTasks = num_vertices;
75  myEdges = num_edges;
76  std::cout << "numParts:" << num_vertices << " ne:" << num_edges << std::endl;
77 
78  task_communication_xadj_.resize(num_vertices+1);
79  task_communication_adj_.resize(num_edges);
80  task_communication_adjw_.resize(num_edges);
81 
82  fread((void *)&(task_communication_xadj_[0]),sizeof(int),num_vertices + 1,f2); // write 10 bytes to our buffer
83  fread((void *)&(task_communication_adj_[0]),sizeof(int),num_edges ,f2); // write 10 bytes to our buffer
84  fread((void *)&(task_communication_adjw_[0]),sizeof(double),num_edges,f2); // write 10 bytes to our buffer
85  fclose(f2);
86 
87  }
88 
89 
90  tcomm->broadcast(0, sizeof(test_lno_t), (char *) &myTasks);
91  tcomm->broadcast(0, sizeof(test_lno_t), (char *) &myEdges);
92 
93  if (rank != 0){
94  task_communication_xadj_.resize(myTasks+1);
95  task_communication_adj_.resize(myEdges);
96  task_communication_adjw_.resize(myEdges);
97  }
98 
99  tcomm->broadcast(0, sizeof(test_lno_t) * (myTasks+1), (char *) &(task_communication_xadj_[0]));
100  tcomm->broadcast(0, sizeof(test_lno_t)* (myEdges), (char *) &(task_communication_adj_[0]));
101  tcomm->broadcast(0, sizeof(test_scalar_t)* (myEdges), (char *) &(task_communication_adjw_[0]));
102 
103 
104  using namespace Teuchos;
105  Teuchos::RCP<const Teuchos::Comm<int> > serial_comm = Teuchos::createSerialComm<int>();
106  RCP<const mytest_map_t> map = rcp (new mytest_map_t (myTasks, myTasks, 0, serial_comm));
107 
108  RCP<mytest_tcrsGraph_t> TpetraCrsGraph(new mytest_tcrsGraph_t (map, 0));
109 
110 
111  std::vector<test_gno_t> tmp(myEdges);
112  for (test_lno_t lclRow = 0; lclRow < myTasks; ++lclRow) {
113  const test_gno_t gblRow = map->getGlobalElement (lclRow);
114  test_lno_t begin = task_communication_xadj_[gblRow];
115  test_lno_t end = task_communication_xadj_[gblRow + 1];
116  for (test_lno_t m = begin; m < end; ++m){
117  tmp[m - begin] = task_communication_adj_[m];
118  }
119  const ArrayView< const test_gno_t > indices(&(tmp[0]), end-begin);
120  TpetraCrsGraph->insertGlobalIndices(gblRow, indices);
121  }
122  TpetraCrsGraph->fillComplete ();
123 
124 
125  return TpetraCrsGraph;
126 }
127 
128 
129 RCP <Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t> > create_multi_vector_adapter(
130  RCP<const mytest_map_t> map, int coord_dim,
131  test_scalar_t ** partCenters, test_gno_t myTasks){
132 
133 
134  Teuchos::Array<Teuchos::ArrayView<const test_scalar_t> > coordView(coord_dim);
135 
136  if(myTasks > 0){
137  for (int i = 0; i < coord_dim; ++i){
138  Teuchos::ArrayView<const test_scalar_t> a(partCenters[i], myTasks);
139  coordView[i] = a;
140  }
141  }
142  else {
143  for (int i = 0; i < coord_dim; ++i){
144  Teuchos::ArrayView<const test_scalar_t> a;
145  coordView[i] = a;
146  }
147  }
148  RCP<mytest_tMVector_t> coords(new mytest_tMVector_t(map, coordView.view(0, coord_dim), coord_dim));//= set multivector;
149  RCP<const mytest_tMVector_t> const_coords = rcp_const_cast<const mytest_tMVector_t>(coords);
150  RCP <Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t> > adapter (new Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t>(const_coords));
151  return adapter;
152 }
153 
154 
155 void test_serial_input_adapter(Teuchos::RCP<const Teuchos::Comm<int> > global_tcomm,
156  char *input_binary_graph, char *input_binary_coordinate, char *input_machine_file,
157  int machine_optimization_level, bool divide_prime_first, int rank_per_node, bool visualize_mapping, int reduce_best_mapping){
158 
159  if (input_binary_graph == NULL || input_binary_coordinate == NULL || input_machine_file == NULL){
160  throw "Binary files is mandatory";
161  }
162  //all processors have the all input in this case.
163  Teuchos::RCP<const Teuchos::Comm<int> > serial_comm = Teuchos::createSerialComm<int>();
164 
165  //for the input creation, let processor think that it is the only processor.
166 
167  Teuchos::ParameterList serial_problemParams;
168  //create mapping problem parameters
169  serial_problemParams.set("mapping_algorithm", "geometric");
170  serial_problemParams.set("distributed_input_adapter", false);
171  serial_problemParams.set("algorithm", "multijagged");
172  serial_problemParams.set("Machine_Optimization_Level", machine_optimization_level);
173  serial_problemParams.set("Input_RCA_Machine_Coords", input_machine_file);
174  serial_problemParams.set("divide_prime_first", divide_prime_first);
175  serial_problemParams.set("ranks_per_node", rank_per_node);
176  if (reduce_best_mapping)
177  serial_problemParams.set("reduce_best_mapping", true);
178  else
179  serial_problemParams.set("reduce_best_mapping", false);
180 
181  Zoltan2::MachineRepresentation <test_scalar_t, mytest_part_t> transformed_machine(*global_tcomm, serial_problemParams);
182  int numProcs = transformed_machine.getNumRanks();
183  //TODO MOVE THIS DOWN.
184  serial_problemParams.set("num_global_parts", numProcs);
185  RCP<Zoltan2::Environment> env (new Zoltan2::Environment(serial_problemParams, global_tcomm));
186  RCP<Zoltan2::TimerManager> timer(new Zoltan2::TimerManager(global_tcomm, &std::cout, Zoltan2::MACRO_TIMERS));
187  env->setTimer(timer);
189 
190  std::vector <double> task_communication_adjw_;
191 
192  std::vector <int> task_communication_xadj_;
193  std::vector <int> task_communication_adj_;
194 
195  test_scalar_t **partCenters = NULL;
196  test_gno_t myTasks ;
197  //create tpetra input graph
198  RCP<mytest_tcrsGraph_t> serial_tpetra_graph = create_tpetra_input_matrix(
199  input_binary_graph,
200  global_tcomm,
201  myTasks,
202  task_communication_xadj_, task_communication_adj_,
203  task_communication_adjw_);
204  RCP<const mytest_map_t> serial_map = serial_tpetra_graph->getMap();
205  global_tcomm->barrier();
206 
207  //create input adapter from tpetra graph
208  env->timerStart(Zoltan2::MACRO_TIMERS, "AdapterCreate");
209  RCP<const mytest_tcrsGraph_t> const_tpetra_graph = rcp_const_cast<const mytest_tcrsGraph_t>(serial_tpetra_graph);
210  RCP<mytest_adapter_t> ia (new mytest_adapter_t(const_tpetra_graph, 0, 1));
211 
212  int rank = global_tcomm->getRank();
213 
214  int numParts = 0; int coordDim = 0;
215 
216  if (rank == 0)
217  {
218  FILE *f2 = fopen(input_binary_coordinate, "rb");
219  fread((void *)&numParts,sizeof(int),1,f2); // write 10 bytes to our buffer
220  fread((void *)&coordDim,sizeof(int),1,f2); // write 10 bytes to our buffer
221 
222 
223  partCenters = new test_scalar_t * [coordDim];
224  for(int i = 0; i < coordDim; ++i){
225  partCenters[i] = new test_scalar_t[numParts];
226  fread((void *) partCenters[i],sizeof(double),numParts, f2); // write 10 bytes to our buffer
227  }
228  fclose(f2);
229  }
230 
231  global_tcomm->broadcast(0, sizeof(test_lno_t), (char *) &numParts);
232  global_tcomm->broadcast(0, sizeof(test_lno_t), (char *) &coordDim);
233 
234  if (numParts!= myTasks){
235  throw "myTasks is different than numParts";
236  }
237  if (rank != 0){
238  partCenters = new test_scalar_t * [coordDim];
239  for(int i = 0; i < coordDim; ++i){
240  partCenters[i] = new test_scalar_t[numParts];
241  }
242  }
243 
244  for(int i = 0; i < coordDim; ++i){
245  global_tcomm->broadcast(0, sizeof(test_scalar_t)* (numParts), (char *) partCenters[i]);
246  }
247 
248  //create multivector for coordinates and
249  RCP <Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t> > serial_adapter = create_multi_vector_adapter(serial_map, coordDim, partCenters, myTasks);
250  ia->setCoordinateInput(serial_adapter.getRawPtr());
251 
252  ia->setEdgeWeights(&(task_communication_adjw_[0]), 1, 0);
253 /*
254  for (int i = 0; i < task_communication_adjw_.size(); ++i){
255  std::cout << task_communication_adjw_[i] << " ";
256  }
257  std::cout << std::endl;
258  for (int i = 0; i < task_communication_adjw_.size(); ++i){
259  std::cout << task_communication_adj_[i] << " ";
260  }
261  std::cout << std::endl;
262 */
263  env->timerStop(Zoltan2::MACRO_TIMERS, "AdapterCreate");
264  global_tcomm->barrier();
266 
267 
268  //NOW, it only makes sense to map them serially. This is a case for the applications,
269  //where they already have the whole graph in all processes, and they want to do the mapping.
270  //Basically it will same time mapping algorithm, if that is the case.
271 
272  //First case from the distributed case does not really make sense and it is errornous.
273  //zoltan partitioning algorithms require distributed input. One still can do that in two phases,
274  //but needs to make sure that distributed and serial input adapters matches correctly.
275 
276  //Second case does not make sense and errornous. All elements are within the same node and they should not be
277  //assumed to be in the same part, since it will result only a single part.
278 
279  //If input adapter is not distributed, we are only interested in the third case.
280  //Each element will be its own unique part at the beginning of the mapping.
281 
282  //FOR the third case we create our own solution and set unique parts to each element.
283  //Basically each element has its global id as part number.
284  //It global ids are same as local ids here because each processors owns the whole thing.
285  Zoltan2::PartitioningSolution<mytest_adapter_t> single_phase_mapping_solution(env, global_tcomm, 0);
286  Teuchos::ArrayView< const test_gno_t> gids = serial_map->getNodeElementList();
287 
288  ArrayRCP<int> initial_part_ids(myTasks);
289  for (test_gno_t i = 0; i < myTasks; ++i){
290  initial_part_ids[i] = gids[i];
291  }
292  single_phase_mapping_solution.setParts(initial_part_ids);
293 
294 
295  env->timerStart(Zoltan2::MACRO_TIMERS, "Problem Create");
296  //create a mapping problem for the third case. We provide a solution in which all elements belong to unique part.
297  //even the input is not distributed, we still provide the global_tcomm because processors will calculate different mappings
298  //and the best one will be chosen.
300  ia.getRawPtr(), &serial_problemParams, global_tcomm, &single_phase_mapping_solution, &transformed_machine);
301 
302  env->timerStop(Zoltan2::MACRO_TIMERS, "Problem Create");
303  //solve mapping problem.
304  env->timerStart(Zoltan2::MACRO_TIMERS, "Problem Solve");
305  serial_map_problem.solve(true);
306  env->timerStop(Zoltan2::MACRO_TIMERS, "Problem Solve");
307 
308  //get the solution.
309  Zoltan2::MappingSolution<mytest_adapter_t> *msoln3 = serial_map_problem.getSolution();
310 
311  timer->printAndResetToZero();
312 
313  //typedef Zoltan2::EvaluatePartition<my_adapter_t> quality_t;
315 
316 
317  //input is not distributed in this case.
318  //metric object should be given the serial comm so that it can calculate the correct metrics without global communication.
319  RCP<quality_t> metricObject_3 = rcp(
320  new quality_t(ia.getRawPtr(),&serial_problemParams,serial_comm,msoln3, serial_map_problem.getMachine().getRawPtr()));
321 
322  if (global_tcomm->getRank() == 0){
323  std::cout << "METRICS FOR THE SERIAL CASE - ONE PHASE MAPPING - EACH ELEMENT IS ASSUMED TO BE IN UNIQUE PART AT THE BEGINNING" << std::endl;
324  metricObject_3->printMetrics(std::cout);
325  }
326  if (machine_optimization_level > 0){
327 
328  Teuchos::ParameterList serial_problemParams_2;
329  serial_problemParams_2.set("Input_RCA_Machine_Coords", input_machine_file);
330 
331  Zoltan2::MachineRepresentation <test_scalar_t, mytest_part_t> actual_machine(*global_tcomm, serial_problemParams_2);
332 
333  RCP<quality_t> metricObject_4 = rcp(
334  new quality_t(ia.getRawPtr(),&serial_problemParams_2,serial_comm,msoln3, &actual_machine));
335 
336  if (global_tcomm->getRank() == 0){
337  std::cout << "METRICS FOR THE SERIAL CASE - ONE PHASE MAPPING - EACH ELEMENT IS ASSUMED TO BE IN UNIQUE PART AT THE BEGINNING" << std::endl;
338  metricObject_4->printMetrics(std::cout);
339  }
340  }
341 
342  if (visualize_mapping && global_tcomm->getRank() == 0){
343 
344  Teuchos::ParameterList serial_problemParams_2;
345  serial_problemParams_2.set("Input_RCA_Machine_Coords", input_machine_file);
346  Zoltan2::MachineRepresentation <test_scalar_t, mytest_part_t> actual_machine(*global_tcomm, serial_problemParams_2);
347  test_scalar_t ** coords;
348  actual_machine.getAllMachineCoordinatesView(coords);
349  Zoltan2::visualize_mapping<zscalar_t, int> (0, actual_machine.getMachineDim(), actual_machine.getNumRanks(), coords,
350  int (task_communication_xadj_.size())-1, &(task_communication_xadj_[0]), &(task_communication_adj_[0]), msoln3->getPartListView());
351 
352  }
353 
354 }
355 
356 int main(int narg, char *arg[]){
357 
358  Tpetra::ScopeGuard tscope(&narg, &arg);
359  Teuchos::RCP<const Teuchos::Comm<int> > global_tcomm=Tpetra::getDefaultComm();
360 
361  char *input_binary_graph = NULL;
362  char *input_binary_coordinate = NULL;
363  char *input_machine_file = NULL;
364  int machine_optimization_level = 10;
365  bool divide_prime_first = false;
366  int rank_per_node = 1;
367  int reduce_best_mapping = 1;
368  bool visualize_mapping = false;
369  for ( int i = 1 ; i < narg ; ++i ) {
370  if ( 0 == strcasecmp( arg[i] , "BG" ) ) {
371 
372  input_binary_graph = arg[++i];
373  }
374  else if ( 0 == strcasecmp( arg[i] , "BC" ) ) {
375  input_binary_coordinate = arg[++i];
376  }
377  else if ( 0 == strcasecmp( arg[i] , "MF" ) ) {
378  //not binary.
379  input_machine_file = arg[++i];
380  }
381  else if ( 0 == strcasecmp( arg[i] , "OL" ) ) {
382  machine_optimization_level = atoi( arg[++i] );
383  }
384  else if ( 0 == strcasecmp( arg[i] , "DP" ) ) {
385  if (atoi( arg[++i] )){
386  divide_prime_first = true;
387  }
388  }
389  else if ( 0 == strcasecmp( arg[i] , "RPN" ) ) {
390  rank_per_node = atoi( arg[++i] );
391  }
392  else if ( 0 == strcasecmp( arg[i] , "VM" ) ) {
393  visualize_mapping = true;
394  }
395  else if ( 0 == strcasecmp( arg[i] , "RBM" ) ) {
396  reduce_best_mapping = atoi( arg[++i] );
397  }
398  else{
399  std::cerr << "Unrecognized command line argument #" << i << ": " << arg[i] << std::endl ;
400  return 1;
401  }
402  }
403 
404 
405  try{
406 
407  test_serial_input_adapter(global_tcomm, input_binary_graph, input_binary_coordinate, input_machine_file,
408  machine_optimization_level, divide_prime_first, rank_per_node, visualize_mapping, reduce_best_mapping);
409 
410 #if 0
411  {
412  part_t my_parts = 0, *my_result_parts;
413  //const part_t *local_element_to_rank = msoln1->getPartListView();
414 
415  std::cout << "me:" << global_tcomm->getRank() << " my_parts:" << my_parts << " myTasks:" << myTasks << std::endl;
416  if (global_tcomm->getRank() == 0) {
417 
418  //zscalar_t **dots = partCenters;
419  //int i = 0, j =0;
420  FILE *f2 = fopen("plot.gnuplot", "w");
421  for (int i = 0; i< global_tcomm->getSize(); ++i){
422  char str[20];
423  sprintf(str, "coords%d.txt", i);
424  if (i == 0){
425  fprintf(f2,"splot \"%s\"\n", str);
426  }
427  else {
428  fprintf(f2,"replot \"%s\"\n", str);
429  }
430  }
431  fprintf(f2,"pause-1\n");
432  fclose(f2);
433  }
434  char str[20];
435  int myrank = global_tcomm->getRank();
436  sprintf(str, "coords%d.txt", myrank);
437  FILE *coord_files = fopen(str, "w");
438 
439 
440  for (int j = 0; j < my_parts; ++j){
441  int findex = my_result_parts[j];
442  std::cout << "findex " << findex << std::endl;
443  fprintf(coord_files,"%lf %lf %lf\n", partCenters[0][findex], partCenters[1][findex], partCenters[2][findex]);
444  }
445  fclose(coord_files);
446  }
447 #endif
448 
449  if (global_tcomm->getRank() == 0){
450  std::cout << "PASS" << std::endl;
451  }
452  }
453  catch(std::string &s){
454  std::cerr << s << std::endl;
455  }
456 
457  catch(char * s){
458  std::cerr << s << std::endl;
459  }
460 }
461 
RCP< Zoltan2::XpetraMultiVectorAdapter< mytest_tMVector_t > > create_multi_vector_adapter(RCP< const mytest_map_t > map, zscalar_t **partCenters, zgno_t myTasks)
Time an algorithm (or other entity) as a whole.
Zoltan2::XpetraCrsGraphAdapter< mytest_tcrsGraph_t, mytest_tMVector_t > mytest_adapter_t
MappingInputDistributution
Tpetra::CrsGraph< zlno_t, zgno_t, znode_t > mytest_tcrsGraph_t
void visualize_mapping(int myRank, const int machine_coord_dim, const int num_ranks, proc_coord_t **machine_coords, const v_lno_t num_tasks, const v_lno_t *task_communication_xadj, const v_lno_t *task_communication_adj, const int *task_to_rank)
PartitionMapping maps a solution or an input distribution to ranks.
int main(int narg, char *arg[])
void solve(bool updateInputData=true)
Direct the problem to create a solution.
double zscalar_t
mytest_adapter_t::part_t mytest_part_t
Provides access for Zoltan2 to Xpetra::CrsGraph data.
int zlno_t
common code used by tests
Tpetra::Map::node_type mytest_znode_t
Tpetra::Map< zlno_t, zgno_t, mytest_znode_t > mytest_map_t
Defines the XpetraMultiVectorAdapter.
Defines XpetraCrsGraphAdapter class.
SparseMatrixAdapter_t::part_t part_t
int getMachineDim() const
returns the dimension (number of coords per node) in the machine
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
getProcDim function set the coordinates of all ranks allCoords[i][j], i=0,...,getMachineDim(), j=0,...,getNumRanks(), is the i-th dimensional coordinate for rank j. return true if coordinates are available for all ranks
zgno_t test_gno_t
void test_serial_input_adapter(int nx, int ny, int nz, Teuchos::RCP< const Teuchos::Comm< int > > global_tcomm)
A PartitioningSolution is a solution to a partitioning problem.
Defines the EvaluatePartition class.
zlno_t test_lno_t
int getNumRanks() const
return the number of ranks.
InputTraits< User >::part_t part_t
The user parameters, debug, timing and memory profiling output objects, and error checking methods...
zscalar_t test_scalar_t
An adapter for Xpetra::MultiVector.
MachineRepresentation Class Base class for representing machine coordinates, networks, etc.
void setParts(ArrayRCP< part_t > &partList)
The algorithm uses setParts to set the solution.
int zgno_t
Tpetra::MultiVector< zscalar_t, zlno_t, zgno_t, znode_t > mytest_tMVector_t
Defines the MappingSolution class.
RCP< mytest_tcrsGraph_t > create_tpetra_input_matrix(int nx, int ny, int nz, int numProcs, Teuchos::RCP< const Teuchos::Comm< int > > tcomm, RCP< Zoltan2::Environment > env, zscalar_t **&partCenters, zgno_t &myTasks)
Defines the PartitioningProblem class.
MappingProblem enables mapping of a partition (either computed or input) to MPI ranks.
A class that computes and returns quality metrics.
Declarations for TimerManager.
Defines the MappingProblem class.