Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
TaskMappingSimulate.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Zoltan2: A package of combinatorial algorithms for scientific computing
4 //
5 // Copyright 2012 NTESS and the Zoltan2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Zoltan2_TaskMapping.hpp"
12 #include <Zoltan2_TestHelpers.hpp>
13 
14 #include <string>
15 
16 #include <Teuchos_RCP.hpp>
17 #include <Teuchos_Array.hpp>
18 #include <Teuchos_ParameterList.hpp>
19 #include "Teuchos_XMLParameterListHelpers.hpp"
20 
21 #include "Tpetra_MultiVector.hpp"
22 #include <Tpetra_CrsGraph.hpp>
23 #include <Tpetra_Map.hpp>
24 
27 #include <Zoltan2_TimerManager.hpp>
32 
33 /*
34 typedef int test_lno_t;
35 typedef long long test_gno_t;
36 typedef double test_scalar_t;
37 */
41 
42 typedef Tpetra::CrsGraph<test_lno_t, test_gno_t, znode_t> mytest_tcrsGraph_t;
43 typedef Tpetra::MultiVector<test_scalar_t, test_lno_t, test_gno_t, znode_t> mytest_tMVector_t;
45 typedef Tpetra::Map<>::node_type mytest_znode_t;
46 typedef Tpetra::Map<test_lno_t, test_gno_t, mytest_znode_t> mytest_map_t;
48 
53 };
54 
58 };
59 
60 RCP<mytest_tcrsGraph_t> create_tpetra_input_matrix(
61  char *input_binary_graph,
62  Teuchos::RCP<const Teuchos::Comm<int> > tcomm,
63  test_gno_t & myTasks,
64  std::vector <int> &task_communication_xadj_,
65  std::vector <int> &task_communication_adj_,
66  std::vector <double> &task_communication_adjw_){
67 
68  int rank = tcomm->getRank();
69  using namespace Teuchos;
70 
71  myTasks = 0;
72  test_lno_t myEdges = 0;
73 
74 
75  if (rank == 0){
76  FILE *f2 = fopen(input_binary_graph, "rb");
77  int num_vertices = 0;
78  int num_edges = 0;
79  fread(&num_vertices,sizeof(int),1,f2); // write 10 bytes to our buffer
80  fread(&num_edges, sizeof(int),1,f2); // write 10 bytes to our buffer
81 
82  myTasks = num_vertices;
83  myEdges = num_edges;
84  std::cout << "numParts:" << num_vertices << " ne:" << num_edges << std::endl;
85 
86  task_communication_xadj_.resize(num_vertices+1);
87  task_communication_adj_.resize(num_edges);
88  task_communication_adjw_.resize(num_edges);
89 
90  fread((void *)&(task_communication_xadj_[0]),sizeof(int),num_vertices + 1,f2); // write 10 bytes to our buffer
91  fread((void *)&(task_communication_adj_[0]),sizeof(int),num_edges ,f2); // write 10 bytes to our buffer
92  fread((void *)&(task_communication_adjw_[0]),sizeof(double),num_edges,f2); // write 10 bytes to our buffer
93  fclose(f2);
94 
95  }
96 
97 
98  tcomm->broadcast(0, sizeof(test_lno_t), (char *) &myTasks);
99  tcomm->broadcast(0, sizeof(test_lno_t), (char *) &myEdges);
100 
101  if (rank != 0){
102  task_communication_xadj_.resize(myTasks+1);
103  task_communication_adj_.resize(myEdges);
104  task_communication_adjw_.resize(myEdges);
105  }
106 
107  tcomm->broadcast(0, sizeof(test_lno_t) * (myTasks+1), (char *) &(task_communication_xadj_[0]));
108  tcomm->broadcast(0, sizeof(test_lno_t)* (myEdges), (char *) &(task_communication_adj_[0]));
109  tcomm->broadcast(0, sizeof(test_scalar_t)* (myEdges), (char *) &(task_communication_adjw_[0]));
110 
111 
112  using namespace Teuchos;
113  Teuchos::RCP<const Teuchos::Comm<int> > serial_comm = Teuchos::createSerialComm<int>();
114  RCP<const mytest_map_t> map = rcp (new mytest_map_t (myTasks, myTasks, 0, serial_comm));
115 
116  RCP<mytest_tcrsGraph_t> TpetraCrsGraph(new mytest_tcrsGraph_t (map, 0));
117 
118 
119  std::vector<test_gno_t> tmp(myEdges);
120  for (test_lno_t lclRow = 0; lclRow < myTasks; ++lclRow) {
121  const test_gno_t gblRow = map->getGlobalElement (lclRow);
122  test_lno_t begin = task_communication_xadj_[gblRow];
123  test_lno_t end = task_communication_xadj_[gblRow + 1];
124  for (test_lno_t m = begin; m < end; ++m){
125  tmp[m - begin] = task_communication_adj_[m];
126  }
127  const ArrayView< const test_gno_t > indices(&(tmp[0]), end-begin);
128  TpetraCrsGraph->insertGlobalIndices(gblRow, indices);
129  }
130  TpetraCrsGraph->fillComplete ();
131 
132 
133  return TpetraCrsGraph;
134 }
135 
136 
137 RCP <Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t> > create_multi_vector_adapter(
138  RCP<const mytest_map_t> map, int coord_dim,
139  test_scalar_t ** partCenters, test_gno_t myTasks){
140 
141 
142  Teuchos::Array<Teuchos::ArrayView<const test_scalar_t> > coordView(coord_dim);
143 
144  if(myTasks > 0){
145  for (int i = 0; i < coord_dim; ++i){
146  Teuchos::ArrayView<const test_scalar_t> a(partCenters[i], myTasks);
147  coordView[i] = a;
148  }
149  }
150  else {
151  for (int i = 0; i < coord_dim; ++i){
152  Teuchos::ArrayView<const test_scalar_t> a;
153  coordView[i] = a;
154  }
155  }
156  RCP<mytest_tMVector_t> coords(new mytest_tMVector_t(map, coordView.view(0, coord_dim), coord_dim));//= set multivector;
157  RCP<const mytest_tMVector_t> const_coords = rcp_const_cast<const mytest_tMVector_t>(coords);
158  RCP <Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t> > adapter (new Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t>(const_coords));
159  return adapter;
160 }
161 
162 
163 void test_serial_input_adapter(Teuchos::RCP<const Teuchos::Comm<int> > global_tcomm,
164  char *input_binary_graph, char *input_binary_coordinate, char *input_machine_file,
165  int machine_optimization_level, bool divide_prime_first, int rank_per_node, bool visualize_mapping, int reduce_best_mapping){
166 
167  if (input_binary_graph == NULL || input_binary_coordinate == NULL || input_machine_file == NULL){
168  throw "Binary files is mandatory";
169  }
170  //all processors have the all input in this case.
171  Teuchos::RCP<const Teuchos::Comm<int> > serial_comm = Teuchos::createSerialComm<int>();
172 
173  //for the input creation, let processor think that it is the only processor.
174 
175  Teuchos::ParameterList serial_problemParams;
176  //create mapping problem parameters
177  serial_problemParams.set("mapping_algorithm", "geometric");
178  serial_problemParams.set("distributed_input_adapter", false);
179  serial_problemParams.set("algorithm", "multijagged");
180  serial_problemParams.set("Machine_Optimization_Level", machine_optimization_level);
181  serial_problemParams.set("Input_RCA_Machine_Coords", input_machine_file);
182  serial_problemParams.set("divide_prime_first", divide_prime_first);
183  serial_problemParams.set("ranks_per_node", rank_per_node);
184  if (reduce_best_mapping)
185  serial_problemParams.set("reduce_best_mapping", true);
186  else
187  serial_problemParams.set("reduce_best_mapping", false);
188 
189  Zoltan2::MachineRepresentation <test_scalar_t, mytest_part_t> transformed_machine(*global_tcomm, serial_problemParams);
190  int numProcs = transformed_machine.getNumRanks();
191  //TODO MOVE THIS DOWN.
192  serial_problemParams.set("num_global_parts", numProcs);
193  RCP<Zoltan2::Environment> env (new Zoltan2::Environment(serial_problemParams, global_tcomm));
194  RCP<Zoltan2::TimerManager> timer(new Zoltan2::TimerManager(global_tcomm, &std::cout, Zoltan2::MACRO_TIMERS));
195  env->setTimer(timer);
197 
198  std::vector <double> task_communication_adjw_;
199 
200  std::vector <int> task_communication_xadj_;
201  std::vector <int> task_communication_adj_;
202 
203  test_scalar_t **partCenters = NULL;
204  test_gno_t myTasks ;
205  //create tpetra input graph
206  RCP<mytest_tcrsGraph_t> serial_tpetra_graph = create_tpetra_input_matrix(
207  input_binary_graph,
208  global_tcomm,
209  myTasks,
210  task_communication_xadj_, task_communication_adj_,
211  task_communication_adjw_);
212  RCP<const mytest_map_t> serial_map = serial_tpetra_graph->getMap();
213  global_tcomm->barrier();
214 
215  //create input adapter from tpetra graph
216  env->timerStart(Zoltan2::MACRO_TIMERS, "AdapterCreate");
217  RCP<const mytest_tcrsGraph_t> const_tpetra_graph = rcp_const_cast<const mytest_tcrsGraph_t>(serial_tpetra_graph);
218  RCP<mytest_adapter_t> ia (new mytest_adapter_t(const_tpetra_graph, 0, 1));
219 
220  int rank = global_tcomm->getRank();
221 
222  int numParts = 0; int coordDim = 0;
223 
224  if (rank == 0)
225  {
226  FILE *f2 = fopen(input_binary_coordinate, "rb");
227  fread((void *)&numParts,sizeof(int),1,f2); // write 10 bytes to our buffer
228  fread((void *)&coordDim,sizeof(int),1,f2); // write 10 bytes to our buffer
229 
230 
231  partCenters = new test_scalar_t * [coordDim];
232  for(int i = 0; i < coordDim; ++i){
233  partCenters[i] = new test_scalar_t[numParts];
234  fread((void *) partCenters[i],sizeof(double),numParts, f2); // write 10 bytes to our buffer
235  }
236  fclose(f2);
237  }
238 
239  global_tcomm->broadcast(0, sizeof(test_lno_t), (char *) &numParts);
240  global_tcomm->broadcast(0, sizeof(test_lno_t), (char *) &coordDim);
241 
242  if (numParts!= myTasks){
243  throw "myTasks is different than numParts";
244  }
245  if (rank != 0){
246  partCenters = new test_scalar_t * [coordDim];
247  for(int i = 0; i < coordDim; ++i){
248  partCenters[i] = new test_scalar_t[numParts];
249  }
250  }
251 
252  for(int i = 0; i < coordDim; ++i){
253  global_tcomm->broadcast(0, sizeof(test_scalar_t)* (numParts), (char *) partCenters[i]);
254  }
255 
256  //create multivector for coordinates and
257  RCP <Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t> > serial_adapter = create_multi_vector_adapter(serial_map, coordDim, partCenters, myTasks);
258  ia->setCoordinateInput(serial_adapter.getRawPtr());
259 
260  ia->setEdgeWeights(&(task_communication_adjw_[0]), 1, 0);
261 /*
262  for (int i = 0; i < task_communication_adjw_.size(); ++i){
263  std::cout << task_communication_adjw_[i] << " ";
264  }
265  std::cout << std::endl;
266  for (int i = 0; i < task_communication_adjw_.size(); ++i){
267  std::cout << task_communication_adj_[i] << " ";
268  }
269  std::cout << std::endl;
270 */
271  env->timerStop(Zoltan2::MACRO_TIMERS, "AdapterCreate");
272  global_tcomm->barrier();
274 
275 
276  //NOW, it only makes sense to map them serially. This is a case for the applications,
277  //where they already have the whole graph in all processes, and they want to do the mapping.
278  //Basically it will same time mapping algorithm, if that is the case.
279 
280  //First case from the distributed case does not really make sense and it is errornous.
281  //zoltan partitioning algorithms require distributed input. One still can do that in two phases,
282  //but needs to make sure that distributed and serial input adapters matches correctly.
283 
284  //Second case does not make sense and errornous. All elements are within the same node and they should not be
285  //assumed to be in the same part, since it will result only a single part.
286 
287  //If input adapter is not distributed, we are only interested in the third case.
288  //Each element will be its own unique part at the beginning of the mapping.
289 
290  //FOR the third case we create our own solution and set unique parts to each element.
291  //Basically each element has its global id as part number.
292  //It global ids are same as local ids here because each processors owns the whole thing.
293  Zoltan2::PartitioningSolution<mytest_adapter_t> single_phase_mapping_solution(env, global_tcomm, 0);
294  Teuchos::ArrayView< const test_gno_t> gids = serial_map->getLocalElementList();
295 
296  ArrayRCP<int> initial_part_ids(myTasks);
297  for (test_gno_t i = 0; i < myTasks; ++i){
298  initial_part_ids[i] = gids[i];
299  }
300  single_phase_mapping_solution.setParts(initial_part_ids);
301 
302 
303  env->timerStart(Zoltan2::MACRO_TIMERS, "Problem Create");
304  //create a mapping problem for the third case. We provide a solution in which all elements belong to unique part.
305  //even the input is not distributed, we still provide the global_tcomm because processors will calculate different mappings
306  //and the best one will be chosen.
308  ia.getRawPtr(), &serial_problemParams, global_tcomm, &single_phase_mapping_solution, &transformed_machine);
309 
310  env->timerStop(Zoltan2::MACRO_TIMERS, "Problem Create");
311  //solve mapping problem.
312  env->timerStart(Zoltan2::MACRO_TIMERS, "Problem Solve");
313  serial_map_problem.solve(true);
314  env->timerStop(Zoltan2::MACRO_TIMERS, "Problem Solve");
315 
316  //get the solution.
317  Zoltan2::MappingSolution<mytest_adapter_t> *msoln3 = serial_map_problem.getSolution();
318 
319  timer->printAndResetToZero();
320 
321  //typedef Zoltan2::EvaluatePartition<my_adapter_t> quality_t;
323 
324 
325  //input is not distributed in this case.
326  //metric object should be given the serial comm so that it can calculate the correct metrics without global communication.
327  RCP<quality_t> metricObject_3 = rcp(
328  new quality_t(ia.getRawPtr(),&serial_problemParams,serial_comm,msoln3, serial_map_problem.getMachine().getRawPtr()));
329 
330  if (global_tcomm->getRank() == 0){
331  std::cout << "METRICS FOR THE SERIAL CASE - ONE PHASE MAPPING - EACH ELEMENT IS ASSUMED TO BE IN UNIQUE PART AT THE BEGINNING" << std::endl;
332  metricObject_3->printMetrics(std::cout);
333  }
334  if (machine_optimization_level > 0){
335 
336  Teuchos::ParameterList serial_problemParams_2;
337  serial_problemParams_2.set("Input_RCA_Machine_Coords", input_machine_file);
338 
339  Zoltan2::MachineRepresentation <test_scalar_t, mytest_part_t> actual_machine(*global_tcomm, serial_problemParams_2);
340 
341  RCP<quality_t> metricObject_4 = rcp(
342  new quality_t(ia.getRawPtr(),&serial_problemParams_2,serial_comm,msoln3, &actual_machine));
343 
344  if (global_tcomm->getRank() == 0){
345  std::cout << "METRICS FOR THE SERIAL CASE - ONE PHASE MAPPING - EACH ELEMENT IS ASSUMED TO BE IN UNIQUE PART AT THE BEGINNING" << std::endl;
346  metricObject_4->printMetrics(std::cout);
347  }
348  }
349 
350  if (visualize_mapping && global_tcomm->getRank() == 0){
351 
352  Teuchos::ParameterList serial_problemParams_2;
353  serial_problemParams_2.set("Input_RCA_Machine_Coords", input_machine_file);
354  Zoltan2::MachineRepresentation <test_scalar_t, mytest_part_t> actual_machine(*global_tcomm, serial_problemParams_2);
355  test_scalar_t ** coords;
356  actual_machine.getAllMachineCoordinatesView(coords);
357  Zoltan2::visualize_mapping<zscalar_t, int> (0, actual_machine.getMachineDim(), actual_machine.getNumRanks(), coords,
358  int (task_communication_xadj_.size())-1, &(task_communication_xadj_[0]), &(task_communication_adj_[0]), msoln3->getPartListView());
359 
360  }
361 
362 }
363 
364 int main(int narg, char *arg[]){
365 
366  Tpetra::ScopeGuard tscope(&narg, &arg);
367  Teuchos::RCP<const Teuchos::Comm<int> > global_tcomm=Tpetra::getDefaultComm();
368 
369  char *input_binary_graph = NULL;
370  char *input_binary_coordinate = NULL;
371  char *input_machine_file = NULL;
372  int machine_optimization_level = 10;
373  bool divide_prime_first = false;
374  int rank_per_node = 1;
375  int reduce_best_mapping = 1;
376  bool visualize_mapping = false;
377  for ( int i = 1 ; i < narg ; ++i ) {
378  if ( 0 == strcasecmp( arg[i] , "BG" ) ) {
379 
380  input_binary_graph = arg[++i];
381  }
382  else if ( 0 == strcasecmp( arg[i] , "BC" ) ) {
383  input_binary_coordinate = arg[++i];
384  }
385  else if ( 0 == strcasecmp( arg[i] , "MF" ) ) {
386  //not binary.
387  input_machine_file = arg[++i];
388  }
389  else if ( 0 == strcasecmp( arg[i] , "OL" ) ) {
390  machine_optimization_level = atoi( arg[++i] );
391  }
392  else if ( 0 == strcasecmp( arg[i] , "DP" ) ) {
393  if (atoi( arg[++i] )){
394  divide_prime_first = true;
395  }
396  }
397  else if ( 0 == strcasecmp( arg[i] , "RPN" ) ) {
398  rank_per_node = atoi( arg[++i] );
399  }
400  else if ( 0 == strcasecmp( arg[i] , "VM" ) ) {
401  visualize_mapping = true;
402  }
403  else if ( 0 == strcasecmp( arg[i] , "RBM" ) ) {
404  reduce_best_mapping = atoi( arg[++i] );
405  }
406  else{
407  std::cerr << "Unrecognized command line argument #" << i << ": " << arg[i] << std::endl ;
408  return 1;
409  }
410  }
411 
412 
413  try{
414 
415  test_serial_input_adapter(global_tcomm, input_binary_graph, input_binary_coordinate, input_machine_file,
416  machine_optimization_level, divide_prime_first, rank_per_node, visualize_mapping, reduce_best_mapping);
417 
418 #if 0
419  {
420  part_t my_parts = 0, *my_result_parts;
421  //const part_t *local_element_to_rank = msoln1->getPartListView();
422 
423  std::cout << "me:" << global_tcomm->getRank() << " my_parts:" << my_parts << " myTasks:" << myTasks << std::endl;
424  if (global_tcomm->getRank() == 0) {
425 
426  //zscalar_t **dots = partCenters;
427  //int i = 0, j =0;
428  FILE *f2 = fopen("plot.gnuplot", "w");
429  for (int i = 0; i< global_tcomm->getSize(); ++i){
430  char str[20];
431  sprintf(str, "coords%d.txt", i);
432  if (i == 0){
433  fprintf(f2,"splot \"%s\"\n", str);
434  }
435  else {
436  fprintf(f2,"replot \"%s\"\n", str);
437  }
438  }
439  fprintf(f2,"pause-1\n");
440  fclose(f2);
441  }
442  char str[20];
443  int myrank = global_tcomm->getRank();
444  sprintf(str, "coords%d.txt", myrank);
445  FILE *coord_files = fopen(str, "w");
446 
447 
448  for (int j = 0; j < my_parts; ++j){
449  int findex = my_result_parts[j];
450  std::cout << "findex " << findex << std::endl;
451  fprintf(coord_files,"%lf %lf %lf\n", partCenters[0][findex], partCenters[1][findex], partCenters[2][findex]);
452  }
453  fclose(coord_files);
454  }
455 #endif
456 
457  if (global_tcomm->getRank() == 0){
458  std::cout << "PASS" << std::endl;
459  }
460  }
461  catch(std::string &s){
462  std::cerr << s << std::endl;
463  }
464 
465  catch(char * s){
466  std::cerr << s << std::endl;
467  }
468 }
469 
RCP< Zoltan2::XpetraMultiVectorAdapter< mytest_tMVector_t > > create_multi_vector_adapter(RCP< const mytest_map_t > map, zscalar_t **partCenters, zgno_t myTasks)
Time an algorithm (or other entity) as a whole.
Zoltan2::XpetraCrsGraphAdapter< mytest_tcrsGraph_t, mytest_tMVector_t > mytest_adapter_t
MappingInputDistributution
Tpetra::CrsGraph< zlno_t, zgno_t, znode_t > mytest_tcrsGraph_t
void visualize_mapping(int myRank, const int machine_coord_dim, const int num_ranks, proc_coord_t **machine_coords, const v_lno_t num_tasks, const v_lno_t *task_communication_xadj, const v_lno_t *task_communication_adj, const int *task_to_rank)
PartitionMapping maps a solution or an input distribution to ranks.
void solve(bool updateInputData=true)
Direct the problem to create a solution.
mytest_adapter_t::part_t mytest_part_t
Provides access for Zoltan2 to Xpetra::CrsGraph data.
Zoltan2::EvaluatePartition< matrixAdapter_t > quality_t
int main(int narg, char **arg)
Definition: coloring1.cpp:164
common code used by tests
typename InputTraits< User >::part_t part_t
Tpetra::Map::node_type mytest_znode_t
Tpetra::Map< zlno_t, zgno_t, mytest_znode_t > mytest_map_t
Defines the XpetraMultiVectorAdapter.
Defines XpetraCrsGraphAdapter class.
SparseMatrixAdapter_t::part_t part_t
int getMachineDim() const
returns the dimension (number of coords per node) in the machine
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
getProcDim function set the coordinates of all ranks allCoords[i][j], i=0,...,getMachineDim(), j=0,...,getNumRanks(), is the i-th dimensional coordinate for rank j. return true if coordinates are available for all ranks
zgno_t test_gno_t
void test_serial_input_adapter(int nx, int ny, int nz, Teuchos::RCP< const Teuchos::Comm< int > > global_tcomm)
A PartitioningSolution is a solution to a partitioning problem.
Defines the EvaluatePartition class.
zlno_t test_lno_t
int getNumRanks() const
return the number of ranks.
The user parameters, debug, timing and memory profiling output objects, and error checking methods...
zscalar_t test_scalar_t
An adapter for Xpetra::MultiVector.
MachineRepresentation Class Base class for representing machine coordinates, networks, etc.
Tpetra::Map::local_ordinal_type zlno_t
void setParts(ArrayRCP< part_t > &partList)
The algorithm uses setParts to set the solution.
Tpetra::MultiVector< zscalar_t, zlno_t, zgno_t, znode_t > mytest_tMVector_t
Defines the MappingSolution class.
RCP< mytest_tcrsGraph_t > create_tpetra_input_matrix(int nx, int ny, int nz, int numProcs, Teuchos::RCP< const Teuchos::Comm< int > > tcomm, RCP< Zoltan2::Environment > env, zscalar_t **&partCenters, zgno_t &myTasks)
Defines the PartitioningProblem class.
MappingProblem enables mapping of a partition (either computed or input) to MPI ranks.
float zscalar_t
A class that computes and returns quality metrics.
Tpetra::Map::global_ordinal_type zgno_t
Declarations for TimerManager.
Defines the MappingProblem class.