Zoltan2
Zoltan2_GraphMetricsUtility.hpp
// @HEADER
// *****************************************************************************
// Zoltan2: A package of combinatorial algorithms for scientific computing
//
// Copyright 2012 NTESS and the Zoltan2 contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

/*! \file Zoltan2_GraphMetricsUtility.hpp
 *  \brief Given a local partitioning, compute global weighted graph metrics.
 */

#ifndef ZOLTAN2_GRAPHICMETRICVALUESUTILITY_HPP
#define ZOLTAN2_GRAPHICMETRICVALUESUTILITY_HPP

#include <zoltan_dd.h>
#include <Zoltan2_TPLTraits.hpp>
#include <map>
#include <vector>

namespace Zoltan2 {

/*! \brief Given the local partitioning, compute the global weighted cuts
 *         in each part.
 *
 *  Fills \c metrics with "edge cuts" and "message" values (plus one entry
 *  per edge weight) and, when a \c machine model is supplied, hop and
 *  weighted-hop values as well.
 */
template <typename Adapter,
          typename MachineRep =   // Default MachineRep type
            MachineRepresentation<typename Adapter::scalar_t,
                                  typename Adapter::part_t> >
void globalWeightedByPart(
  const RCP<const Environment> &env,
  const RCP<const Comm<int> > &comm,
  const RCP<const GraphModel<typename Adapter::base_adapter_t> > &graph,
  const ArrayView<const typename Adapter::part_t> &parts,
  typename Adapter::part_t &numParts,
  ArrayRCP<RCP<BaseClassMetrics<typename Adapter::scalar_t> > > &metrics,
  ArrayRCP<typename Adapter::scalar_t> &globalSums,
  bool bMessages = true,
  const RCP<const MachineRep> machine = Teuchos::null) {

  env->timerStart(MACRO_TIMERS, "globalWeightedByPart");

  // Note: the "with hops" metrics used to be computed by a separate method;
  // the two are now combined into this single method. The optional machine
  // parameter selects between them.
  bool bHops = (machine != Teuchos::null);

  env->debug(DETAILED_STATUS, "Entering globalWeightedByPart");

  // Initialize return values

  typedef typename Adapter::lno_t lno_t;
  typedef typename Adapter::gno_t gno_t;
  typedef typename Adapter::offset_t offset_t;
  typedef typename Adapter::scalar_t scalar_t;
  typedef typename Adapter::part_t part_t;

  typedef StridedData<lno_t, scalar_t> t_input_t;

  lno_t localNumVertices = graph->getLocalNumVertices();
  lno_t localNumEdges = graph->getLocalNumEdges();

  ArrayView<const gno_t> Ids;
  ArrayView<t_input_t> v_wghts;
  graph->getVertexList(Ids, v_wghts);

  typedef GraphMetrics<scalar_t> gm_t;

  // get the edge ids and weights
  ArrayView<const gno_t> edgeIds;
  ArrayView<const offset_t> offsets;
  ArrayView<t_input_t> e_wgts;
  graph->getEdgeList(edgeIds, offsets, e_wgts);

  std::vector<scalar_t> edge_weights;
  int numWeightPerEdge = graph->getNumWeightsPerEdge();

  int numMetrics = bHops ?
    4 :  // "edge cuts", messages, hops, weighted hops
    2;   // "edge cuts", messages

  if (numWeightPerEdge) numMetrics += bHops ?
    numWeightPerEdge * 2 :  // "weight n", weighted hops per weight n
    numWeightPerEdge;       // "weight n"
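
  // For example, with two edge weights (numWeightPerEdge == 2) and a
  // machine model supplied (bHops true), numMetrics is 4 + 2*2 == 8.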

  // add some more metrics to the array
  auto next = metrics.size(); // where we begin filling
  for (auto n = 0; n < numMetrics; ++n) {
    addNewMetric<gm_t, scalar_t>(env, metrics);
  }

  std::vector<part_t> e_parts(localNumEdges);

  std::vector<part_t> local_parts;

#ifdef HAVE_ZOLTAN2_MPI
  if (comm->getSize() > 1) {
    const bool bUseLocalIDs = false;  // Local IDs not needed
    typedef Zoltan2_Directory_Simple<gno_t, lno_t, part_t> directory_t;
    int debug_level = 0;
    directory_t directory(comm, bUseLocalIDs, debug_level);
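
    // Publish the part assignment of each locally owned vertex, then look
    // up the part of every edge endpoint (which may be owned by another
    // rank) into e_parts.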
    if (localNumVertices)
      directory.update(localNumVertices, &Ids[0], NULL, &parts[0],
        NULL, directory_t::Update_Mode::Replace);
    else
      directory.update(localNumVertices, NULL, NULL, NULL,
        NULL, directory_t::Update_Mode::Replace);

    if (localNumEdges)
      directory.find(localNumEdges, &edgeIds[0], NULL, &e_parts[0],
        NULL, NULL, false);
    else
      directory.find(localNumEdges, NULL, NULL, NULL,
        NULL, NULL, false);
  } else
#endif
  {
    std::map<gno_t, lno_t> global_id_to_local_index;

    // Everything is local here, but we need a global id to local index
    // conversion, which does not exist up to this point, so create one.

    for (lno_t i = 0; i < localNumVertices; ++i) {
      // at local index i we have the global index Ids[i],
      // so write i into the Ids[i] entry of the map.
      global_id_to_local_index[Ids[i]] = i;
    }

    for (lno_t i = 0; i < localNumEdges; ++i) {
      gno_t ei = edgeIds[i];
      // ei is the global index of the neighbor.
      part_t p = parts[global_id_to_local_index[ei]];
      e_parts[i] = p;
    }
  }

  RCP<const Teuchos::Comm<int> > tcomm = comm;

  env->timerStart(MACRO_TIMERS, "Communication Graph Create");
  {
    const bool bUseLocalIDs = false;  // Local IDs not needed
    int debug_level = 0;
    // Create a directory indexed by part_t whose values hold, for each part,
    // the list of neighbor parts and the weight sums to them.

    // this struct is the user data type for a part;
    // each part will have a std::vector<part_info> as its user data,
    // representing the list of all neighbor parts and a weight for each.
    struct part_info {
      part_info() : sum_weights(0) {
      }

      // operator+=
      // allows the directory to know how to combine two structs
      // which return true for operator==.
      // TODO: Decide if we want directory to work like this for AggregateAdd
      const part_info & operator+=(const part_info & src) {
        sum_weights += src.sum_weights;
        return *this;
      }

      // operator>
      // TODO: Decide if we want directory to work like this for AggregateAdd
      bool operator>(const part_info & src) {
        // Note: Currently this doesn't actually do anything except allow
        // this struct to compile. Aggregate mode uses operator> to preserve
        // ordering, so a custom struct must currently define it. This code,
        // however, uses AggregateAdd mode, which never calls operator>.
        // If we changed the test to require the input data to already be
        // ordered by target_part, the directory implementation could make
        // AggregateAdd and Aggregate almost identical. The only difference
        // would be that Aggregate mode would check operator== and, if true,
        // throw away the duplicate, while AggregateAdd mode would check
        // operator== and, if true, call operator+= to combine the values,
        // in this case summing sum_weights.
        return (target_part > src.target_part);
      }

      // operator==
      // allows the directory to know that two structs should be
      // aggregated into one using operator+=.
      // TODO: Decide if we want directory to work like this for AggregateAdd
      // This works but seems fussy/complicated - to discuss. I'm not yet sure
      // how best to integrate this so we can aggregate both simple types,
      // where we just keep unique elements, and more complex structs with a
      // 'rule' for combining them.
      bool operator==(const part_info & src) {
        // if target_part is the same, the values for sum_weights will
        // be summed.
        return (target_part == src.target_part);
      }

      part_t target_part;   // the part this part_info refers to
      scalar_t sum_weights; // the sum of weights
    };
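
    // For example, under AggregateAdd the entries {target_part: 3,
    // sum_weights: 2.5} and {target_part: 3, sum_weights: 1.0} compare
    // equal, so operator+= combines them into {target_part: 3,
    // sum_weights: 3.5}.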

    // get the vertices in each part on my process.
    std::vector<lno_t> part_begins(numParts, -1);
    std::vector<lno_t> part_nexts(localNumVertices, -1);

    // cluster vertices according to their parts;
    // create the local part graph.
    for (lno_t i = 0; i < localNumVertices; ++i) {
      part_t ap = parts[i];
      part_nexts[i] = part_begins[ap];
      part_begins[ap] = i;
    }
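
    // These two arrays form a singly linked list of vertices per part.
    // For example, with parts = {0, 1, 0} the loop above yields
    //   part_begins = {2, 1}       (most recently seen vertex per part)
    //   part_nexts  = {-1, -1, 0}  (vertex 2 links to vertex 0)
    // so traversing part 0 visits vertex 2, then vertex 0.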

    for (int weight_index = -1;
         weight_index < numWeightPerEdge; ++weight_index) {
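      // The weight_index == -1 pass computes the unweighted metrics
      // ("edge cuts" / "message"), counting each edge with weight 1;
      // passes 0 .. numWeightPerEdge-1 use the corresponding edge weights.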

      std::vector<part_t> part_data(numParts); // will resize to lower as needed
      std::vector<std::vector<part_info>> user_data(numParts); // also to resize
      int count_total_entries = 0;

      std::vector<part_t> part_neighbors(numParts);
      std::vector<scalar_t> part_neighbor_weights(numParts, 0);
      std::vector<scalar_t> part_neighbor_weights_ordered(numParts);

      // coarsen all vertices in my process, in order by part.
      for (part_t i = 0; i < numParts; ++i) {
        part_t num_neighbor_parts = 0;
        lno_t v = part_begins[i];
        // get part i, and the first vertex v in this part.
        while (v != -1) {
          // now get the neighbors of v.
          for (offset_t j = offsets[v]; j < offsets[v+1]; ++j) {

            // get the part of the second vertex.
            part_t ep = e_parts[j];

            // TODO: Can we skip the case (i == ep)?
            // The self-reference set is going to be excluded later anyway,
            // so we could make this more efficient.
            scalar_t ew = 1;
            if (weight_index > -1) {
              ew = e_wgts[weight_index][j];
            }
            // add it to my local part neighbors for part i;
            // a near-zero accumulated weight means ep has not been seen yet.
            if (part_neighbor_weights[ep] < 0.00001) {
              part_neighbors[num_neighbor_parts++] = ep;
            }
            part_neighbor_weights[ep] += ew;
          }
          v = part_nexts[v];
        }

        // now get the ordered part list.
        for (lno_t j = 0; j < num_neighbor_parts; ++j) {
          part_t neighbor_part = part_neighbors[j];
          part_neighbor_weights_ordered[j] =
            part_neighbor_weights[neighbor_part];
          part_neighbor_weights[neighbor_part] = 0;
        }
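        // (zeroing part_neighbor_weights restores the "not yet seen" state
        // tested above, so the scratch array can be reused for the next
        // part without a full clear)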

        if (num_neighbor_parts > 0) {
          // For the new directory, note a difference in the logic flow:
          // originally we had a CrsMatrix which could collect these values
          // as we built each row. For the directory it's probably better to
          // have update called just once, so we collect the values here and
          // then do the whole update at the end.
          part_data[count_total_entries] = i; // set up for directory
          std::vector<part_info> & add_user_data =
            user_data[count_total_entries];
          ++count_total_entries;

          add_user_data.resize(num_neighbor_parts);

          for (int n = 0; n < num_neighbor_parts; ++n) {
            part_info & add_data = add_user_data[n];
            add_data.target_part = part_neighbors[n];
            add_data.sum_weights = part_neighbor_weights_ordered[n];
          }
        }
      }

      scalar_t max_edge_cut = 0;
      scalar_t total_edge_cut = 0;
      part_t max_message = 0;
      part_t total_message = 0;

      part_t total_hop_count = 0;
      scalar_t total_weighted_hop_count = 0;
      part_t max_hop_count = 0;
      scalar_t max_weighted_hop_count = 0;

      // for serial or comm size 1 we need to fill this from local data
      // TODO: Maybe remove all special casing for serial and make this
      // pipeline uniform always
      if (local_parts.size() == 0) {
        local_parts.resize(numParts);
        for (size_t n = 0; n < local_parts.size(); ++n) {
          local_parts[n] = n;
        }
      }

      std::vector<std::vector<part_info>> find_user_data;

      // The directory does not yet support SerialComm because it still has
      // older MPI calls which need to be refactored to the Teuchos::Comm
      // format. To work around this issue, skip the directory calls and just
      // set the find data equal to the input update data. This works because
      // the logic above has already summed the weights per process, so in
      // the SerialComm case there won't be duplicates.
      bool bSerialComm =
        (dynamic_cast<const Teuchos::SerialComm<int>*>(&(*comm)) != NULL);

      if (!bSerialComm) {
        typedef Zoltan2_Directory_Vector<part_t, int, std::vector<part_info>>
          directory_t;
        directory_t directory(comm, bUseLocalIDs, debug_level);

        if (count_total_entries) {
          // update
          directory.update(count_total_entries, &part_data[0],
            NULL, &user_data[0],
            NULL, directory_t::Update_Mode::AggregateAdd);
        }
        else {
          directory.update(count_total_entries, NULL, NULL, NULL,
            NULL, directory_t::Update_Mode::AggregateAdd);
        }

        // get my local_parts (parts managed on this directory)
        directory.get_locally_managed_gids(local_parts);

        // set up find_user_data to have the right size
        find_user_data.resize(local_parts.size());

        // find
        directory.find(local_parts.size(), &local_parts[0], NULL,
          &find_user_data[0], NULL, NULL, false);
      }
      else {
        find_user_data = user_data;
      }

      for (size_t n = 0; n < local_parts.size(); ++n) {
        scalar_t part_edge_cut = 0;
        part_t part_messages = 0;
        const std::vector<part_info> & data = find_user_data[n];
        for (size_t q = 0; q < data.size(); ++q) {
          const part_t local_part = local_parts[n];
          const part_info & info = data[q];
          if (info.target_part != local_part) {
            part_edge_cut += info.sum_weights;
            part_messages += 1;

            if (bHops) {
              typename MachineRep::machine_pcoord_t hop_count = 0;
              machine->getHopCount(local_part, info.target_part, hop_count);

              part_t hop_counts = hop_count;
              scalar_t weighted_hop_counts = hop_count * info.sum_weights;

              total_hop_count += hop_counts;
              total_weighted_hop_count += weighted_hop_counts;

              if (hop_counts > max_hop_count) {
                max_hop_count = hop_counts;
              }
              if (weighted_hop_counts > max_weighted_hop_count) {
                max_weighted_hop_count = weighted_hop_counts;
              }
            }
          }
        }

        if (part_edge_cut > max_edge_cut) {
          max_edge_cut = part_edge_cut;
        }
        total_edge_cut += part_edge_cut;

        if (part_messages > max_message) {
          max_message = part_messages;
        }
        total_message += part_messages;
      }

      scalar_t g_max_edge_cut = 0;
      scalar_t g_total_edge_cut = 0;
      part_t g_max_message = 0;
      part_t g_total_message = 0;

      part_t g_total_hop_count = 0;
      scalar_t g_total_weighted_hop_count = 0;
      part_t g_max_hop_count = 0;
      scalar_t g_max_weighted_hop_count = 0;

      try {
        Teuchos::reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_MAX, 1,
          &max_edge_cut, &g_max_edge_cut);
        Teuchos::reduceAll<int, part_t>(*comm, Teuchos::REDUCE_MAX, 1,
          &max_message, &g_max_message);

        Teuchos::reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM, 1,
          &total_edge_cut, &g_total_edge_cut);
        Teuchos::reduceAll<int, part_t>(*comm, Teuchos::REDUCE_SUM, 1,
          &total_message, &g_total_message);

        if (bHops) {
          Teuchos::reduceAll<int, part_t>(*comm, Teuchos::REDUCE_MAX, 1,
            &max_hop_count, &g_max_hop_count);
          Teuchos::reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_MAX, 1,
            &max_weighted_hop_count, &g_max_weighted_hop_count);

          Teuchos::reduceAll<int, part_t>(*comm, Teuchos::REDUCE_SUM, 1,
            &total_hop_count, &g_total_hop_count);
          Teuchos::reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM, 1,
            &total_weighted_hop_count, &g_total_weighted_hop_count);
        }
      }
      Z2_THROW_OUTSIDE_ERROR(env);

      if (weight_index == -1) {
        metrics[next]->setName("edge cuts");
      }
      else {
        std::ostringstream oss;
        oss << "weight " << weight_index;
        metrics[next]->setName(oss.str());
      }
      metrics[next]->setMetricValue("global maximum", g_max_edge_cut);
      metrics[next]->setMetricValue("global sum", g_total_edge_cut);
      next++;
      if (weight_index == -1) {
        metrics[next]->setName("message");
        metrics[next]->setMetricValue("global maximum", g_max_message);
        metrics[next]->setMetricValue("global sum", g_total_message);
        next++;
      }

      if (bHops) {
        if (weight_index == -1) {
          metrics[next]->setName("hops (No Weight)");
          metrics[next]->setMetricValue("global maximum", g_max_hop_count);
          metrics[next]->setMetricValue("global sum", g_total_hop_count);
          next++;
        }

        std::ostringstream oss;
        oss << "weighted hops" << weight_index;
        metrics[next]->setName(oss.str());
        metrics[next]->
          setMetricValue("global maximum", g_max_weighted_hop_count);
        metrics[next]->
          setMetricValue("global sum", g_total_weighted_hop_count);
        next++;
      }
    }
  }

  env->timerStop(MACRO_TIMERS, "globalWeightedByPart");

  env->debug(DETAILED_STATUS, "Exiting globalWeightedByPart");
}
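
// A minimal usage sketch (hypothetical names: env, comm, graphModel,
// partsView, numGlobalParts, machine; assumed created elsewhere):
//
//   typedef typename adapter_t::scalar_t scalar_t;
//   ArrayRCP<RCP<BaseClassMetrics<scalar_t> > > metrics;
//   ArrayRCP<scalar_t> globalSums;
//   globalWeightedByPart<adapter_t>(env, comm, graphModel, partsView,
//     numGlobalParts, metrics, globalSums);
//
//   // Supplying a machine model additionally computes hop metrics:
//   globalWeightedByPart<adapter_t, machine_t>(env, comm, graphModel,
//     partsView, numGlobalParts, metrics, globalSums, true, machine);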

/*! \brief Print out header info for graph metrics.
 */
template <typename scalar_t, typename part_t>
void printGraphMetricsHeader(std::ostream &os,
                             part_t targetNumParts,
                             part_t numParts) {

  os << "Graph Metrics: (" << numParts << " parts)";
  os << std::endl;
  if (targetNumParts != numParts) {
    os << "Target number of parts is: " << targetNumParts << std::endl;
  }
  GraphMetrics<scalar_t>::printHeader(os);
}

/*! \brief Print out a list of graph metrics.
 */
template <typename scalar_t, typename part_t>
void printGraphMetrics(std::ostream &os,
                       part_t targetNumParts,
                       part_t numParts,
                       const ArrayView<RCP<BaseClassMetrics<scalar_t> > >
                         &infoList) {

  printGraphMetricsHeader<scalar_t, part_t>(os, targetNumParts, numParts);
  for (int i = 0; i < infoList.size(); i++) {
    if (infoList[i]->getName() != METRICS_UNSET_STRING) {
      infoList[i]->printLine(os);
    }
  }
  os << std::endl;
}
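
// A usage sketch (hypothetical values; `metrics` as filled by
// globalWeightedByPart above; ArrayRCP::operator() yields the ArrayView):
//
//   printGraphMetrics<scalar_t, part_t>(std::cout, targetNumParts,
//     numParts, metrics());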

/*! \brief Print out a single graph metric.
 */
template <typename scalar_t, typename part_t>
void printGraphMetrics(std::ostream &os,
                       part_t targetNumParts,
                       part_t numParts,
                       RCP<BaseClassMetrics<scalar_t>> metricValue) {

  printGraphMetricsHeader<scalar_t, part_t>(os, targetNumParts, numParts);
  metricValue->printLine(os);
}

} // namespace Zoltan2

#endif