Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Zoltan2_AlgParMETIS.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Zoltan2: A package of combinatorial algorithms for scientific computing
4 //
5 // Copyright 2012 NTESS and the Zoltan2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef _ZOLTAN2_ALGPARMETIS_HPP_
11 #define _ZOLTAN2_ALGPARMETIS_HPP_
12 
13 #include <Zoltan2_GraphModel.hpp>
15 #include <Zoltan2_Algorithm.hpp>
17 #include <Zoltan2_Util.hpp>
18 
23 
24 #ifndef HAVE_ZOLTAN2_PARMETIS
25 
26 // Error handling for when ParMETIS is requested
27 // but Zoltan2 not built with ParMETIS.
28 
29 namespace Zoltan2 {
30 template <typename Adapter, typename Model=GraphModel<typename Adapter::base_adapter_t>>
31 class AlgParMETIS : public Algorithm<Adapter>
32 {
33 public:
34  AlgParMETIS(const RCP<const Environment> &,
35  const RCP<const Comm<int> > &,
36  const RCP<const typename Adapter::base_adapter_t> &,
37  const modelFlag_t& graphFlags_ = modelFlag_t())
38  {
39  throw std::runtime_error(
40  "BUILD ERROR: ParMETIS requested but not compiled into Zoltan2.\n"
41  "Please set CMake flag Zoltan2_ENABLE_ParMETIS:BOOL=ON.");
42  }
43 };
44 }
45 
46 #endif
47 
50 
51 #ifdef HAVE_ZOLTAN2_PARMETIS
52 
53 #ifndef HAVE_ZOLTAN2_MPI
54 
55 // ParMETIS requires compilation with MPI.
56 // If MPI is not available, make compilation fail.
57 #error "TPL ParMETIS requires compilation with MPI. Configure with -DTPL_ENABLE_MPI:BOOL=ON or -DZoltan2_ENABLE_ParMETIS:BOOL=OFF"
58 
59 #else
60 
61 extern "C" {
62 #include "parmetis.h"
63 }
64 
65 #if (PARMETIS_MAJOR_VERSION < 4)
66 
67 // Zoltan2 requires ParMETIS v4.x.
68 // Make compilation fail for earlier versions of ParMETIS.
69 #error "Specified version of ParMETIS is not compatible with Zoltan2; upgrade to ParMETIS v4 or later, or build Zoltan2 without ParMETIS."
70 
71 #else
72 
73 // MPI and ParMETIS version requirements are met. Proceed.
74 
75 namespace Zoltan2 {
76 
77 template <typename Adapter, typename Model=GraphModel<typename Adapter::base_adapter_t>>
78 class AlgParMETIS : public Algorithm<Adapter>
79 {
80 public:
81 
82  typedef GraphModel<typename Adapter::base_adapter_t> graphModel_t;
83  typedef typename Adapter::lno_t lno_t;
84  typedef typename Adapter::gno_t gno_t;
85  typedef typename Adapter::offset_t offset_t;
86  typedef typename Adapter::scalar_t scalar_t;
87  typedef typename Adapter::part_t part_t;
88 
89  typedef idx_t pm_idx_t;
90  typedef real_t pm_real_t;
91 
102  AlgParMETIS(const RCP<const Environment> &env__,
103  const RCP<const Comm<int> > &problemComm__,
104  const RCP<const typename Adapter::base_adapter_t> &adapter__,
105  const modelFlag_t& graphFlags_ = modelFlag_t()) :
106  env(env__), problemComm(problemComm__),
107  adapter(adapter__), graphFlags(graphFlags_)
108  { }
109 
110  void partition(const RCP<PartitioningSolution<Adapter> > &solution);
111 
112 private:
113 
114  const RCP<const Environment> env;
115  const RCP<const Comm<int> > problemComm;
116  const RCP<const typename Adapter::base_adapter_t> adapter;
117  modelFlag_t graphFlags;
118 
119  void scale_weights(size_t n, ArrayView<StridedData<lno_t, scalar_t> > &fwgts,
120  pm_idx_t *iwgts);
121 };
122 
123 
125  template <typename Adapter, typename Model>
127  const RCP<PartitioningSolution<Adapter> > &solution
128 )
129 {
130  HELLO;
131 
132  size_t numGlobalParts = solution->getTargetGlobalNumberOfParts();
133 
134  int me = problemComm->getRank();
135  int np = problemComm->getSize();
136 
137  // Get vertex info
138  ArrayView<const gno_t> vtxgnos;
139  ArrayView<StridedData<lno_t, scalar_t> > vwgts;
140 
141  const auto model = rcp(new Model(adapter, env, problemComm, graphFlags));
142 
143  int nVwgt = model->getNumWeightsPerVertex();
144  size_t nVtx = model->getVertexList(vtxgnos, vwgts);
145  pm_idx_t pm_nVtx;
147 
148  pm_idx_t *pm_vwgts = NULL;
149  if (nVwgt) {
150  pm_vwgts = new pm_idx_t[nVtx*nVwgt];
151  scale_weights(nVtx, vwgts, pm_vwgts);
152  }
153 
154  // Get edge info
155  ArrayView<const gno_t> adjgnos;
156  ArrayView<const offset_t> offsets;
157  ArrayView<StridedData<lno_t, scalar_t> > ewgts;
158  int nEwgt = model->getNumWeightsPerEdge();
159  size_t nEdge = model->getEdgeList(adjgnos, offsets, ewgts);
160 
161  pm_idx_t *pm_ewgts = NULL;
162  if (nEwgt) {
163  pm_ewgts = new pm_idx_t[nEdge*nEwgt];
164  scale_weights(nEdge, ewgts, pm_ewgts);
165  }
166 
167  // Convert index types for edges, if needed
168  pm_idx_t *pm_offsets;
170  pm_idx_t *pm_adjs;
171  pm_idx_t pm_dummy_adj;
172  if (nEdge)
174  else
175  pm_adjs = &pm_dummy_adj; // ParMETIS does not like NULL pm_adjs;
176 
177 
178  // Build vtxdist
179  pm_idx_t *pm_vtxdist;
180  ArrayView<size_t> vtxdist;
181  model->getVertexDist(vtxdist);
182  TPL_Traits<pm_idx_t,size_t>::ASSIGN_ARRAY(&pm_vtxdist, vtxdist);
183 
184  // ParMETIS does not like processors having no vertices.
185  // Inspect vtxdist and remove from communicator procs that have no vertices
186  RCP<Comm<int> > subcomm;
187  MPI_Comm mpicomm; // Note: mpicomm is valid only while subcomm is in scope
188 
189  int nKeep = 0;
190  if (np > 1) {
191  Array<int> keepRanks(np);
192  for (int i = 0; i < np; i++) {
193  if ((pm_vtxdist[i+1] - pm_vtxdist[i]) > 0) {
194  keepRanks[nKeep] = i;
195  pm_vtxdist[nKeep] = pm_vtxdist[i];
196  nKeep++;
197  }
198  }
199  pm_vtxdist[nKeep] = pm_vtxdist[np];
200  if (nKeep < np) {
201  subcomm = problemComm->createSubcommunicator(keepRanks.view(0,nKeep));
202  if (subcomm != Teuchos::null)
203  mpicomm = Teuchos::getRawMpiComm(*subcomm);
204  else
205  mpicomm = MPI_COMM_NULL;
206  }
207  else {
208  mpicomm = Teuchos::getRawMpiComm(*problemComm);
209  }
210  }
211  else {
212  mpicomm = Teuchos::getRawMpiComm(*problemComm);
213  }
214 
215  // Create array for ParMETIS to return results in.
216  pm_idx_t *pm_partList = NULL;
217  if (nVtx) pm_partList = new pm_idx_t[nVtx];
218  for (size_t i = 0; i < nVtx; i++) pm_partList[i] = 0;
219  int pm_return = METIS_OK;
220 
221  if (mpicomm != MPI_COMM_NULL) {
222  // If in ParMETIS' communicator (i.e., have vertices), call ParMETIS
223 
224  // Get target part sizes
225  pm_idx_t pm_nCon = (nVwgt == 0 ? 1 : pm_idx_t(nVwgt));
226  pm_real_t *pm_partsizes = new pm_real_t[numGlobalParts*pm_nCon];
227  for (pm_idx_t dim = 0; dim < pm_nCon; dim++) {
228  if (!solution->criteriaHasUniformPartSizes(dim))
229  for (size_t i=0; i<numGlobalParts; i++)
230  pm_partsizes[i*pm_nCon+dim] =
231  pm_real_t(solution->getCriteriaPartSize(dim,i));
232  else
233  for (size_t i=0; i<numGlobalParts; i++)
234  pm_partsizes[i*pm_nCon+dim] = pm_real_t(1.)/pm_real_t(numGlobalParts);
235  }
236 
237  // Get imbalance tolerances
238  double tolerance = 1.1;
239  const Teuchos::ParameterList &pl = env->getParameters();
240  const Teuchos::ParameterEntry *pe = pl.getEntryPtr("imbalance_tolerance");
241  if (pe) tolerance = pe->getValue<double>(&tolerance);
242 
243  // ParMETIS requires tolerance to be greater than 1.0;
244  // fudge it if condition is not met
245  if (tolerance <= 1.0) {
246  if (me == 0)
247  std::cerr << "Warning: ParMETIS requires imbalance_tolerance > 1.0; "
248  << "to comply, Zoltan2 reset imbalance_tolerance to 1.01."
249  << std::endl;
250  tolerance = 1.01;
251  }
252 
253  pm_real_t *pm_imbTols = new pm_real_t[pm_nCon];
254  for (pm_idx_t dim = 0; dim < pm_nCon; dim++)
255  pm_imbTols[dim] = pm_real_t(tolerance);
256 
257  std::string parmetis_method("PARTKWAY");
258  pe = pl.getEntryPtr("partitioning_approach");
259  if (pe){
260  std::string approach;
261  approach = pe->getValue<std::string>(&approach);
262  if ((approach == "repartition") || (approach == "maximize_overlap")) {
263  if (nKeep > 1)
264  // ParMETIS_V3_AdaptiveRepart requires two or more processors
265  parmetis_method = "ADAPTIVE_REPART";
266  else
267  // Probably best to do PartKway if nKeep == 1;
268  // I think REFINE_KWAY won't give a good answer in most use cases
269  // parmetis_method = "REFINE_KWAY";
270  parmetis_method = "PARTKWAY";
271  }
272  }
273 
274  // Other ParMETIS parameters?
275  pm_idx_t pm_wgtflag = 2*(nVwgt > 0) + (nEwgt > 0);
276  pm_idx_t pm_numflag = 0;
277  pm_idx_t pm_edgecut = -1;
278  pm_idx_t pm_options[METIS_NOPTIONS];
279  pm_options[0] = 1; // Use non-default options for some ParMETIS options
280  for (int i = 1; i < METIS_NOPTIONS; i++)
281  pm_options[i] = 0; // Default options
282  pm_options[2] = 15; // Matches default value used in Zoltan
283 
284  pm_idx_t pm_nPart;
285  TPL_Traits<pm_idx_t,size_t>::ASSIGN(pm_nPart, numGlobalParts);
286 
287  if (parmetis_method == "PARTKWAY") {
288  pm_return = ParMETIS_V3_PartKway(pm_vtxdist, pm_offsets, pm_adjs,
289  pm_vwgts, pm_ewgts, &pm_wgtflag,
290  &pm_numflag, &pm_nCon, &pm_nPart,
291  pm_partsizes, pm_imbTols, pm_options,
292  &pm_edgecut, pm_partList, &mpicomm);
293  }
294  else if (parmetis_method == "ADAPTIVE_REPART") {
295  // Get object sizes: pm_vsize
296  // TODO: get pm_vsize info from input adapter or graph model
297  // TODO: This is just a placeholder
298  pm_idx_t *pm_vsize = new pm_idx_t[nVtx];
299  for (size_t i = 0; i < nVtx; i++) pm_vsize[i] = 1;
300 
301  pm_real_t itr = 100.; // Same default as in Zoltan
302  pm_return = ParMETIS_V3_AdaptiveRepart(pm_vtxdist, pm_offsets, pm_adjs,
303  pm_vwgts,
304  pm_vsize, pm_ewgts, &pm_wgtflag,
305  &pm_numflag, &pm_nCon, &pm_nPart,
306  pm_partsizes, pm_imbTols,
307  &itr, pm_options, &pm_edgecut,
308  pm_partList, &mpicomm);
309  delete [] pm_vsize;
310  }
311  // else if (parmetis_method == "REFINE_KWAY") {
312  // We do not currently have an execution path that calls REFINE_KWAY.
313  // pm_return = ParMETIS_V3_RefineKway(pm_vtxdist, pm_offsets, pm_adjs,
314  // pm_vwgts, pm_ewgts, &pm_wgtflag,
315  // &pm_numflag, &pm_nCon, &pm_nPart,
316  // pm_partsizes, pm_imbTols, pm_options,
317  // &pm_edgecut, pm_partList, &mpicomm);
318  // }
319  else {
320  // We should not reach this condition.
321  throw std::logic_error("\nInvalid ParMETIS method requested.\n");
322  }
323 
324  // Clean up
325  delete [] pm_partsizes;
326  delete [] pm_imbTols;
327  }
328 
329  // Load answer into the solution.
330 
331  ArrayRCP<part_t> partList;
332  if (nVtx)
333  TPL_Traits<part_t, pm_idx_t>::SAVE_ARRAYRCP(&partList, pm_partList, nVtx);
335 
336  solution->setParts(partList);
337 
338  env->memory("Zoltan2-ParMETIS: After creating solution");
339 
340  // Clean up copies made due to differing data sizes.
343  if (nEdge)
345 
346  if (nVwgt) delete [] pm_vwgts;
347  if (nEwgt) delete [] pm_ewgts;
348 
349  if (pm_return != METIS_OK) {
350  throw std::runtime_error(
351  "\nParMETIS returned an error; no valid partition generated.\n"
352  "Look for 'PARMETIS ERROR' in your output for more details.\n");
353  }
354 }
355 
357 // Scale and round scalar_t weights (typically float or double) to
358 // ParMETIS' idx_t (typically int or long).
359 // subject to sum(weights) <= max_wgt_sum.
360 // Scale only if deemed necessary.
361 //
362 // Note that we use ceil() instead of round() to avoid
363 // rounding to zero weights.
364 // Based on Zoltan's scale_round_weights, mode 1
365 
366  template <typename Adapter, typename Model>
367  void AlgParMETIS<Adapter, Model>::scale_weights(
368  size_t n,
369  ArrayView<StridedData<typename Adapter::lno_t,
370  typename Adapter::scalar_t> > &fwgts,
371  pm_idx_t *iwgts
372 )
373 {
374  const double INT_EPSILON = 1e-5;
375  const int nWgt = fwgts.size();
376 
377  int *nonint_local = new int[nWgt+nWgt];
378  int *nonint = nonint_local + nWgt;
379 
380  double *sum_wgt_local = new double[nWgt*4];
381  double *max_wgt_local = sum_wgt_local + nWgt;
382  double *sum_wgt = max_wgt_local + nWgt;
383  double *max_wgt = sum_wgt + nWgt;
384 
385  for (int i = 0; i < nWgt; i++) {
386  nonint_local[i] = 0;
387  sum_wgt_local[i] = 0.;
388  max_wgt_local[i] = 0;
389  }
390 
391  // Compute local sums of the weights
392  // Check whether all weights are integers
393  for (int j = 0; j < nWgt; j++) {
394  for (size_t i = 0; i < n; i++) {
395  double fw = double(fwgts[j][i]);
396  if (!nonint_local[j]) {
397  pm_idx_t tmp = (pm_idx_t) floor(fw + .5); /* Nearest int */
398  if (fabs((double)tmp-fw) > INT_EPSILON) {
399  nonint_local[j] = 1;
400  }
401  }
402  sum_wgt_local[j] += fw;
403  if (fw > max_wgt_local[j]) max_wgt_local[j] = fw;
404  }
405  }
406 
407  Teuchos::reduceAll<int,int>(*problemComm, Teuchos::REDUCE_MAX, nWgt,
408  nonint_local, nonint);
409  Teuchos::reduceAll<int,double>(*problemComm, Teuchos::REDUCE_SUM, nWgt,
410  sum_wgt_local, sum_wgt);
411  Teuchos::reduceAll<int,double>(*problemComm, Teuchos::REDUCE_MAX, nWgt,
412  max_wgt_local, max_wgt);
413 
414  const double max_wgt_sum = double(std::numeric_limits<pm_idx_t>::max()/8);
415  for (int j = 0; j < nWgt; j++) {
416  double scale = 1.;
417 
418  // Scaling needed if weights are not integers or weights'
419  // range is not sufficient
420  if (nonint[j] || (max_wgt[j]<=INT_EPSILON) || (sum_wgt[j]>max_wgt_sum)) {
421  /* Calculate scale factor */
422  if (sum_wgt[j] != 0.) scale = max_wgt_sum/sum_wgt[j];
423  }
424 
425  /* Convert weights to positive integers using the computed scale factor */
426  for (size_t i = 0; i < n; i++)
427  iwgts[i*nWgt+j] = (pm_idx_t) ceil(double(fwgts[j][i])*scale);
428  }
429  delete [] nonint_local;
430  delete [] sum_wgt_local;
431 }
432 
433 } // namespace Zoltan2
434 
435 #endif // PARMETIS VERSION 4 OR HIGHER CHECK
436 
437 #endif // HAVE_ZOLTAN2_MPI
438 
439 #endif // HAVE_ZOLTAN2_PARMETIS
440 
441 #endif
#define HELLO
virtual void partition(const RCP< PartitioningSolution< Adapter > > &)
Partitioning method.
std::bitset< NUM_MODEL_FLAGS > modelFlag_t
map_t::global_ordinal_type gno_t
Definition: mapRemotes.cpp:27
Defines the PartitioningSolution class.
static void SAVE_ARRAYRCP(ArrayRCP< first_t > *a, second_t *b, size_t size)
SparseMatrixAdapter_t::part_t part_t
AlgParMETIS(const RCP< const Environment > &, const RCP< const Comm< int > > &, const RCP< const typename Adapter::base_adapter_t > &, const modelFlag_t &graphFlags_=modelFlag_t())
Adapter::scalar_t scalar_t
Adapter::part_t part_t
Algorithm defines the base class for all algorithms.
map_t::local_ordinal_type lno_t
Definition: mapRemotes.cpp:26
static void ASSIGN(first_t &a, second_t b)
static void ASSIGN_ARRAY(first_t **a, ArrayView< second_t > &b)
Defines the GraphModel interface.
A gathering of useful namespace methods.
static void DELETE_ARRAY(first_t **a)