14 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_ 
   15 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_ 
   18 template <
typename Adapter>
 
   55 template <
typename Adapter>
 
   60 #ifndef DOXYGEN_SHOULD_SKIP_THIS 
   62   typedef typename Adapter::scalar_t scalar_t;
 
   86     const RCP<
const Comm<int> > &comm,
 
  120     const RCP<
const Comm<int> > &comm,
 
  121     int nUserWeights, ArrayView<ArrayRCP<part_t> > reqPartIds,
 
  122     ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
 
  180     if (partDist_.size() > 0) 
return &partDist_[0];
 
  201     if (procDist_.size() > 0) 
return &procDist_[0];
 
  232     if (pSizeUniform_[idx])
 
  233       return 1.0 / nGlobalParts_;
 
  234     else if (pCompactIndex_[idx].size())
 
  235       return pSize_[idx][pCompactIndex_[idx][part]];
 
  237       return pSize_[idx][part];
 
  284   void setParts(ArrayRCP<part_t> &partList);
 
  309     for (
part_t i = 0; i < nrhs; i++) { 
 
  310       part_t k = (remap ? remap[i] : i);
 
  311       for (
part_t j = idx[k]; j < idx[k+1]; j++) { 
 
  312         if (i == (adj[j]-nlhs)) {
 
  338     if (parts_.size() > 0) 
return parts_.getRawPtr();
 
  347     if (procs_.size() > 0) 
return procs_.getRawPtr();
 
  355     if (this->algorithm_ == Teuchos::null)
 
  356       throw std::logic_error(
"no partitioning algorithm has been run yet");
 
  357     return this->algorithm_->isPartitioningTreeBinary();
 
  363                         std::vector<part_t> & permPartNums,
 
  364                         std::vector<part_t> & splitRangeBeg,
 
  365                         std::vector<part_t> & splitRangeEnd,
 
  366                         std::vector<part_t> & treeVertParents)
 const {
 
  370     if (this->algorithm_ == Teuchos::null)
 
  371       throw std::logic_error(
"no partitioning algorithm has been run yet");
 
  372     this->algorithm_->getPartitionTree(
 
  383   std::vector<Zoltan2::coordinateModelPartBox> &
 
  386     return this->algorithm_->getPartBoxesView();
 
  402       if (this->algorithm_ == Teuchos::null)
 
  403         throw std::logic_error(
"no partitioning algorithm has been run yet");
 
  405       p = this->algorithm_->pointAssign(dim, point); 
 
  423   void boxAssign(
int dim, scalar_t *lower, scalar_t *upper,
 
  424                  size_t &nPartsFound, 
part_t **partsFound)
 const 
  427       if (this->algorithm_ == Teuchos::null)
 
  428         throw std::logic_error(
"no partitioning algorithm has been run yet");
 
  430       this->algorithm_->boxAssign(dim, lower, upper, nPartsFound, partsFound); 
 
  439                              ArrayRCP <part_t> &comAdj)
 const 
  442       if (this->algorithm_ == Teuchos::null)
 
  443         throw std::logic_error(
"no partitioning algorithm has been run yet");
 
  445       this->algorithm_->getCommunicationGraph(
this, comXAdj, comAdj);
 
  470     env_->localInputAssertion(__FILE__, __LINE__, 
"invalid process id",
 
  473     procToPartsMap(procId, numParts, partMin, partMax);
 
  489     env_->localInputAssertion(__FILE__, __LINE__, 
"invalid part id",
 
  492     partToProcsMap(partId, procMin, procMax);
 
  496   void partToProc(
bool doCheck, 
bool haveNumLocalParts, 
bool haveNumGlobalParts,
 
  497     int numLocalParts, 
int numGlobalParts);
 
  499   void procToPartsMap(
int procId, 
double &numParts, 
part_t &partMin,
 
  502   void partToProcsMap(
part_t partId, 
int &procMin, 
int &procMax) 
const;
 
  504   void setPartDistribution();
 
  506   void setPartSizes(ArrayView<ArrayRCP<part_t> > reqPartIds,
 
  507     ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
 
  509   void computePartSizes(
int widx, ArrayView<part_t> ids,
 
  510     ArrayView<scalar_t> sizes);
 
  512   void broadcastPartSizes(
int widx);
 
  515   RCP<const Environment> env_;             
 
  516   const RCP<const Comm<int> > comm_;       
 
  519   RCP<std::vector<Zoltan2::coordinateModelPartBox> > partBoxes;
 
  524   scalar_t localFraction_; 
 
  556   bool             onePartPerProc_;   
 
  557   std::vector<int>      partDist_;      
 
  558   std::vector<part_t> procDist_;      
 
  559   bool procDistEquallySpread_;        
 
  595   ArrayRCP<bool> pSizeUniform_;
 
  596   ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
 
  597   ArrayRCP<ArrayRCP<scalar_t> > pSize_;
 
  602   ArrayRCP<part_t> parts_;      
 
  606   part_t nGlobalPartsSolution_; 
 
  612   ArrayRCP<int> procs_;       
 
  617   const RCP<Algorithm<Adapter> > algorithm_;  
 
  624 template <
typename Adapter>
 
  626     const RCP<const Environment> &env,
 
  627     const RCP<
const Comm<int> > &comm,
 
  630     : env_(env), comm_(comm),
 
  632       nGlobalParts_(0), nLocalParts_(0),
 
  633       localFraction_(0),  nWeightsPerObj_(),
 
  634       onePartPerProc_(false), partDist_(), procDist_(),
 
  635       procDistEquallySpread_(false),
 
  636       pSizeUniform_(), pCompactIndex_(), pSize_(),
 
  637       parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
 
  638       procs_(), algorithm_(algorithm)
 
  640   nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);  
 
  642   setPartDistribution();
 
  647   ArrayRCP<part_t> *noIds = 
new ArrayRCP<part_t> [nWeightsPerObj_];
 
  648   ArrayRCP<scalar_t> *noSizes = 
new ArrayRCP<scalar_t> [nWeightsPerObj_];
 
  649   ArrayRCP<ArrayRCP<part_t> > ids(noIds, 0, nWeightsPerObj_, 
true);
 
  650   ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_, 
true);
 
  652   setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_));
 
  654   env_->memory(
"After construction of solution");
 
  657 template <
typename Adapter>
 
  659     const RCP<const Environment> &env,
 
  660     const RCP<
const Comm<int> > &comm,
 
  662     ArrayView<ArrayRCP<part_t> > reqPartIds,
 
  663     ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
 
  665     : env_(env), comm_(comm),
 
  667       nGlobalParts_(0), nLocalParts_(0),
 
  668       localFraction_(0),  nWeightsPerObj_(),
 
  669       onePartPerProc_(false), partDist_(), procDist_(),
 
  670       procDistEquallySpread_(false),
 
  671       pSizeUniform_(), pCompactIndex_(), pSize_(),
 
  672       parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
 
  673       procs_(), algorithm_(algorithm)
 
  675   nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);  
 
  677   setPartDistribution();
 
  679   setPartSizes(reqPartIds, reqPartSizes);
 
  681   env_->memory(
"After construction of solution");
 
  684 template <
typename Adapter>
 
  689   const ParameterList &pl = env_->getParameters();
 
  690   size_t haveGlobalNumParts=0, haveLocalNumParts=0;
 
  691   int numLocal=0, numGlobal=0;
 
  693   const Teuchos::ParameterEntry *pe = pl.getEntryPtr(
"num_global_parts");
 
  696     haveGlobalNumParts = 1;
 
  697     nGlobalParts_ = 
part_t(pe->getValue(&nGlobalParts_));
 
  698     numGlobal = nGlobalParts_;
 
  701   pe = pl.getEntryPtr(
"num_local_parts");
 
  704     haveLocalNumParts = 1;
 
  705     nLocalParts_ = 
part_t(pe->getValue(&nLocalParts_));
 
  706     numLocal = nLocalParts_;
 
  712     partToProc(
true, haveLocalNumParts, haveGlobalNumParts,
 
  713       numLocal, numGlobal);
 
  717   int nprocs = comm_->getSize();
 
  718   int rank = comm_->getRank();
 
  720   if (onePartPerProc_){
 
  721     nGlobalParts_ = nprocs;
 
  724   else if (partDist_.size() > 0){   
 
  725     nGlobalParts_ = partDist_.size() - 1;
 
  726     int pstart = partDist_[0];
 
  727     for (
part_t i=1; i <= nGlobalParts_; i++){
 
  728       int pend = partDist_[i];
 
  729       if (rank >= pstart && rank < pend){
 
  730         int numOwners = pend - pstart;
 
  732         localFraction_ = 1.0 / numOwners;
 
  738   else if (procDist_.size() > 0){  
 
  739     nGlobalParts_ = procDist_[nprocs];
 
  740     nLocalParts_ = procDist_[rank+1] - procDist_[rank];
 
  743     throw std::logic_error(
"partToProc error");
 
  748 template <
typename Adapter>
 
  749   void PartitioningSolution<Adapter>::setPartSizes(
 
  750     ArrayView<ArrayRCP<part_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
 
  752   int widx = nWeightsPerObj_;
 
  755   size_t *countBuf = 
new size_t [widx*2];
 
  756   ArrayRCP<size_t> counts(countBuf, 0, widx*2, 
true);
 
  758   fail = ((ids.size() != widx) || (sizes.size() != widx));
 
  760   for (
int w=0; !fail && w < widx; w++){
 
  761     counts[w] = ids[w].size();
 
  762     if (ids[w].size() != sizes[w].size()) fail=
true;
 
  765   env_->globalBugAssertion(__FILE__, __LINE__, 
"bad argument arrays", fail==0,
 
  770   ArrayRCP<scalar_t> *emptySizes= 
new ArrayRCP<scalar_t> [widx];
 
  771   pSize_ = arcp(emptySizes, 0, widx);
 
  773   ArrayRCP<unsigned char> *emptyIndices= 
new ArrayRCP<unsigned char> [widx];
 
  774   pCompactIndex_ = arcp(emptyIndices, 0, widx);
 
  776   bool *info = 
new bool [widx];
 
  777   pSizeUniform_ = arcp(info, 0, widx);
 
  778   for (
int w=0; w < widx; w++)
 
  779     pSizeUniform_[w] = 
true;
 
  781   if (nGlobalParts_ == 1){
 
  785   size_t *ptr1 = counts.getRawPtr();
 
  786   size_t *ptr2 = counts.getRawPtr() + widx;
 
  789     reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2);
 
  795   for (
int w=0; w < widx; w++)
 
  796     if (counts[widx+w] > 0){
 
  798       pSizeUniform_[w] = 
false;
 
  807   int nprocs = comm_->getSize();
 
  808   int rank = comm_->getRank();
 
  810   for (
int w=0; w < widx; w++){
 
  811     if (pSizeUniform_[w]) 
continue;
 
  816     part_t length = ids[w].size();
 
  818     Teuchos::gatherAll<int, part_t>(*comm_, 1, &length,
 
  823       for (
int i=0; i < nprocs; i++)
 
  824         total += allLength[i];
 
  827       scalar_t *partSizes = 
new scalar_t [total];
 
  829       ArrayView<part_t> idArray(partNums, total);
 
  830       ArrayView<scalar_t> sizeArray(partSizes, total);
 
  833         for (
int i=0; i < length; i++){
 
  834           *partNums++ = ids[w][i];
 
  835           *partSizes++ = sizes[w][i];
 
  839       for (
int p=1; p < nprocs; p++){
 
  840         if (allLength[p] > 0){
 
  841           Teuchos::receive<int, part_t>(*comm_, p,
 
  842             allLength[p], partNums);
 
  843           Teuchos::receive<int, scalar_t>(*comm_, p,
 
  844             allLength[p], partSizes);
 
  845           partNums += allLength[p];
 
  846           partSizes += allLength[p];
 
  853         computePartSizes(w, idArray, sizeArray);
 
  857       delete [] idArray.getRawPtr();
 
  858       delete [] sizeArray.getRawPtr();
 
  863         Teuchos::send<int, part_t>(*comm_, length, ids[w].getRawPtr(), 0);
 
  864         Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
 
  868     broadcastPartSizes(w);
 
  872 template <
typename Adapter>
 
  873   void PartitioningSolution<Adapter>::broadcastPartSizes(
int widx)
 
  875   env_->localBugAssertion(__FILE__, __LINE__, 
"preallocations",
 
  876     pSize_.size()>widx &&
 
  877     pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
 
  880   int rank = comm_->getRank();
 
  881   int nprocs = comm_->getSize();
 
  882   part_t nparts = nGlobalParts_;
 
  890     if (pSizeUniform_[widx] == 
true)
 
  892     else if (pCompactIndex_[widx].size() > 0)
 
  899     Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
 
  905       pSizeUniform_[widx] = 
true;
 
  914     unsigned char *idxbuf = NULL;
 
  917       idxbuf = 
new unsigned char [nparts];
 
  918       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
 
  921       env_->localBugAssertion(__FILE__, __LINE__, 
"index list size",
 
  923       idxbuf = pCompactIndex_[widx].getRawPtr();
 
  928       Teuchos::broadcast<int, char>(*comm_, 0, nparts,
 
  929         reinterpret_cast<char *
>(idxbuf));
 
  934       pCompactIndex_[widx] = arcp(idxbuf, 0, nparts, 
true);
 
  938     unsigned char maxIdx=0;
 
  939     for (
part_t p=0; p < nparts; p++)
 
  940       if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
 
  942     int numSizes = maxIdx + 1;
 
  944     scalar_t *sizeList = NULL;
 
  947       sizeList = 
new scalar_t [numSizes];
 
  948       env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
 
  951       env_->localBugAssertion(__FILE__, __LINE__, 
"wrong number of sizes",
 
  954       sizeList = pSize_[widx].getRawPtr();
 
  958       Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
 
  963       pSize_[widx] = arcp(sizeList, 0, numSizes, 
true);
 
  972     scalar_t *sizeList = NULL;
 
  975       sizeList = 
new scalar_t [nparts];
 
  976       env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
 
  979       env_->localBugAssertion(__FILE__, __LINE__, 
"wrong number of sizes",
 
  982       sizeList = pSize_[widx].getRawPtr();
 
  986       Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
 
  991       pSize_[widx] = arcp(sizeList, 0, nparts);
 
  997 template <
typename Adapter>
 
  998   void PartitioningSolution<Adapter>::computePartSizes(
int widx,
 
  999     ArrayView<part_t> ids, ArrayView<scalar_t> sizes)
 
 1001   int len = ids.size();
 
 1004     pSizeUniform_[widx] = 
true;
 
 1008   env_->localBugAssertion(__FILE__, __LINE__, 
"bad array sizes",
 
 1011   env_->localBugAssertion(__FILE__, __LINE__, 
"bad index",
 
 1014   env_->localBugAssertion(__FILE__, __LINE__, 
"preallocations",
 
 1015     pSize_.size()>widx &&
 
 1016     pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
 
 1022   part_t nparts = nGlobalParts_;
 
 1023   unsigned char *buf = 
new unsigned char [nparts];
 
 1024   env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
 
 1025   memset(buf, 0, nparts);
 
 1026   ArrayRCP<unsigned char> partIdx(buf, 0, nparts, 
true);
 
 1028   scalar_t 
epsilon = 10e-5 / nparts;
 
 1029   scalar_t min=sizes[0], max=sizes[0], sum=0;
 
 1031   for (
int i=0; i < len; i++){
 
 1033     scalar_t size = sizes[i];
 
 1035     env_->localInputAssertion(__FILE__, __LINE__, 
"invalid part id",
 
 1038     env_->localInputAssertion(__FILE__, __LINE__, 
"invalid part size", size>=0,
 
 1045     env_->localInputAssertion(__FILE__, __LINE__,
 
 1046       "multiple sizes provided for one part", partIdx[
id]==0, 
BASIC_ASSERTION);
 
 1050     if (size < min) min = size;
 
 1051     if (size > max) max = size;
 
 1060     scalar_t *allSizes = 
new scalar_t [2];
 
 1061     env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
 
 1063     ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2, 
true);
 
 1065     int numNonZero = nparts - len;
 
 1068     allSizes[1] = 1.0 / numNonZero;
 
 1070     for (
part_t p=0; p < nparts; p++)
 
 1073     for (
int i=0; i < len; i++)
 
 1076     pSize_[widx] = sizeArray;
 
 1077     pCompactIndex_[widx] = partIdx;
 
 1082   if (max - min <= epsilon){
 
 1083     pSizeUniform_[widx] = 
true;
 
 1088   scalar_t avg = sum / nparts;
 
 1095   scalar_t *tmp = 
new scalar_t [len];
 
 1096   env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
 
 1097   memcpy(tmp, sizes.getRawPtr(), 
sizeof(scalar_t) * len);
 
 1098   ArrayRCP<scalar_t> partSizes(tmp, 0, len, 
true);
 
 1100   std::sort(partSizes.begin(), partSizes.end());
 
 1104   Array<scalar_t> nextUniqueSize;
 
 1105   nextUniqueSize.push_back(partSizes[len-1]);   
 
 1106   scalar_t curr = partSizes[len-1];
 
 1108   bool haveAvg = 
false;
 
 1109   if (curr - avg <= epsilon)
 
 1112   for (
int i=len-2; i >= 0; i--){
 
 1113     scalar_t val = partSizes[i];
 
 1114     if (curr - val > epsilon){
 
 1115       nextUniqueSize.push_back(val);  
 
 1117       if (avgIndex==len && val > avg && val - avg <= epsilon){
 
 1119         avgIndex = nextUniqueSize.size() - 1;
 
 1127   size_t numSizes = nextUniqueSize.size();
 
 1128   int sizeArrayLen = numSizes;
 
 1135     if (!haveAvg) sizeArrayLen++;   
 
 1137     scalar_t *allSizes = 
new scalar_t [sizeArrayLen];
 
 1138     env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
 
 1139     ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen, 
true);
 
 1141     int newAvgIndex = sizeArrayLen;
 
 1143     for (
int i=numSizes-1, idx=0; i >= 0; i--){
 
 1145       if (newAvgIndex == sizeArrayLen){
 
 1147         if (haveAvg && i==avgIndex)
 
 1150         else if (!haveAvg && avg < nextUniqueSize[i]){
 
 1152           allSizes[idx++] = avg;
 
 1156       allSizes[idx++] = nextUniqueSize[i];
 
 1159     env_->localBugAssertion(__FILE__, __LINE__, 
"finding average in list",
 
 1162     for (
int i=0; i < nparts; i++){
 
 1163       buf[i] = newAvgIndex;   
 
 1166     sum = (nparts - len) * allSizes[newAvgIndex];
 
 1168     for (
int i=0; i < len; i++){
 
 1170       scalar_t size = sizes[i];
 
 1175       if (size < avg && avg - size <= epsilon)
 
 1176         index = newAvgIndex;
 
 1178         typename ArrayRCP<scalar_t>::iterator found =
 
 1179           std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
 
 1181         env_->localBugAssertion(__FILE__, __LINE__, 
"size array",
 
 1184         index = found - sizeArray.begin();
 
 1188       sum += allSizes[index];
 
 1191     for (
int i=0; i < sizeArrayLen; i++){
 
 1192       sizeArray[i] /= sum;
 
 1195     pCompactIndex_[widx] = partIdx;
 
 1196     pSize_[widx] = sizeArray;
 
 1202     tmp = 
new scalar_t [nparts];
 
 1203     env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
 
 1205     sum += ((nparts - len) * avg);
 
 1207     for (
int i=0; i < nparts; i++){
 
 1211     for (
int i=0; i < len; i++){
 
 1212       tmp[ids[i]] = sizes[i]/sum;
 
 1215     pSize_[widx] = arcp(tmp, 0, nparts);
 
 1219 template <
typename Adapter>
 
 1224   size_t len = partList.size();
 
 1233   part_t lMin = (len > 0 ? std::numeric_limits<part_t>::max() : 0);
 
 1236   for (
size_t i = 0; i < len; i++) {
 
 1237     if (partList[i] < lMin) lMin = partList[i];
 
 1238     if (partList[i] > lMax) lMax = partList[i];
 
 1240   Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MIN, 1, &lMin, &gMin);
 
 1241   Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MAX, 1, &lMax, &gMax);
 
 1243   nGlobalPartsSolution_ = gMax - gMin + 1;
 
 1248   if (!onePartPerProc_){
 
 1250     int *procs = 
new int [len];
 
 1251     env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
 
 1252     procs_ = arcp<int>(procs, 0, len);
 
 1255       part_t *parts = partList.getRawPtr();
 
 1257       if (procDist_.size() > 0){    
 
 1260         for (
size_t i=0; i < len; i++){
 
 1261           partToProcsMap(parts[i], procs[i], procId);
 
 1266         lno_t *partCounter = 
new lno_t [nGlobalPartsSolution_];
 
 1267         env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
 
 1270         int numProcs = comm_->getSize();
 
 1274         memset(partCounter, 0, 
sizeof(
lno_t) * nGlobalPartsSolution_);
 
 1276         for (
typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++)
 
 1277           partCounter[parts[i]]++;
 
 1280         env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
 
 1283         int proc2 = partDist_[0];
 
 1285         for (
part_t part=1; part < nGlobalParts_; part++){
 
 1287           proc2 = partDist_[part+1];
 
 1288           int numprocs = proc2 - proc1;
 
 1290           double dNum = partCounter[part];
 
 1291           double dProcs = numprocs;
 
 1294           double each = floor(dNum/dProcs);
 
 1295           double extra = fmod(dNum,dProcs);
 
 1297           for (
int proc=proc1, i=0; proc<proc2; proc++, i++){
 
 1299               procCounter[proc] = 
lno_t(each) + 1;
 
 1301               procCounter[proc] = 
lno_t(each);
 
 1305         delete [] partCounter;
 
 1307         for (
typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++){
 
 1308           if (partList[i] >= nGlobalParts_){
 
 1311             procs[i] = comm_->getRank();
 
 1314           part_t partNum = parts[i];
 
 1315           proc1 = partDist_[partNum];
 
 1316           proc2 = partDist_[partNum + 1];
 
 1319           for (proc=proc1; proc < proc2; proc++){
 
 1320             if (procCounter[proc] > 0){
 
 1322               procCounter[proc]--;
 
 1326           env_->localBugAssertion(__FILE__, __LINE__, 
"part to proc",
 
 1330         delete [] procCounter;
 
 1340   bool doRemap = 
false;
 
 1341   const Teuchos::ParameterEntry *pe =
 
 1342                  env_->getParameters().getEntryPtr(
"remap_parts");
 
 1343   if (pe) doRemap = pe->getValue(&doRemap);
 
 1344   if (doRemap) RemapParts();
 
 1346   haveSolution_ = 
true;
 
 1348   env_->memory(
"After Solution has processed algorithm's answer");
 
 1353 template <
typename Adapter>
 
 1355     double &numParts, 
part_t &partMin, 
part_t &partMax)
 const 
 1357   if (onePartPerProc_){
 
 1359     partMin = partMax = procId;
 
 1361   else if (procDist_.size() > 0){
 
 1362     partMin = procDist_[procId];
 
 1363     partMax = procDist_[procId+1] - 1;
 
 1364     numParts = procDist_[procId+1] - partMin;
 
 1369     std::vector<int>::const_iterator entry;
 
 1370     entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
 
 1372     size_t partIdx = entry - partDist_.begin();
 
 1373     int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
 
 1374     partMin = partMax = int(partIdx) - 1;
 
 1375     numParts = 1.0 / numProcs;
 
 1379 template <
typename Adapter>
 
 1380   void PartitioningSolution<Adapter>::partToProcsMap(
part_t partId,
 
 1381     int &procMin, 
int &procMax)
 const 
 1383   if (partId >= nGlobalParts_){
 
 1387     procMin = procMax = comm_->getRank();
 
 1389   else if (onePartPerProc_){
 
 1390     procMin = procMax = int(partId);
 
 1392   else if (procDist_.size() > 0){
 
 1393     if (procDistEquallySpread_) {
 
 1395       double fProcs = comm_->getSize();
 
 1396       double fParts = nGlobalParts_;
 
 1397       double each = fParts / fProcs;
 
 1398       procMin = int(partId / each);
 
 1399       while (procDist_[procMin] > partId) procMin--;
 
 1400       while (procDist_[procMin+1] <= partId) procMin++;
 
 1407       typename std::vector<part_t>::const_iterator entry;
 
 1408       entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
 
 1410       size_t procIdx = entry - procDist_.begin();
 
 1411       procMin = procMax = int(procIdx) - 1;
 
 1415     procMin = partDist_[partId];
 
 1416     procMax = partDist_[partId+1] - 1;
 
 1420 template <
typename Adapter>
 
 1422     int c1, 
int c2)
 const 
 1424   if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
 
 1425     throw std::logic_error(
"criteriaHaveSamePartSizes error");
 
 1427   bool theSame = 
false;
 
 1432   else if (pSizeUniform_[c1] == 
true && pSizeUniform_[c2] == 
true)
 
 1435   else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
 
 1437     bool useIndex = pCompactIndex_[c1].size() > 0;
 
 1439       for (
part_t p=0; theSame && p < nGlobalParts_; p++)
 
 1440         if (pSize_[c1][pCompactIndex_[c1][p]] !=
 
 1441             pSize_[c2][pCompactIndex_[c2][p]])
 
 1445       for (
part_t p=0; theSame && p < nGlobalParts_; p++)
 
 1446         if (pSize_[c1][p] != pSize_[c2][p])
 
 1469 template <
typename Adapter>
 
 1471     bool doCheck, 
bool haveNumLocalParts, 
bool haveNumGlobalParts,
 
 1472     int numLocalParts, 
int numGlobalParts)
 
 1475   typedef SSIZE_T ssize_t;
 
 1477   int nprocs = comm_->getSize();
 
 1478   ssize_t reducevals[4];
 
 1479   ssize_t sumHaveGlobal=0, sumHaveLocal=0;
 
 1480   ssize_t sumGlobal=0, sumLocal=0;
 
 1481   ssize_t maxGlobal=0, maxLocal=0;
 
 1482   ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
 
 1483                      numGlobalParts, numLocalParts};
 
 1491       reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
 
 1495     sumHaveGlobal = reducevals[0];
 
 1496     sumHaveLocal = reducevals[1];
 
 1497     sumGlobal = reducevals[2];
 
 1498     sumLocal = reducevals[3];
 
 1500     env_->localInputAssertion(__FILE__, __LINE__,
 
 1501       "Either all procs specify num_global/local_parts or none do",
 
 1502       (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
 
 1503       (sumHaveLocal == 0 || sumHaveLocal == nprocs),
 
 1507     if (haveNumLocalParts)
 
 1508       sumLocal = numLocalParts * nprocs;
 
 1509     if (haveNumGlobalParts)
 
 1510       sumGlobal = numGlobalParts * nprocs;
 
 1512     sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
 
 1513     sumHaveLocal = haveNumLocalParts ? nprocs : 0;
 
 1515     maxLocal = numLocalParts;
 
 1516     maxGlobal = numGlobalParts;
 
 1519   if (!haveNumLocalParts && !haveNumGlobalParts){
 
 1520     onePartPerProc_ = 
true;   
 
 1524   if (haveNumGlobalParts){
 
 1526       vals[0] = numGlobalParts;
 
 1527       vals[1] = numLocalParts;
 
 1529         reduceAll<int, ssize_t>(
 
 1530           *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
 
 1534       maxGlobal = reducevals[0];
 
 1535       maxLocal = reducevals[1];
 
 1537       env_->localInputAssertion(__FILE__, __LINE__,
 
 1538         "Value for num_global_parts is different on different processes.",
 
 1543       env_->localInputAssertion(__FILE__, __LINE__,
 
 1544         "Sum of num_local_parts does not equal requested num_global_parts",
 
 1547       if (sumLocal == nprocs && maxLocal == 1){
 
 1548         onePartPerProc_ = 
true;   
 
 1553       if (maxGlobal == nprocs){
 
 1554         onePartPerProc_ = 
true;   
 
 1562   if (sumHaveLocal == nprocs){
 
 1568       procDist_.resize(nprocs+1);
 
 1570     catch (std::exception &){
 
 1571       throw(std::bad_alloc());
 
 1574     part_t *procArray = &procDist_[0];
 
 1578       gatherAll<int, part_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
 
 1584     for (
int proc=0; proc < nprocs; proc++)
 
 1585       procArray[proc+1] += procArray[proc];
 
 1591     double fParts = numGlobalParts;
 
 1592     double fProcs = nprocs;
 
 1594     if (fParts < fProcs){
 
 1597         partDist_.resize(
size_t(fParts+1));
 
 1599       catch (std::exception &){
 
 1600         throw(std::bad_alloc());
 
 1603       int *partArray = &partDist_[0];
 
 1605       double each = floor(fProcs / fParts);
 
 1606       double extra = fmod(fProcs, fParts);
 
 1609       for (
part_t part=0; part < numGlobalParts; part++){
 
 1610         int numOwners = int(each + ((part<extra) ? 1 : 0));
 
 1611         partArray[part+1] = partArray[part] + numOwners;
 
 1614       env_->globalBugAssertion(__FILE__, __LINE__, 
"#parts != #procs",
 
 1617     else if (fParts > fProcs){
 
 1622       procDistEquallySpread_ = 
true;
 
 1625         procDist_.resize(
size_t(fProcs+1));
 
 1627       catch (std::exception &){
 
 1628         throw(std::bad_alloc());
 
 1631       part_t *procArray = &procDist_[0];
 
 1633       double each = floor(fParts / fProcs);
 
 1634       double extra = fmod(fParts, fProcs);
 
 1637       for (
int proc=0; proc < nprocs; proc++){
 
 1638         part_t numParts = 
part_t(each + ((proc<extra) ? 1 : 0));
 
 1639         procArray[proc+1] = procArray[proc] + numParts;
 
 1642       env_->globalBugAssertion(__FILE__, __LINE__, 
"#parts != #procs",
 
 1646       env_->globalBugAssertion(__FILE__, __LINE__,
 
 1664 template <
typename Adapter>
 
 1667   size_t len = parts_.size();
 
 1669   part_t me = comm_->getRank();
 
 1670   int np = comm_->getSize();
 
 1672   if (np < nGlobalParts_) {
 
 1674       std::ostringstream msg; 
 
 1675       msg << 
"Remapping not yet supported for " 
 1676            << 
"num_global_parts " << nGlobalParts_
 
 1677            << 
" > num procs " << np << std::endl;
 
 1692   std::map<part_t, long> edges;
 
 1697   for (
size_t i = 0; i < len; i++) {
 
 1699     if (parts_[i] == me) lstaying++;    
 
 1702   Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
 
 1703                                 &lstaying, &gstaying);
 
 1708   int nedges = edges.size();
 
 1711   part_t tnVtx = np + nGlobalParts_;  
 
 1715     idx = 
new int[tnVtx+1];
 
 1716     sizes = 
new int[np];
 
 1720     Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);
 
 1725     for (
int i = 0; i < np; i++)
 
 1726       idx[i+1] = idx[i] + sizes[i];
 
 1734     bufv = 
new part_t[nedges];
 
 1735     bufw = 
new long[nedges];
 
 1737     for (
typename std::map<part_t, long>::iterator it = edges.begin();
 
 1738          it != edges.end(); it++) {
 
 1739       bufv[cnt] = it->first;  
 
 1740       bufw[cnt] = it->second; 
 
 1751     adj = 
new part_t[idx[np]];
 
 1752     wgt = 
new long[idx[np]];
 
 1755   Teuchos::gatherv<int, part_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
 
 1756   Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
 
 1767     for (
int i = 0; i < idx[np]; i++) {
 
 1772     for (
part_t i = np; i < tnVtx; i++) {
 
 1777     std::cout << 
"IDX ";
 
 1778     for (
part_t i = 0; i <= tnVtx; i++) std::cout << idx[i] << 
" ";
 
 1779     std::cout << std::endl;
 
 1781     std::cout << 
"ADJ ";
 
 1782     for (
part_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] << 
" ";
 
 1783     std::cout << std::endl;
 
 1785     std::cout << 
"WGT ";
 
 1786     for (
part_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] << 
" ";
 
 1787     std::cout << std::endl;
 
 1792     for (
part_t i = 0; i < tnVtx; i++) match[i] = i;
 
 1794              Zoltan2::GreedyMWM<part_t, long>(idx, adj, wgt, tnVtx, match);
 
 1797     std::cout << 
"After matching:  " << nmatches << 
" found" << std::endl;
 
 1798     for (
part_t i = 0; i < tnVtx; i++)
 
 1799       std::cout << 
"match[" << i << 
"] = " << match[i]
 
 1800            << ((match[i] != i &&
 
 1801                (i < np && match[i] != i+np))
 
 1807     bool nontrivial = 
false;
 
 1809       for (
part_t i = 0; i < np; i++) {
 
 1810         if ((match[i] != i) && (match[i] != (i+np))) {
 
 1819       remap = 
new part_t[nGlobalParts_];
 
 1820       for (
part_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;
 
 1822       bool *used = 
new bool[np];
 
 1823       for (
part_t i = 0; i < np; i++) used[i] = 
false;
 
 1826       for (
part_t i = 0; i < nGlobalParts_; i++) {
 
 1828         if (match[tmp] != tmp) {
 
 1829           remap[i] = match[tmp];
 
 1830           used[match[tmp]] = 
true;
 
 1835       for (
part_t i = 0; i < nGlobalParts_; i++) {
 
 1836         if (remap[i] > -1) 
continue;
 
 1844       for (
part_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
 
 1845         if (remap[i] > -1) 
continue;
 
 1846         while (used[uidx]) uidx++;
 
 1855     std::cout << 
"Remap vector: ";
 
 1856     for (
part_t i = 0; i < nGlobalParts_; i++) std::cout << remap[i] << 
" ";
 
 1857     std::cout << std::endl;
 
 1860     long newgstaying = measure_stays(remap, idx, adj, wgt,
 
 1862     doRemap = (newgstaying > gstaying);
 
 1863     std::ostringstream msg;
 
 1864     msg << 
"gstaying " << gstaying << 
" measure(input) " 
 1865          << measure_stays(NULL, idx, adj, wgt, nGlobalParts_, np)
 
 1866          << 
" newgstaying " << newgstaying
 
 1867          << 
" nontrivial " << nontrivial
 
 1868          << 
" doRemap " << doRemap << std::endl;
 
 1876   Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);
 
 1879     if (me != 0) remap = 
new part_t[nGlobalParts_];
 
 1880     Teuchos::broadcast<int, part_t>(*comm_, 0, nGlobalParts_, remap);
 
 1881     for (
size_t i = 0; i < len; i++) {
 
 1882       parts_[i] = remap[parts_[i]];
 
const int * getProcListView() const 
Returns the process list corresponding to the global ID list. 
scalar_t getCriteriaPartSize(int idx, part_t part) const 
Get the size for a given weight index and a given part. 
Defines the Solution base class. 
fast typical checks for valid arguments 
#define Z2_FORWARD_EXCEPTIONS
Forward an exception back through call stack. 
part_t pointAssign(int dim, scalar_t *point) const 
Return the part overlapping a given point in space;. 
void RemapParts()
Remap a new partition for maximum overlap with an input partition. 
Just a placeholder for now. 
more involved, like validate a graph 
map_t::global_ordinal_type gno_t
size_t getLocalNumberOfParts() const 
Returns the number of parts to be assigned to this process. 
virtual bool isPartitioningTreeBinary() const 
calculate if partition tree is binary. 
void getPartsForProc(int procId, double &numParts, part_t &partMin, part_t &partMax) const 
Get the parts belonging to a process. 
void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const 
Get the processes containing a part. 
sub-steps, each method's entry and exit 
bool oneToOnePartDistribution() const 
Is the part-to-process distribution is one-to-one. 
SparseMatrixAdapter_t::part_t part_t
long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt, part_t nrhs, part_t nlhs)
scalar_t getLocalFractionOfPart() const 
If parts are divided across processes, return the fraction of a part on this process. 
A PartitioningSolution is a solution to a partitioning problem. 
const RCP< const Comm< int > > & getCommunicator() const 
Return the communicator associated with the solution. 
void boxAssign(int dim, scalar_t *lower, scalar_t *upper, size_t &nPartsFound, part_t **partsFound) const 
Return an array of all parts overlapping a given box in space. 
const part_t * getPartListView() const 
Returns the part list corresponding to the global ID list. 
PartitioningSolution(const RCP< const Environment > &env, const RCP< const Comm< int > > &comm, int nUserWeights, const RCP< Algorithm< Adapter > > &algorithm=Teuchos::null)
Constructor when part sizes are not supplied. 
Algorithm defines the base class for all algorithms. 
map_t::local_ordinal_type lno_t
void getPartitionTree(part_t &numTreeVerts, std::vector< part_t > &permPartNums, std::vector< part_t > &splitRangeBeg, std::vector< part_t > &splitRangeEnd, std::vector< part_t > &treeVertParents) const 
get the partition tree - fill the relevant arrays 
Greedy Maximal Weight Matching. 
void setParts(ArrayRCP< part_t > &partList)
The algorithm uses setParts to set the solution. 
static const std::string fail
std::vector< Zoltan2::coordinateModelPartBox > & getPartBoxesView() const 
returns the part box boundary list. 
Defines the Environment class. 
const part_t * getProcDistribution() const 
Return a distribution by process. 
void getCommunicationGraph(ArrayRCP< part_t > &comXAdj, ArrayRCP< part_t > &comAdj) const 
returns communication graph resulting from geometric partitioning. 
int getNumberOfCriteria() const 
Get the number of criteria (object weights) 
const int * getPartDistribution() const 
Return a distribution by part. 
#define Z2_THROW_OUTSIDE_ERROR(env)
Throw an error returned from outside the Zoltan2 library. 
size_t getActualGlobalNumberOfParts() const 
Returns the actual global number of parts provided in setParts(). 
bool criteriaHasUniformPartSizes(int idx) const 
Determine if balancing criteria has uniform part sizes. (User can specify differing part sizes...
bool criteriaHaveSamePartSizes(int c1, int c2) const 
Return true if the two weight indices have the same part size information. 
size_t getTargetGlobalNumberOfParts() const 
Returns the global number of parts desired in the solution. 
const RCP< const Environment > & getEnvironment() const 
Return the environment associated with the solution. 
Zoltan2::BasicUserTypes< zscalar_t, zlno_t, zgno_t > user_t