14 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
15 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
18 template <
typename Adapter>
55 template <
typename Adapter>
60 #ifndef DOXYGEN_SHOULD_SKIP_THIS
62 typedef typename Adapter::scalar_t scalar_t;
86 const RCP<
const Comm<int> > &comm,
120 const RCP<
const Comm<int> > &comm,
121 int nUserWeights, ArrayView<ArrayRCP<part_t> > reqPartIds,
122 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
180 if (partDist_.size() > 0)
return &partDist_[0];
201 if (procDist_.size() > 0)
return &procDist_[0];
232 if (pSizeUniform_[idx])
233 return 1.0 / nGlobalParts_;
234 else if (pCompactIndex_[idx].size())
235 return pSize_[idx][pCompactIndex_[idx][part]];
237 return pSize_[idx][part];
284 void setParts(ArrayRCP<part_t> &partList);
309 for (
part_t i = 0; i < nrhs; i++) {
310 part_t k = (remap ? remap[i] : i);
311 for (
part_t j = idx[k]; j < idx[k+1]; j++) {
312 if (i == (adj[j]-nlhs)) {
338 if (parts_.size() > 0)
return parts_.getRawPtr();
347 if (procs_.size() > 0)
return procs_.getRawPtr();
355 if (this->algorithm_ == Teuchos::null)
356 throw std::logic_error(
"no partitioning algorithm has been run yet");
357 return this->algorithm_->isPartitioningTreeBinary();
363 std::vector<part_t> & permPartNums,
364 std::vector<part_t> & splitRangeBeg,
365 std::vector<part_t> & splitRangeEnd,
366 std::vector<part_t> & treeVertParents)
const {
370 if (this->algorithm_ == Teuchos::null)
371 throw std::logic_error(
"no partitioning algorithm has been run yet");
372 this->algorithm_->getPartitionTree(
383 std::vector<Zoltan2::coordinateModelPartBox> &
386 return this->algorithm_->getPartBoxesView();
402 if (this->algorithm_ == Teuchos::null)
403 throw std::logic_error(
"no partitioning algorithm has been run yet");
405 p = this->algorithm_->pointAssign(dim, point);
423 void boxAssign(
int dim, scalar_t *lower, scalar_t *upper,
424 size_t &nPartsFound,
part_t **partsFound)
const
427 if (this->algorithm_ == Teuchos::null)
428 throw std::logic_error(
"no partitioning algorithm has been run yet");
430 this->algorithm_->boxAssign(dim, lower, upper, nPartsFound, partsFound);
439 ArrayRCP <part_t> &comAdj)
const
442 if (this->algorithm_ == Teuchos::null)
443 throw std::logic_error(
"no partitioning algorithm has been run yet");
445 this->algorithm_->getCommunicationGraph(
this, comXAdj, comAdj);
470 env_->localInputAssertion(__FILE__, __LINE__,
"invalid process id",
473 procToPartsMap(procId, numParts, partMin, partMax);
489 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part id",
492 partToProcsMap(partId, procMin, procMax);
496 void partToProc(
bool doCheck,
bool haveNumLocalParts,
bool haveNumGlobalParts,
497 int numLocalParts,
int numGlobalParts);
499 void procToPartsMap(
int procId,
double &numParts,
part_t &partMin,
502 void partToProcsMap(
part_t partId,
int &procMin,
int &procMax)
const;
504 void setPartDistribution();
506 void setPartSizes(ArrayView<ArrayRCP<part_t> > reqPartIds,
507 ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
509 void computePartSizes(
int widx, ArrayView<part_t> ids,
510 ArrayView<scalar_t> sizes);
512 void broadcastPartSizes(
int widx);
515 RCP<const Environment> env_;
516 const RCP<const Comm<int> > comm_;
519 RCP<std::vector<Zoltan2::coordinateModelPartBox> > partBoxes;
524 scalar_t localFraction_;
556 bool onePartPerProc_;
557 std::vector<int> partDist_;
558 std::vector<part_t> procDist_;
559 bool procDistEquallySpread_;
595 ArrayRCP<bool> pSizeUniform_;
596 ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
597 ArrayRCP<ArrayRCP<scalar_t> > pSize_;
602 ArrayRCP<part_t> parts_;
606 part_t nGlobalPartsSolution_;
612 ArrayRCP<int> procs_;
617 const RCP<Algorithm<Adapter> > algorithm_;
624 template <
typename Adapter>
626 const RCP<const Environment> &env,
627 const RCP<
const Comm<int> > &comm,
630 : env_(env), comm_(comm),
632 nGlobalParts_(0), nLocalParts_(0),
633 localFraction_(0), nWeightsPerObj_(),
634 onePartPerProc_(false), partDist_(), procDist_(),
635 procDistEquallySpread_(false),
636 pSizeUniform_(), pCompactIndex_(), pSize_(),
637 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
638 procs_(), algorithm_(algorithm)
640 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);
642 setPartDistribution();
647 ArrayRCP<part_t> *noIds =
new ArrayRCP<part_t> [nWeightsPerObj_];
648 ArrayRCP<scalar_t> *noSizes =
new ArrayRCP<scalar_t> [nWeightsPerObj_];
649 ArrayRCP<ArrayRCP<part_t> > ids(noIds, 0, nWeightsPerObj_,
true);
650 ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_,
true);
652 setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_));
654 env_->memory(
"After construction of solution");
657 template <
typename Adapter>
659 const RCP<const Environment> &env,
660 const RCP<
const Comm<int> > &comm,
662 ArrayView<ArrayRCP<part_t> > reqPartIds,
663 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
665 : env_(env), comm_(comm),
667 nGlobalParts_(0), nLocalParts_(0),
668 localFraction_(0), nWeightsPerObj_(),
669 onePartPerProc_(false), partDist_(), procDist_(),
670 procDistEquallySpread_(false),
671 pSizeUniform_(), pCompactIndex_(), pSize_(),
672 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
673 procs_(), algorithm_(algorithm)
675 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);
677 setPartDistribution();
679 setPartSizes(reqPartIds, reqPartSizes);
681 env_->memory(
"After construction of solution");
684 template <
typename Adapter>
689 const ParameterList &pl = env_->getParameters();
690 size_t haveGlobalNumParts=0, haveLocalNumParts=0;
691 int numLocal=0, numGlobal=0;
693 const Teuchos::ParameterEntry *pe = pl.getEntryPtr(
"num_global_parts");
696 haveGlobalNumParts = 1;
697 nGlobalParts_ =
part_t(pe->getValue(&nGlobalParts_));
698 numGlobal = nGlobalParts_;
701 pe = pl.getEntryPtr(
"num_local_parts");
704 haveLocalNumParts = 1;
705 nLocalParts_ =
part_t(pe->getValue(&nLocalParts_));
706 numLocal = nLocalParts_;
712 partToProc(
true, haveLocalNumParts, haveGlobalNumParts,
713 numLocal, numGlobal);
717 int nprocs = comm_->getSize();
718 int rank = comm_->getRank();
720 if (onePartPerProc_){
721 nGlobalParts_ = nprocs;
724 else if (partDist_.size() > 0){
725 nGlobalParts_ = partDist_.size() - 1;
726 int pstart = partDist_[0];
727 for (
part_t i=1; i <= nGlobalParts_; i++){
728 int pend = partDist_[i];
729 if (rank >= pstart && rank < pend){
730 int numOwners = pend - pstart;
732 localFraction_ = 1.0 / numOwners;
738 else if (procDist_.size() > 0){
739 nGlobalParts_ = procDist_[nprocs];
740 nLocalParts_ = procDist_[rank+1] - procDist_[rank];
743 throw std::logic_error(
"partToProc error");
748 template <
typename Adapter>
749 void PartitioningSolution<Adapter>::setPartSizes(
750 ArrayView<ArrayRCP<part_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
752 int widx = nWeightsPerObj_;
755 size_t *countBuf =
new size_t [widx*2];
756 ArrayRCP<size_t> counts(countBuf, 0, widx*2,
true);
758 fail = ((ids.size() != widx) || (sizes.size() != widx));
760 for (
int w=0; !fail && w < widx; w++){
761 counts[w] = ids[w].size();
762 if (ids[w].size() != sizes[w].size()) fail=
true;
765 env_->globalBugAssertion(__FILE__, __LINE__,
"bad argument arrays", fail==0,
770 ArrayRCP<scalar_t> *emptySizes=
new ArrayRCP<scalar_t> [widx];
771 pSize_ = arcp(emptySizes, 0, widx);
773 ArrayRCP<unsigned char> *emptyIndices=
new ArrayRCP<unsigned char> [widx];
774 pCompactIndex_ = arcp(emptyIndices, 0, widx);
776 bool *info =
new bool [widx];
777 pSizeUniform_ = arcp(info, 0, widx);
778 for (
int w=0; w < widx; w++)
779 pSizeUniform_[w] =
true;
781 if (nGlobalParts_ == 1){
785 size_t *ptr1 = counts.getRawPtr();
786 size_t *ptr2 = counts.getRawPtr() + widx;
789 reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2);
795 for (
int w=0; w < widx; w++)
796 if (counts[widx+w] > 0){
798 pSizeUniform_[w] =
false;
807 int nprocs = comm_->getSize();
808 int rank = comm_->getRank();
810 for (
int w=0; w < widx; w++){
811 if (pSizeUniform_[w])
continue;
816 part_t length = ids[w].size();
818 Teuchos::gatherAll<int, part_t>(*comm_, 1, &length,
823 for (
int i=0; i < nprocs; i++)
824 total += allLength[i];
827 scalar_t *partSizes =
new scalar_t [total];
829 ArrayView<part_t> idArray(partNums, total);
830 ArrayView<scalar_t> sizeArray(partSizes, total);
833 for (
int i=0; i < length; i++){
834 *partNums++ = ids[w][i];
835 *partSizes++ = sizes[w][i];
839 for (
int p=1; p < nprocs; p++){
840 if (allLength[p] > 0){
841 Teuchos::receive<int, part_t>(*comm_, p,
842 allLength[p], partNums);
843 Teuchos::receive<int, scalar_t>(*comm_, p,
844 allLength[p], partSizes);
845 partNums += allLength[p];
846 partSizes += allLength[p];
853 computePartSizes(w, idArray, sizeArray);
857 delete [] idArray.getRawPtr();
858 delete [] sizeArray.getRawPtr();
863 Teuchos::send<int, part_t>(*comm_, length, ids[w].getRawPtr(), 0);
864 Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
868 broadcastPartSizes(w);
872 template <
typename Adapter>
873 void PartitioningSolution<Adapter>::broadcastPartSizes(
int widx)
875 env_->localBugAssertion(__FILE__, __LINE__,
"preallocations",
876 pSize_.size()>widx &&
877 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
880 int rank = comm_->getRank();
881 int nprocs = comm_->getSize();
882 part_t nparts = nGlobalParts_;
890 if (pSizeUniform_[widx] ==
true)
892 else if (pCompactIndex_[widx].size() > 0)
899 Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
905 pSizeUniform_[widx] =
true;
914 unsigned char *idxbuf = NULL;
917 idxbuf =
new unsigned char [nparts];
918 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
921 env_->localBugAssertion(__FILE__, __LINE__,
"index list size",
923 idxbuf = pCompactIndex_[widx].getRawPtr();
928 Teuchos::broadcast<int, char>(*comm_, 0, nparts,
929 reinterpret_cast<char *
>(idxbuf));
934 pCompactIndex_[widx] = arcp(idxbuf, 0, nparts,
true);
938 unsigned char maxIdx=0;
939 for (
part_t p=0; p < nparts; p++)
940 if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
942 int numSizes = maxIdx + 1;
944 scalar_t *sizeList = NULL;
947 sizeList =
new scalar_t [numSizes];
948 env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
951 env_->localBugAssertion(__FILE__, __LINE__,
"wrong number of sizes",
954 sizeList = pSize_[widx].getRawPtr();
958 Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
963 pSize_[widx] = arcp(sizeList, 0, numSizes,
true);
972 scalar_t *sizeList = NULL;
975 sizeList =
new scalar_t [nparts];
976 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
979 env_->localBugAssertion(__FILE__, __LINE__,
"wrong number of sizes",
982 sizeList = pSize_[widx].getRawPtr();
986 Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
991 pSize_[widx] = arcp(sizeList, 0, nparts);
997 template <
typename Adapter>
998 void PartitioningSolution<Adapter>::computePartSizes(
int widx,
999 ArrayView<part_t> ids, ArrayView<scalar_t> sizes)
1001 int len = ids.size();
1004 pSizeUniform_[widx] =
true;
1008 env_->localBugAssertion(__FILE__, __LINE__,
"bad array sizes",
1011 env_->localBugAssertion(__FILE__, __LINE__,
"bad index",
1014 env_->localBugAssertion(__FILE__, __LINE__,
"preallocations",
1015 pSize_.size()>widx &&
1016 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
1022 part_t nparts = nGlobalParts_;
1023 unsigned char *buf =
new unsigned char [nparts];
1024 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
1025 memset(buf, 0, nparts);
1026 ArrayRCP<unsigned char> partIdx(buf, 0, nparts,
true);
1028 scalar_t
epsilon = 10e-5 / nparts;
1029 scalar_t min=sizes[0], max=sizes[0], sum=0;
1031 for (
int i=0; i < len; i++){
1033 scalar_t size = sizes[i];
1035 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part id",
1038 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part size", size>=0,
1045 env_->localInputAssertion(__FILE__, __LINE__,
1046 "multiple sizes provided for one part", partIdx[
id]==0,
BASIC_ASSERTION);
1050 if (size < min) min = size;
1051 if (size > max) max = size;
1060 scalar_t *allSizes =
new scalar_t [2];
1061 env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
1063 ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2,
true);
1065 int numNonZero = nparts - len;
1068 allSizes[1] = 1.0 / numNonZero;
1070 for (
part_t p=0; p < nparts; p++)
1073 for (
int i=0; i < len; i++)
1076 pSize_[widx] = sizeArray;
1077 pCompactIndex_[widx] = partIdx;
1082 if (max - min <= epsilon){
1083 pSizeUniform_[widx] =
true;
1088 scalar_t avg = sum / nparts;
1095 scalar_t *tmp =
new scalar_t [len];
1096 env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
1097 memcpy(tmp, sizes.getRawPtr(),
sizeof(scalar_t) * len);
1098 ArrayRCP<scalar_t> partSizes(tmp, 0, len,
true);
1100 std::sort(partSizes.begin(), partSizes.end());
1104 Array<scalar_t> nextUniqueSize;
1105 nextUniqueSize.push_back(partSizes[len-1]);
1106 scalar_t curr = partSizes[len-1];
1108 bool haveAvg =
false;
1109 if (curr - avg <= epsilon)
1112 for (
int i=len-2; i >= 0; i--){
1113 scalar_t val = partSizes[i];
1114 if (curr - val > epsilon){
1115 nextUniqueSize.push_back(val);
1117 if (avgIndex==len && val > avg && val - avg <= epsilon){
1119 avgIndex = nextUniqueSize.size() - 1;
1127 size_t numSizes = nextUniqueSize.size();
1128 int sizeArrayLen = numSizes;
1135 if (!haveAvg) sizeArrayLen++;
1137 scalar_t *allSizes =
new scalar_t [sizeArrayLen];
1138 env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
1139 ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen,
true);
1141 int newAvgIndex = sizeArrayLen;
1143 for (
int i=numSizes-1, idx=0; i >= 0; i--){
1145 if (newAvgIndex == sizeArrayLen){
1147 if (haveAvg && i==avgIndex)
1150 else if (!haveAvg && avg < nextUniqueSize[i]){
1152 allSizes[idx++] = avg;
1156 allSizes[idx++] = nextUniqueSize[i];
1159 env_->localBugAssertion(__FILE__, __LINE__,
"finding average in list",
1162 for (
int i=0; i < nparts; i++){
1163 buf[i] = newAvgIndex;
1166 sum = (nparts - len) * allSizes[newAvgIndex];
1168 for (
int i=0; i < len; i++){
1170 scalar_t size = sizes[i];
1175 if (size < avg && avg - size <= epsilon)
1176 index = newAvgIndex;
1178 typename ArrayRCP<scalar_t>::iterator found =
1179 std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
1181 env_->localBugAssertion(__FILE__, __LINE__,
"size array",
1184 index = found - sizeArray.begin();
1188 sum += allSizes[index];
1191 for (
int i=0; i < sizeArrayLen; i++){
1192 sizeArray[i] /= sum;
1195 pCompactIndex_[widx] = partIdx;
1196 pSize_[widx] = sizeArray;
1202 tmp =
new scalar_t [nparts];
1203 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
1205 sum += ((nparts - len) * avg);
1207 for (
int i=0; i < nparts; i++){
1211 for (
int i=0; i < len; i++){
1212 tmp[ids[i]] = sizes[i]/sum;
1215 pSize_[widx] = arcp(tmp, 0, nparts);
1219 template <
typename Adapter>
1224 size_t len = partList.size();
1233 part_t lMin = (len > 0 ? std::numeric_limits<part_t>::max() : 0);
1236 for (
size_t i = 0; i < len; i++) {
1237 if (partList[i] < lMin) lMin = partList[i];
1238 if (partList[i] > lMax) lMax = partList[i];
1240 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MIN, 1, &lMin, &gMin);
1241 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MAX, 1, &lMax, &gMax);
1243 nGlobalPartsSolution_ = gMax - gMin + 1;
1248 if (!onePartPerProc_){
1250 int *procs =
new int [len];
1251 env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
1252 procs_ = arcp<int>(procs, 0, len);
1255 part_t *parts = partList.getRawPtr();
1257 if (procDist_.size() > 0){
1260 for (
size_t i=0; i < len; i++){
1261 partToProcsMap(parts[i], procs[i], procId);
1266 lno_t *partCounter =
new lno_t [nGlobalPartsSolution_];
1267 env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
1270 int numProcs = comm_->getSize();
1274 memset(partCounter, 0,
sizeof(
lno_t) * nGlobalPartsSolution_);
1276 for (
typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++)
1277 partCounter[parts[i]]++;
1280 env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
1283 int proc2 = partDist_[0];
1285 for (
part_t part=1; part < nGlobalParts_; part++){
1287 proc2 = partDist_[part+1];
1288 int numprocs = proc2 - proc1;
1290 double dNum = partCounter[part];
1291 double dProcs = numprocs;
1294 double each = floor(dNum/dProcs);
1295 double extra = fmod(dNum,dProcs);
1297 for (
int proc=proc1, i=0; proc<proc2; proc++, i++){
1299 procCounter[proc] =
lno_t(each) + 1;
1301 procCounter[proc] =
lno_t(each);
1305 delete [] partCounter;
1307 for (
typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++){
1308 if (partList[i] >= nGlobalParts_){
1311 procs[i] = comm_->getRank();
1314 part_t partNum = parts[i];
1315 proc1 = partDist_[partNum];
1316 proc2 = partDist_[partNum + 1];
1319 for (proc=proc1; proc < proc2; proc++){
1320 if (procCounter[proc] > 0){
1322 procCounter[proc]--;
1326 env_->localBugAssertion(__FILE__, __LINE__,
"part to proc",
1330 delete [] procCounter;
1340 bool doRemap =
false;
1341 const Teuchos::ParameterEntry *pe =
1342 env_->getParameters().getEntryPtr(
"remap_parts");
1343 if (pe) doRemap = pe->getValue(&doRemap);
1344 if (doRemap) RemapParts();
1346 haveSolution_ =
true;
1348 env_->memory(
"After Solution has processed algorithm's answer");
1353 template <
typename Adapter>
1355 double &numParts,
part_t &partMin,
part_t &partMax)
const
1357 if (onePartPerProc_){
1359 partMin = partMax = procId;
1361 else if (procDist_.size() > 0){
1362 partMin = procDist_[procId];
1363 partMax = procDist_[procId+1] - 1;
1364 numParts = procDist_[procId+1] - partMin;
1369 std::vector<int>::const_iterator entry;
1370 entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
1372 size_t partIdx = entry - partDist_.begin();
1373 int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
1374 partMin = partMax = int(partIdx) - 1;
1375 numParts = 1.0 / numProcs;
1379 template <
typename Adapter>
1380 void PartitioningSolution<Adapter>::partToProcsMap(
part_t partId,
1381 int &procMin,
int &procMax)
const
1383 if (partId >= nGlobalParts_){
1387 procMin = procMax = comm_->getRank();
1389 else if (onePartPerProc_){
1390 procMin = procMax = int(partId);
1392 else if (procDist_.size() > 0){
1393 if (procDistEquallySpread_) {
1395 double fProcs = comm_->getSize();
1396 double fParts = nGlobalParts_;
1397 double each = fParts / fProcs;
1398 procMin = int(partId / each);
1399 while (procDist_[procMin] > partId) procMin--;
1400 while (procDist_[procMin+1] <= partId) procMin++;
1407 typename std::vector<part_t>::const_iterator entry;
1408 entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
1410 size_t procIdx = entry - procDist_.begin();
1411 procMin = procMax = int(procIdx) - 1;
1415 procMin = partDist_[partId];
1416 procMax = partDist_[partId+1] - 1;
1420 template <
typename Adapter>
1422 int c1,
int c2)
const
1424 if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
1425 throw std::logic_error(
"criteriaHaveSamePartSizes error");
1427 bool theSame =
false;
1432 else if (pSizeUniform_[c1] ==
true && pSizeUniform_[c2] ==
true)
1435 else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
1437 bool useIndex = pCompactIndex_[c1].size() > 0;
1439 for (
part_t p=0; theSame && p < nGlobalParts_; p++)
1440 if (pSize_[c1][pCompactIndex_[c1][p]] !=
1441 pSize_[c2][pCompactIndex_[c2][p]])
1445 for (
part_t p=0; theSame && p < nGlobalParts_; p++)
1446 if (pSize_[c1][p] != pSize_[c2][p])
1469 template <
typename Adapter>
1471 bool doCheck,
bool haveNumLocalParts,
bool haveNumGlobalParts,
1472 int numLocalParts,
int numGlobalParts)
1475 typedef SSIZE_T ssize_t;
1477 int nprocs = comm_->getSize();
1478 ssize_t reducevals[4];
1479 ssize_t sumHaveGlobal=0, sumHaveLocal=0;
1480 ssize_t sumGlobal=0, sumLocal=0;
1481 ssize_t maxGlobal=0, maxLocal=0;
1482 ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
1483 numGlobalParts, numLocalParts};
1491 reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
1495 sumHaveGlobal = reducevals[0];
1496 sumHaveLocal = reducevals[1];
1497 sumGlobal = reducevals[2];
1498 sumLocal = reducevals[3];
1500 env_->localInputAssertion(__FILE__, __LINE__,
1501 "Either all procs specify num_global/local_parts or none do",
1502 (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
1503 (sumHaveLocal == 0 || sumHaveLocal == nprocs),
1507 if (haveNumLocalParts)
1508 sumLocal = numLocalParts * nprocs;
1509 if (haveNumGlobalParts)
1510 sumGlobal = numGlobalParts * nprocs;
1512 sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
1513 sumHaveLocal = haveNumLocalParts ? nprocs : 0;
1515 maxLocal = numLocalParts;
1516 maxGlobal = numGlobalParts;
1519 if (!haveNumLocalParts && !haveNumGlobalParts){
1520 onePartPerProc_ =
true;
1524 if (haveNumGlobalParts){
1526 vals[0] = numGlobalParts;
1527 vals[1] = numLocalParts;
1529 reduceAll<int, ssize_t>(
1530 *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
1534 maxGlobal = reducevals[0];
1535 maxLocal = reducevals[1];
1537 env_->localInputAssertion(__FILE__, __LINE__,
1538 "Value for num_global_parts is different on different processes.",
1543 env_->localInputAssertion(__FILE__, __LINE__,
1544 "Sum of num_local_parts does not equal requested num_global_parts",
1547 if (sumLocal == nprocs && maxLocal == 1){
1548 onePartPerProc_ =
true;
1553 if (maxGlobal == nprocs){
1554 onePartPerProc_ =
true;
1562 if (sumHaveLocal == nprocs){
1568 procDist_.resize(nprocs+1);
1570 catch (std::exception &){
1571 throw(std::bad_alloc());
1574 part_t *procArray = &procDist_[0];
1578 gatherAll<int, part_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
1584 for (
int proc=0; proc < nprocs; proc++)
1585 procArray[proc+1] += procArray[proc];
1591 double fParts = numGlobalParts;
1592 double fProcs = nprocs;
1594 if (fParts < fProcs){
1597 partDist_.resize(
size_t(fParts+1));
1599 catch (std::exception &){
1600 throw(std::bad_alloc());
1603 int *partArray = &partDist_[0];
1605 double each = floor(fProcs / fParts);
1606 double extra = fmod(fProcs, fParts);
1609 for (
part_t part=0; part < numGlobalParts; part++){
1610 int numOwners = int(each + ((part<extra) ? 1 : 0));
1611 partArray[part+1] = partArray[part] + numOwners;
1614 env_->globalBugAssertion(__FILE__, __LINE__,
"#parts != #procs",
1617 else if (fParts > fProcs){
1622 procDistEquallySpread_ =
true;
1625 procDist_.resize(
size_t(fProcs+1));
1627 catch (std::exception &){
1628 throw(std::bad_alloc());
1631 part_t *procArray = &procDist_[0];
1633 double each = floor(fParts / fProcs);
1634 double extra = fmod(fParts, fProcs);
1637 for (
int proc=0; proc < nprocs; proc++){
1638 part_t numParts =
part_t(each + ((proc<extra) ? 1 : 0));
1639 procArray[proc+1] = procArray[proc] + numParts;
1642 env_->globalBugAssertion(__FILE__, __LINE__,
"#parts != #procs",
1646 env_->globalBugAssertion(__FILE__, __LINE__,
1664 template <
typename Adapter>
1667 size_t len = parts_.size();
1669 part_t me = comm_->getRank();
1670 int np = comm_->getSize();
1672 if (np < nGlobalParts_) {
1674 std::ostringstream msg;
1675 msg <<
"Remapping not yet supported for "
1676 <<
"num_global_parts " << nGlobalParts_
1677 <<
" > num procs " << np << std::endl;
1692 std::map<part_t, long> edges;
1697 for (
size_t i = 0; i < len; i++) {
1699 if (parts_[i] == me) lstaying++;
1702 Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
1703 &lstaying, &gstaying);
1708 int nedges = edges.size();
1711 part_t tnVtx = np + nGlobalParts_;
1715 idx =
new int[tnVtx+1];
1716 sizes =
new int[np];
1720 Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);
1725 for (
int i = 0; i < np; i++)
1726 idx[i+1] = idx[i] + sizes[i];
1734 bufv =
new part_t[nedges];
1735 bufw =
new long[nedges];
1737 for (
typename std::map<part_t, long>::iterator it = edges.begin();
1738 it != edges.end(); it++) {
1739 bufv[cnt] = it->first;
1740 bufw[cnt] = it->second;
1751 adj =
new part_t[idx[np]];
1752 wgt =
new long[idx[np]];
1755 Teuchos::gatherv<int, part_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
1756 Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
1767 for (
int i = 0; i < idx[np]; i++) {
1772 for (
part_t i = np; i < tnVtx; i++) {
1777 std::cout <<
"IDX ";
1778 for (
part_t i = 0; i <= tnVtx; i++) std::cout << idx[i] <<
" ";
1779 std::cout << std::endl;
1781 std::cout <<
"ADJ ";
1782 for (
part_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] <<
" ";
1783 std::cout << std::endl;
1785 std::cout <<
"WGT ";
1786 for (
part_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] <<
" ";
1787 std::cout << std::endl;
1792 for (
part_t i = 0; i < tnVtx; i++) match[i] = i;
1794 Zoltan2::GreedyMWM<part_t, long>(idx, adj, wgt, tnVtx, match);
1797 std::cout <<
"After matching: " << nmatches <<
" found" << std::endl;
1798 for (
part_t i = 0; i < tnVtx; i++)
1799 std::cout <<
"match[" << i <<
"] = " << match[i]
1800 << ((match[i] != i &&
1801 (i < np && match[i] != i+np))
1807 bool nontrivial =
false;
1809 for (
part_t i = 0; i < np; i++) {
1810 if ((match[i] != i) && (match[i] != (i+np))) {
1819 remap =
new part_t[nGlobalParts_];
1820 for (
part_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;
1822 bool *used =
new bool[np];
1823 for (
part_t i = 0; i < np; i++) used[i] =
false;
1826 for (
part_t i = 0; i < nGlobalParts_; i++) {
1828 if (match[tmp] != tmp) {
1829 remap[i] = match[tmp];
1830 used[match[tmp]] =
true;
1835 for (
part_t i = 0; i < nGlobalParts_; i++) {
1836 if (remap[i] > -1)
continue;
1844 for (
part_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
1845 if (remap[i] > -1)
continue;
1846 while (used[uidx]) uidx++;
1855 std::cout <<
"Remap vector: ";
1856 for (
part_t i = 0; i < nGlobalParts_; i++) std::cout << remap[i] <<
" ";
1857 std::cout << std::endl;
1860 long newgstaying = measure_stays(remap, idx, adj, wgt,
1862 doRemap = (newgstaying > gstaying);
1863 std::ostringstream msg;
1864 msg <<
"gstaying " << gstaying <<
" measure(input) "
1865 << measure_stays(NULL, idx, adj, wgt, nGlobalParts_, np)
1866 <<
" newgstaying " << newgstaying
1867 <<
" nontrivial " << nontrivial
1868 <<
" doRemap " << doRemap << std::endl;
1876 Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);
1879 if (me != 0) remap =
new part_t[nGlobalParts_];
1880 Teuchos::broadcast<int, part_t>(*comm_, 0, nGlobalParts_, remap);
1881 for (
size_t i = 0; i < len; i++) {
1882 parts_[i] = remap[parts_[i]];
const int * getProcListView() const
Returns the process list corresponding to the global ID list.
scalar_t getCriteriaPartSize(int idx, part_t part) const
Get the size for a given weight index and a given part.
Defines the Solution base class.
fast typical checks for valid arguments
#define Z2_FORWARD_EXCEPTIONS
Forward an exception back through call stack.
part_t pointAssign(int dim, scalar_t *point) const
Return the part overlapping a given point in space;.
void RemapParts()
Remap a new partition for maximum overlap with an input partition.
Just a placeholder for now.
more involved, like validate a graph
map_t::global_ordinal_type gno_t
size_t getLocalNumberOfParts() const
Returns the number of parts to be assigned to this process.
virtual bool isPartitioningTreeBinary() const
calculate if partition tree is binary.
void getPartsForProc(int procId, double &numParts, part_t &partMin, part_t &partMax) const
Get the parts belonging to a process.
void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
Get the processes containing a part.
sub-steps, each method's entry and exit
bool oneToOnePartDistribution() const
Is the part-to-process distribution is one-to-one.
SparseMatrixAdapter_t::part_t part_t
long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt, part_t nrhs, part_t nlhs)
scalar_t getLocalFractionOfPart() const
If parts are divided across processes, return the fraction of a part on this process.
A PartitioningSolution is a solution to a partitioning problem.
const RCP< const Comm< int > > & getCommunicator() const
Return the communicator associated with the solution.
void boxAssign(int dim, scalar_t *lower, scalar_t *upper, size_t &nPartsFound, part_t **partsFound) const
Return an array of all parts overlapping a given box in space.
const part_t * getPartListView() const
Returns the part list corresponding to the global ID list.
PartitioningSolution(const RCP< const Environment > &env, const RCP< const Comm< int > > &comm, int nUserWeights, const RCP< Algorithm< Adapter > > &algorithm=Teuchos::null)
Constructor when part sizes are not supplied.
Algorithm defines the base class for all algorithms.
map_t::local_ordinal_type lno_t
void getPartitionTree(part_t &numTreeVerts, std::vector< part_t > &permPartNums, std::vector< part_t > &splitRangeBeg, std::vector< part_t > &splitRangeEnd, std::vector< part_t > &treeVertParents) const
get the partition tree - fill the relevant arrays
Greedy Maximal Weight Matching.
void setParts(ArrayRCP< part_t > &partList)
The algorithm uses setParts to set the solution.
static const std::string fail
std::vector< Zoltan2::coordinateModelPartBox > & getPartBoxesView() const
returns the part box boundary list.
Defines the Environment class.
const part_t * getProcDistribution() const
Return a distribution by process.
void getCommunicationGraph(ArrayRCP< part_t > &comXAdj, ArrayRCP< part_t > &comAdj) const
returns communication graph resulting from geometric partitioning.
int getNumberOfCriteria() const
Get the number of criteria (object weights)
const int * getPartDistribution() const
Return a distribution by part.
#define Z2_THROW_OUTSIDE_ERROR(env)
Throw an error returned from outside the Zoltan2 library.
size_t getActualGlobalNumberOfParts() const
Returns the actual global number of parts provided in setParts().
bool criteriaHasUniformPartSizes(int idx) const
Determine if balancing criteria has uniform part sizes. (User can specify differing part sizes...
bool criteriaHaveSamePartSizes(int c1, int c2) const
Return true if the two weight indices have the same part size information.
size_t getTargetGlobalNumberOfParts() const
Returns the global number of parts desired in the solution.
const RCP< const Environment > & getEnvironment() const
Return the environment associated with the solution.
Zoltan2::BasicUserTypes< zscalar_t, zlno_t, zgno_t > user_t