50 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
51 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
54 template <
typename Adapter>
91 template <
typename Adapter>
96 #ifndef DOXYGEN_SHOULD_SKIP_THIS
98 typedef typename Adapter::scalar_t scalar_t;
122 const RCP<
const Comm<int> > &comm,
156 const RCP<
const Comm<int> > &comm,
157 int nUserWeights, ArrayView<ArrayRCP<part_t> > reqPartIds,
158 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
216 if (partDist_.size() > 0)
return &partDist_[0];
237 if (procDist_.size() > 0)
return &procDist_[0];
268 if (pSizeUniform_[idx])
269 return 1.0 / nGlobalParts_;
270 else if (pCompactIndex_[idx].size())
271 return pSize_[idx][pCompactIndex_[idx][part]];
273 return pSize_[idx][part];
320 void setParts(ArrayRCP<part_t> &partList);
345 for (
part_t i = 0; i < nrhs; i++) {
346 part_t k = (remap ? remap[i] : i);
347 for (
part_t j = idx[k]; j < idx[k+1]; j++) {
348 if (i == (adj[j]-nlhs)) {
374 if (parts_.size() > 0)
return parts_.getRawPtr();
383 if (procs_.size() > 0)
return procs_.getRawPtr();
391 if (this->algorithm_ == Teuchos::null)
392 throw std::logic_error(
"no partitioning algorithm has been run yet");
393 return this->algorithm_->isPartitioningTreeBinary();
399 std::vector<part_t> & permPartNums,
400 std::vector<part_t> & splitRangeBeg,
401 std::vector<part_t> & splitRangeEnd,
402 std::vector<part_t> & treeVertParents)
const {
406 if (this->algorithm_ == Teuchos::null)
407 throw std::logic_error(
"no partitioning algorithm has been run yet");
408 this->algorithm_->getPartitionTree(
419 std::vector<Zoltan2::coordinateModelPartBox> &
422 return this->algorithm_->getPartBoxesView();
438 if (this->algorithm_ == Teuchos::null)
439 throw std::logic_error(
"no partitioning algorithm has been run yet");
441 p = this->algorithm_->pointAssign(dim, point);
459 void boxAssign(
int dim, scalar_t *lower, scalar_t *upper,
460 size_t &nPartsFound,
part_t **partsFound)
const
463 if (this->algorithm_ == Teuchos::null)
464 throw std::logic_error(
"no partitioning algorithm has been run yet");
466 this->algorithm_->boxAssign(dim, lower, upper, nPartsFound, partsFound);
475 ArrayRCP <part_t> &comAdj)
const
478 if (this->algorithm_ == Teuchos::null)
479 throw std::logic_error(
"no partitioning algorithm has been run yet");
481 this->algorithm_->getCommunicationGraph(
this, comXAdj, comAdj);
506 env_->localInputAssertion(__FILE__, __LINE__,
"invalid process id",
509 procToPartsMap(procId, numParts, partMin, partMax);
525 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part id",
528 partToProcsMap(partId, procMin, procMax);
532 void partToProc(
bool doCheck,
bool haveNumLocalParts,
bool haveNumGlobalParts,
533 int numLocalParts,
int numGlobalParts);
535 void procToPartsMap(
int procId,
double &numParts,
part_t &partMin,
538 void partToProcsMap(
part_t partId,
int &procMin,
int &procMax)
const;
540 void setPartDistribution();
542 void setPartSizes(ArrayView<ArrayRCP<part_t> > reqPartIds,
543 ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
545 void computePartSizes(
int widx, ArrayView<part_t> ids,
546 ArrayView<scalar_t> sizes);
548 void broadcastPartSizes(
int widx);
551 RCP<const Environment> env_;
552 const RCP<const Comm<int> > comm_;
555 RCP<std::vector<Zoltan2::coordinateModelPartBox> > partBoxes;
560 scalar_t localFraction_;
592 bool onePartPerProc_;
593 std::vector<int> partDist_;
594 std::vector<part_t> procDist_;
595 bool procDistEquallySpread_;
631 ArrayRCP<bool> pSizeUniform_;
632 ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
633 ArrayRCP<ArrayRCP<scalar_t> > pSize_;
638 ArrayRCP<part_t> parts_;
642 part_t nGlobalPartsSolution_;
648 ArrayRCP<int> procs_;
653 const RCP<Algorithm<Adapter> > algorithm_;
660 template <
typename Adapter>
662 const RCP<const Environment> &env,
663 const RCP<
const Comm<int> > &comm,
666 : env_(env), comm_(comm),
668 nGlobalParts_(0), nLocalParts_(0),
669 localFraction_(0), nWeightsPerObj_(),
670 onePartPerProc_(false), partDist_(), procDist_(),
671 procDistEquallySpread_(false),
672 pSizeUniform_(), pCompactIndex_(), pSize_(),
673 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
674 procs_(), algorithm_(algorithm)
676 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);
678 setPartDistribution();
683 ArrayRCP<part_t> *noIds =
new ArrayRCP<part_t> [nWeightsPerObj_];
684 ArrayRCP<scalar_t> *noSizes =
new ArrayRCP<scalar_t> [nWeightsPerObj_];
685 ArrayRCP<ArrayRCP<part_t> > ids(noIds, 0, nWeightsPerObj_,
true);
686 ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_,
true);
688 setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_));
690 env_->memory(
"After construction of solution");
693 template <
typename Adapter>
695 const RCP<const Environment> &env,
696 const RCP<
const Comm<int> > &comm,
698 ArrayView<ArrayRCP<part_t> > reqPartIds,
699 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
701 : env_(env), comm_(comm),
703 nGlobalParts_(0), nLocalParts_(0),
704 localFraction_(0), nWeightsPerObj_(),
705 onePartPerProc_(false), partDist_(), procDist_(),
706 procDistEquallySpread_(false),
707 pSizeUniform_(), pCompactIndex_(), pSize_(),
708 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
709 procs_(), algorithm_(algorithm)
711 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);
713 setPartDistribution();
715 setPartSizes(reqPartIds, reqPartSizes);
717 env_->memory(
"After construction of solution");
720 template <
typename Adapter>
725 const ParameterList &pl = env_->getParameters();
726 size_t haveGlobalNumParts=0, haveLocalNumParts=0;
727 int numLocal=0, numGlobal=0;
729 const Teuchos::ParameterEntry *pe = pl.getEntryPtr(
"num_global_parts");
732 haveGlobalNumParts = 1;
733 nGlobalParts_ =
part_t(pe->getValue(&nGlobalParts_));
734 numGlobal = nGlobalParts_;
737 pe = pl.getEntryPtr(
"num_local_parts");
740 haveLocalNumParts = 1;
741 nLocalParts_ =
part_t(pe->getValue(&nLocalParts_));
742 numLocal = nLocalParts_;
748 partToProc(
true, haveLocalNumParts, haveGlobalNumParts,
749 numLocal, numGlobal);
753 int nprocs = comm_->getSize();
754 int rank = comm_->getRank();
756 if (onePartPerProc_){
757 nGlobalParts_ = nprocs;
760 else if (partDist_.size() > 0){
761 nGlobalParts_ = partDist_.size() - 1;
762 int pstart = partDist_[0];
763 for (
part_t i=1; i <= nGlobalParts_; i++){
764 int pend = partDist_[i];
765 if (rank >= pstart && rank < pend){
766 int numOwners = pend - pstart;
768 localFraction_ = 1.0 / numOwners;
774 else if (procDist_.size() > 0){
775 nGlobalParts_ = procDist_[nprocs];
776 nLocalParts_ = procDist_[rank+1] - procDist_[rank];
779 throw std::logic_error(
"partToProc error");
784 template <
typename Adapter>
785 void PartitioningSolution<Adapter>::setPartSizes(
786 ArrayView<ArrayRCP<part_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
788 int widx = nWeightsPerObj_;
791 size_t *countBuf =
new size_t [widx*2];
792 ArrayRCP<size_t> counts(countBuf, 0, widx*2,
true);
794 fail = ((ids.size() != widx) || (sizes.size() != widx));
796 for (
int w=0; !fail && w < widx; w++){
797 counts[w] = ids[w].size();
798 if (ids[w].size() != sizes[w].size()) fail=
true;
801 env_->globalBugAssertion(__FILE__, __LINE__,
"bad argument arrays", fail==0,
806 ArrayRCP<scalar_t> *emptySizes=
new ArrayRCP<scalar_t> [widx];
807 pSize_ = arcp(emptySizes, 0, widx);
809 ArrayRCP<unsigned char> *emptyIndices=
new ArrayRCP<unsigned char> [widx];
810 pCompactIndex_ = arcp(emptyIndices, 0, widx);
812 bool *info =
new bool [widx];
813 pSizeUniform_ = arcp(info, 0, widx);
814 for (
int w=0; w < widx; w++)
815 pSizeUniform_[w] =
true;
817 if (nGlobalParts_ == 1){
821 size_t *ptr1 = counts.getRawPtr();
822 size_t *ptr2 = counts.getRawPtr() + widx;
825 reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2);
831 for (
int w=0; w < widx; w++)
832 if (counts[widx+w] > 0){
834 pSizeUniform_[w] =
false;
843 int nprocs = comm_->getSize();
844 int rank = comm_->getRank();
846 for (
int w=0; w < widx; w++){
847 if (pSizeUniform_[w])
continue;
852 part_t length = ids[w].size();
854 Teuchos::gatherAll<int, part_t>(*comm_, 1, &length,
859 for (
int i=0; i < nprocs; i++)
860 total += allLength[i];
863 scalar_t *partSizes =
new scalar_t [total];
865 ArrayView<part_t> idArray(partNums, total);
866 ArrayView<scalar_t> sizeArray(partSizes, total);
869 for (
int i=0; i < length; i++){
870 *partNums++ = ids[w][i];
871 *partSizes++ = sizes[w][i];
875 for (
int p=1; p < nprocs; p++){
876 if (allLength[p] > 0){
877 Teuchos::receive<int, part_t>(*comm_, p,
878 allLength[p], partNums);
879 Teuchos::receive<int, scalar_t>(*comm_, p,
880 allLength[p], partSizes);
881 partNums += allLength[p];
882 partSizes += allLength[p];
889 computePartSizes(w, idArray, sizeArray);
893 delete [] idArray.getRawPtr();
894 delete [] sizeArray.getRawPtr();
899 Teuchos::send<int, part_t>(*comm_, length, ids[w].getRawPtr(), 0);
900 Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
904 broadcastPartSizes(w);
908 template <
typename Adapter>
909 void PartitioningSolution<Adapter>::broadcastPartSizes(
int widx)
911 env_->localBugAssertion(__FILE__, __LINE__,
"preallocations",
912 pSize_.size()>widx &&
913 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
916 int rank = comm_->getRank();
917 int nprocs = comm_->getSize();
918 part_t nparts = nGlobalParts_;
926 if (pSizeUniform_[widx] ==
true)
928 else if (pCompactIndex_[widx].size() > 0)
935 Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
941 pSizeUniform_[widx] =
true;
950 unsigned char *idxbuf = NULL;
953 idxbuf =
new unsigned char [nparts];
954 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
957 env_->localBugAssertion(__FILE__, __LINE__,
"index list size",
959 idxbuf = pCompactIndex_[widx].getRawPtr();
964 Teuchos::broadcast<int, char>(*comm_, 0, nparts,
965 reinterpret_cast<char *
>(idxbuf));
970 pCompactIndex_[widx] = arcp(idxbuf, 0, nparts,
true);
974 unsigned char maxIdx=0;
975 for (
part_t p=0; p < nparts; p++)
976 if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
978 int numSizes = maxIdx + 1;
980 scalar_t *sizeList = NULL;
983 sizeList =
new scalar_t [numSizes];
984 env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
987 env_->localBugAssertion(__FILE__, __LINE__,
"wrong number of sizes",
990 sizeList = pSize_[widx].getRawPtr();
994 Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
999 pSize_[widx] = arcp(sizeList, 0, numSizes,
true);
1008 scalar_t *sizeList = NULL;
1011 sizeList =
new scalar_t [nparts];
1012 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
1015 env_->localBugAssertion(__FILE__, __LINE__,
"wrong number of sizes",
1018 sizeList = pSize_[widx].getRawPtr();
1022 Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
1027 pSize_[widx] = arcp(sizeList, 0, nparts);
1033 template <
typename Adapter>
1034 void PartitioningSolution<Adapter>::computePartSizes(
int widx,
1035 ArrayView<part_t> ids, ArrayView<scalar_t> sizes)
1037 int len = ids.size();
1040 pSizeUniform_[widx] =
true;
1044 env_->localBugAssertion(__FILE__, __LINE__,
"bad array sizes",
1047 env_->localBugAssertion(__FILE__, __LINE__,
"bad index",
1050 env_->localBugAssertion(__FILE__, __LINE__,
"preallocations",
1051 pSize_.size()>widx &&
1052 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
1058 part_t nparts = nGlobalParts_;
1059 unsigned char *buf =
new unsigned char [nparts];
1060 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
1061 memset(buf, 0, nparts);
1062 ArrayRCP<unsigned char> partIdx(buf, 0, nparts,
true);
1064 scalar_t
epsilon = 10e-5 / nparts;
1065 scalar_t min=sizes[0], max=sizes[0], sum=0;
1067 for (
int i=0; i < len; i++){
1069 scalar_t size = sizes[i];
1071 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part id",
1074 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part size", size>=0,
1081 env_->localInputAssertion(__FILE__, __LINE__,
1082 "multiple sizes provided for one part", partIdx[
id]==0,
BASIC_ASSERTION);
1086 if (size < min) min = size;
1087 if (size > max) max = size;
1096 scalar_t *allSizes =
new scalar_t [2];
1097 env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
1099 ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2,
true);
1101 int numNonZero = nparts - len;
1104 allSizes[1] = 1.0 / numNonZero;
1106 for (
part_t p=0; p < nparts; p++)
1109 for (
int i=0; i < len; i++)
1112 pSize_[widx] = sizeArray;
1113 pCompactIndex_[widx] = partIdx;
1118 if (max - min <= epsilon){
1119 pSizeUniform_[widx] =
true;
1124 scalar_t avg = sum / nparts;
1131 scalar_t *tmp =
new scalar_t [len];
1132 env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
1133 memcpy(tmp, sizes.getRawPtr(),
sizeof(scalar_t) * len);
1134 ArrayRCP<scalar_t> partSizes(tmp, 0, len,
true);
1136 std::sort(partSizes.begin(), partSizes.end());
1140 Array<scalar_t> nextUniqueSize;
1141 nextUniqueSize.push_back(partSizes[len-1]);
1142 scalar_t curr = partSizes[len-1];
1144 bool haveAvg =
false;
1145 if (curr - avg <= epsilon)
1148 for (
int i=len-2; i >= 0; i--){
1149 scalar_t val = partSizes[i];
1150 if (curr - val > epsilon){
1151 nextUniqueSize.push_back(val);
1153 if (avgIndex==len && val > avg && val - avg <= epsilon){
1155 avgIndex = nextUniqueSize.size() - 1;
1163 size_t numSizes = nextUniqueSize.size();
1164 int sizeArrayLen = numSizes;
1171 if (!haveAvg) sizeArrayLen++;
1173 scalar_t *allSizes =
new scalar_t [sizeArrayLen];
1174 env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
1175 ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen,
true);
1177 int newAvgIndex = sizeArrayLen;
1179 for (
int i=numSizes-1, idx=0; i >= 0; i--){
1181 if (newAvgIndex == sizeArrayLen){
1183 if (haveAvg && i==avgIndex)
1186 else if (!haveAvg && avg < nextUniqueSize[i]){
1188 allSizes[idx++] = avg;
1192 allSizes[idx++] = nextUniqueSize[i];
1195 env_->localBugAssertion(__FILE__, __LINE__,
"finding average in list",
1198 for (
int i=0; i < nparts; i++){
1199 buf[i] = newAvgIndex;
1202 sum = (nparts - len) * allSizes[newAvgIndex];
1204 for (
int i=0; i < len; i++){
1206 scalar_t size = sizes[i];
1211 if (size < avg && avg - size <= epsilon)
1212 index = newAvgIndex;
1214 typename ArrayRCP<scalar_t>::iterator found =
1215 std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
1217 env_->localBugAssertion(__FILE__, __LINE__,
"size array",
1220 index = found - sizeArray.begin();
1224 sum += allSizes[index];
1227 for (
int i=0; i < sizeArrayLen; i++){
1228 sizeArray[i] /= sum;
1231 pCompactIndex_[widx] = partIdx;
1232 pSize_[widx] = sizeArray;
1238 tmp =
new scalar_t [nparts];
1239 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
1241 sum += ((nparts - len) * avg);
1243 for (
int i=0; i < nparts; i++){
1247 for (
int i=0; i < len; i++){
1248 tmp[ids[i]] = sizes[i]/sum;
1251 pSize_[widx] = arcp(tmp, 0, nparts);
1255 template <
typename Adapter>
1260 size_t len = partList.size();
1269 part_t lMin = (len > 0 ? std::numeric_limits<part_t>::max() : 0);
1272 for (
size_t i = 0; i < len; i++) {
1273 if (partList[i] < lMin) lMin = partList[i];
1274 if (partList[i] > lMax) lMax = partList[i];
1276 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MIN, 1, &lMin, &gMin);
1277 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MAX, 1, &lMax, &gMax);
1279 nGlobalPartsSolution_ = gMax - gMin + 1;
1284 if (!onePartPerProc_){
1286 int *procs =
new int [len];
1287 env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
1288 procs_ = arcp<int>(procs, 0, len);
1291 part_t *parts = partList.getRawPtr();
1293 if (procDist_.size() > 0){
1296 for (
size_t i=0; i < len; i++){
1297 partToProcsMap(parts[i], procs[i], procId);
1302 lno_t *partCounter =
new lno_t [nGlobalPartsSolution_];
1303 env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
1306 int numProcs = comm_->getSize();
1310 memset(partCounter, 0,
sizeof(
lno_t) * nGlobalPartsSolution_);
1312 for (
typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++)
1313 partCounter[parts[i]]++;
1316 env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
1319 int proc2 = partDist_[0];
1321 for (
part_t part=1; part < nGlobalParts_; part++){
1323 proc2 = partDist_[part+1];
1324 int numprocs = proc2 - proc1;
1326 double dNum = partCounter[part];
1327 double dProcs = numprocs;
1330 double each = floor(dNum/dProcs);
1331 double extra = fmod(dNum,dProcs);
1333 for (
int proc=proc1, i=0; proc<proc2; proc++, i++){
1335 procCounter[proc] =
lno_t(each) + 1;
1337 procCounter[proc] =
lno_t(each);
1341 delete [] partCounter;
1343 for (
typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++){
1344 if (partList[i] >= nGlobalParts_){
1347 procs[i] = comm_->getRank();
1350 part_t partNum = parts[i];
1351 proc1 = partDist_[partNum];
1352 proc2 = partDist_[partNum + 1];
1355 for (proc=proc1; proc < proc2; proc++){
1356 if (procCounter[proc] > 0){
1358 procCounter[proc]--;
1362 env_->localBugAssertion(__FILE__, __LINE__,
"part to proc",
1366 delete [] procCounter;
1376 bool doRemap =
false;
1377 const Teuchos::ParameterEntry *pe =
1378 env_->getParameters().getEntryPtr(
"remap_parts");
1379 if (pe) doRemap = pe->getValue(&doRemap);
1380 if (doRemap) RemapParts();
1382 haveSolution_ =
true;
1384 env_->memory(
"After Solution has processed algorithm's answer");
1389 template <
typename Adapter>
1391 double &numParts,
part_t &partMin,
part_t &partMax)
const
1393 if (onePartPerProc_){
1395 partMin = partMax = procId;
1397 else if (procDist_.size() > 0){
1398 partMin = procDist_[procId];
1399 partMax = procDist_[procId+1] - 1;
1400 numParts = procDist_[procId+1] - partMin;
1405 std::vector<int>::const_iterator entry;
1406 entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
1408 size_t partIdx = entry - partDist_.begin();
1409 int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
1410 partMin = partMax = int(partIdx) - 1;
1411 numParts = 1.0 / numProcs;
1415 template <
typename Adapter>
1416 void PartitioningSolution<Adapter>::partToProcsMap(
part_t partId,
1417 int &procMin,
int &procMax)
const
1419 if (partId >= nGlobalParts_){
1423 procMin = procMax = comm_->getRank();
1425 else if (onePartPerProc_){
1426 procMin = procMax = int(partId);
1428 else if (procDist_.size() > 0){
1429 if (procDistEquallySpread_) {
1431 double fProcs = comm_->getSize();
1432 double fParts = nGlobalParts_;
1433 double each = fParts / fProcs;
1434 procMin = int(partId / each);
1435 while (procDist_[procMin] > partId) procMin--;
1436 while (procDist_[procMin+1] <= partId) procMin++;
1443 typename std::vector<part_t>::const_iterator entry;
1444 entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
1446 size_t procIdx = entry - procDist_.begin();
1447 procMin = procMax = int(procIdx) - 1;
1451 procMin = partDist_[partId];
1452 procMax = partDist_[partId+1] - 1;
1456 template <
typename Adapter>
1458 int c1,
int c2)
const
1460 if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
1461 throw std::logic_error(
"criteriaHaveSamePartSizes error");
1463 bool theSame =
false;
1468 else if (pSizeUniform_[c1] ==
true && pSizeUniform_[c2] ==
true)
1471 else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
1473 bool useIndex = pCompactIndex_[c1].size() > 0;
1475 for (
part_t p=0; theSame && p < nGlobalParts_; p++)
1476 if (pSize_[c1][pCompactIndex_[c1][p]] !=
1477 pSize_[c2][pCompactIndex_[c2][p]])
1481 for (
part_t p=0; theSame && p < nGlobalParts_; p++)
1482 if (pSize_[c1][p] != pSize_[c2][p])
1505 template <
typename Adapter>
1507 bool doCheck,
bool haveNumLocalParts,
bool haveNumGlobalParts,
1508 int numLocalParts,
int numGlobalParts)
1511 typedef SSIZE_T ssize_t;
1513 int nprocs = comm_->getSize();
1514 ssize_t reducevals[4];
1515 ssize_t sumHaveGlobal=0, sumHaveLocal=0;
1516 ssize_t sumGlobal=0, sumLocal=0;
1517 ssize_t maxGlobal=0, maxLocal=0;
1518 ssize_t vals[4] = {haveNumGlobalParts, haveNumLocalParts,
1519 numGlobalParts, numLocalParts};
1527 reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4, vals, reducevals);
1531 sumHaveGlobal = reducevals[0];
1532 sumHaveLocal = reducevals[1];
1533 sumGlobal = reducevals[2];
1534 sumLocal = reducevals[3];
1536 env_->localInputAssertion(__FILE__, __LINE__,
1537 "Either all procs specify num_global/local_parts or none do",
1538 (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
1539 (sumHaveLocal == 0 || sumHaveLocal == nprocs),
1543 if (haveNumLocalParts)
1544 sumLocal = numLocalParts * nprocs;
1545 if (haveNumGlobalParts)
1546 sumGlobal = numGlobalParts * nprocs;
1548 sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
1549 sumHaveLocal = haveNumLocalParts ? nprocs : 0;
1551 maxLocal = numLocalParts;
1552 maxGlobal = numGlobalParts;
1555 if (!haveNumLocalParts && !haveNumGlobalParts){
1556 onePartPerProc_ =
true;
1560 if (haveNumGlobalParts){
1562 vals[0] = numGlobalParts;
1563 vals[1] = numLocalParts;
1565 reduceAll<int, ssize_t>(
1566 *comm_, Teuchos::REDUCE_MAX, 2, vals, reducevals);
1570 maxGlobal = reducevals[0];
1571 maxLocal = reducevals[1];
1573 env_->localInputAssertion(__FILE__, __LINE__,
1574 "Value for num_global_parts is different on different processes.",
1579 env_->localInputAssertion(__FILE__, __LINE__,
1580 "Sum of num_local_parts does not equal requested num_global_parts",
1583 if (sumLocal == nprocs && maxLocal == 1){
1584 onePartPerProc_ =
true;
1589 if (maxGlobal == nprocs){
1590 onePartPerProc_ =
true;
1598 if (sumHaveLocal == nprocs){
1604 procDist_.resize(nprocs+1);
1606 catch (std::exception &){
1607 throw(std::bad_alloc());
1610 part_t *procArray = &procDist_[0];
1614 gatherAll<int, part_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
1620 for (
int proc=0; proc < nprocs; proc++)
1621 procArray[proc+1] += procArray[proc];
1627 double fParts = numGlobalParts;
1628 double fProcs = nprocs;
1630 if (fParts < fProcs){
1633 partDist_.resize(
size_t(fParts+1));
1635 catch (std::exception &){
1636 throw(std::bad_alloc());
1639 int *partArray = &partDist_[0];
1641 double each = floor(fProcs / fParts);
1642 double extra = fmod(fProcs, fParts);
1645 for (
part_t part=0; part < numGlobalParts; part++){
1646 int numOwners = int(each + ((part<extra) ? 1 : 0));
1647 partArray[part+1] = partArray[part] + numOwners;
1650 env_->globalBugAssertion(__FILE__, __LINE__,
"#parts != #procs",
1653 else if (fParts > fProcs){
1658 procDistEquallySpread_ =
true;
1661 procDist_.resize(
size_t(fProcs+1));
1663 catch (std::exception &){
1664 throw(std::bad_alloc());
1667 part_t *procArray = &procDist_[0];
1669 double each = floor(fParts / fProcs);
1670 double extra = fmod(fParts, fProcs);
1673 for (
int proc=0; proc < nprocs; proc++){
1674 part_t numParts =
part_t(each + ((proc<extra) ? 1 : 0));
1675 procArray[proc+1] = procArray[proc] + numParts;
1678 env_->globalBugAssertion(__FILE__, __LINE__,
"#parts != #procs",
1682 env_->globalBugAssertion(__FILE__, __LINE__,
1700 template <
typename Adapter>
1703 size_t len = parts_.size();
1705 part_t me = comm_->getRank();
1706 int np = comm_->getSize();
1708 if (np < nGlobalParts_) {
1710 std::ostringstream msg;
1711 msg <<
"Remapping not yet supported for "
1712 <<
"num_global_parts " << nGlobalParts_
1713 <<
" > num procs " << np << std::endl;
1728 std::map<part_t, long> edges;
1733 for (
size_t i = 0; i < len; i++) {
1735 if (parts_[i] == me) lstaying++;
1738 Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
1739 &lstaying, &gstaying);
1744 int nedges = edges.size();
1747 part_t tnVtx = np + nGlobalParts_;
1751 idx =
new int[tnVtx+1];
1752 sizes =
new int[np];
1756 Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);
1761 for (
int i = 0; i < np; i++)
1762 idx[i+1] = idx[i] + sizes[i];
1770 bufv =
new part_t[nedges];
1771 bufw =
new long[nedges];
1773 for (
typename std::map<part_t, long>::iterator it = edges.begin();
1774 it != edges.end(); it++) {
1775 bufv[cnt] = it->first;
1776 bufw[cnt] = it->second;
1787 adj =
new part_t[idx[np]];
1788 wgt =
new long[idx[np]];
1791 Teuchos::gatherv<int, part_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
1792 Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
1803 for (
int i = 0; i < idx[np]; i++) {
1808 for (
part_t i = np; i < tnVtx; i++) {
1813 std::cout <<
"IDX ";
1814 for (
part_t i = 0; i <= tnVtx; i++) std::cout << idx[i] <<
" ";
1815 std::cout << std::endl;
1817 std::cout <<
"ADJ ";
1818 for (
part_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] <<
" ";
1819 std::cout << std::endl;
1821 std::cout <<
"WGT ";
1822 for (
part_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] <<
" ";
1823 std::cout << std::endl;
1828 for (
part_t i = 0; i < tnVtx; i++) match[i] = i;
1830 Zoltan2::GreedyMWM<part_t, long>(idx, adj, wgt, tnVtx, match);
1833 std::cout <<
"After matching: " << nmatches <<
" found" << std::endl;
1834 for (
part_t i = 0; i < tnVtx; i++)
1835 std::cout <<
"match[" << i <<
"] = " << match[i]
1836 << ((match[i] != i &&
1837 (i < np && match[i] != i+np))
1843 bool nontrivial =
false;
1845 for (
part_t i = 0; i < np; i++) {
1846 if ((match[i] != i) && (match[i] != (i+np))) {
1855 remap =
new part_t[nGlobalParts_];
1856 for (
part_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;
1858 bool *used =
new bool[np];
1859 for (
part_t i = 0; i < np; i++) used[i] =
false;
1862 for (
part_t i = 0; i < nGlobalParts_; i++) {
1864 if (match[tmp] != tmp) {
1865 remap[i] = match[tmp];
1866 used[match[tmp]] =
true;
1871 for (
part_t i = 0; i < nGlobalParts_; i++) {
1872 if (remap[i] > -1)
continue;
1880 for (
part_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
1881 if (remap[i] > -1)
continue;
1882 while (used[uidx]) uidx++;
1891 std::cout <<
"Remap vector: ";
1892 for (
part_t i = 0; i < nGlobalParts_; i++) std::cout << remap[i] <<
" ";
1893 std::cout << std::endl;
1896 long newgstaying = measure_stays(remap, idx, adj, wgt,
1898 doRemap = (newgstaying > gstaying);
1899 std::ostringstream msg;
1900 msg <<
"gstaying " << gstaying <<
" measure(input) "
1901 << measure_stays(NULL, idx, adj, wgt, nGlobalParts_, np)
1902 <<
" newgstaying " << newgstaying
1903 <<
" nontrivial " << nontrivial
1904 <<
" doRemap " << doRemap << std::endl;
1912 Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);
1915 if (me != 0) remap =
new part_t[nGlobalParts_];
1916 Teuchos::broadcast<int, part_t>(*comm_, 0, nGlobalParts_, remap);
1917 for (
size_t i = 0; i < len; i++) {
1918 parts_[i] = remap[parts_[i]];
const int * getProcListView() const
Returns the process list corresponding to the global ID list.
scalar_t getCriteriaPartSize(int idx, part_t part) const
Get the size for a given weight index and a given part.
Defines the Solution base class.
fast typical checks for valid arguments
#define Z2_FORWARD_EXCEPTIONS
Forward an exception back through call stack.
part_t pointAssign(int dim, scalar_t *point) const
Return the part overlapping a given point in space;.
void RemapParts()
Remap a new partition for maximum overlap with an input partition.
Just a placeholder for now.
more involved, like validate a graph
map_t::global_ordinal_type gno_t
size_t getLocalNumberOfParts() const
Returns the number of parts to be assigned to this process.
virtual bool isPartitioningTreeBinary() const
calculate if partition tree is binary.
void getPartsForProc(int procId, double &numParts, part_t &partMin, part_t &partMax) const
Get the parts belonging to a process.
void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
Get the processes containing a part.
sub-steps, each method's entry and exit
bool oneToOnePartDistribution() const
Is the part-to-process distribution is one-to-one.
SparseMatrixAdapter_t::part_t part_t
long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt, part_t nrhs, part_t nlhs)
scalar_t getLocalFractionOfPart() const
If parts are divided across processes, return the fraction of a part on this process.
A PartitioningSolution is a solution to a partitioning problem.
const RCP< const Comm< int > > & getCommunicator() const
Return the communicator associated with the solution.
void boxAssign(int dim, scalar_t *lower, scalar_t *upper, size_t &nPartsFound, part_t **partsFound) const
Return an array of all parts overlapping a given box in space.
const part_t * getPartListView() const
Returns the part list corresponding to the global ID list.
PartitioningSolution(const RCP< const Environment > &env, const RCP< const Comm< int > > &comm, int nUserWeights, const RCP< Algorithm< Adapter > > &algorithm=Teuchos::null)
Constructor when part sizes are not supplied.
Algorithm defines the base class for all algorithms.
map_t::local_ordinal_type lno_t
void getPartitionTree(part_t &numTreeVerts, std::vector< part_t > &permPartNums, std::vector< part_t > &splitRangeBeg, std::vector< part_t > &splitRangeEnd, std::vector< part_t > &treeVertParents) const
get the partition tree - fill the relevant arrays
Greedy Maximal Weight Matching.
void setParts(ArrayRCP< part_t > &partList)
The algorithm uses setParts to set the solution.
static const std::string fail
std::vector< Zoltan2::coordinateModelPartBox > & getPartBoxesView() const
returns the part box boundary list.
Defines the Environment class.
const part_t * getProcDistribution() const
Return a distribution by process.
void getCommunicationGraph(ArrayRCP< part_t > &comXAdj, ArrayRCP< part_t > &comAdj) const
returns communication graph resulting from geometric partitioning.
int getNumberOfCriteria() const
Get the number of criteria (object weights)
const int * getPartDistribution() const
Return a distribution by part.
#define Z2_THROW_OUTSIDE_ERROR(env)
Throw an error returned from outside the Zoltan2 library.
size_t getActualGlobalNumberOfParts() const
Returns the actual global number of parts provided in setParts().
bool criteriaHasUniformPartSizes(int idx) const
Determine if balancing criteria has uniform part sizes. (User can specify differing part sizes...
bool criteriaHaveSamePartSizes(int c1, int c2) const
Return true if the two weight indices have the same part size information.
size_t getTargetGlobalNumberOfParts() const
Returns the global number of parts desired in the solution.
const RCP< const Environment > & getEnvironment() const
Return the environment associated with the solution.
Zoltan2::BasicUserTypes< zscalar_t, zlno_t, zgno_t > user_t