50 #ifndef _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
51 #define _ZOLTAN2_PARTITIONINGSOLUTION_HPP_
54 template <
typename Adapter>
90 template <
typename Adapter>
95 #ifndef DOXYGEN_SHOULD_SKIP_THIS
96 typedef typename Adapter::gno_t gno_t;
97 typedef typename Adapter::scalar_t scalar_t;
98 typedef typename Adapter::lno_t lno_t;
121 const RCP<
const Comm<int> > &comm,
155 const RCP<
const Comm<int> > &comm,
156 int nUserWeights, ArrayView<ArrayRCP<part_t> > reqPartIds,
157 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
215 if (partDist_.size() > 0)
return &partDist_[0];
236 if (procDist_.size() > 0)
return &procDist_[0];
267 if (pSizeUniform_[idx])
268 return 1.0 / nGlobalParts_;
269 else if (pCompactIndex_[idx].size())
270 return pSize_[idx][pCompactIndex_[idx][part]];
272 return pSize_[idx][part];
319 void setParts(ArrayRCP<part_t> &partList);
344 for (
part_t i = 0; i < nrhs; i++) {
345 part_t k = (remap ? remap[i] : i);
346 for (
part_t j = idx[k]; j < idx[k+1]; j++) {
347 if (i == (adj[j]-nlhs)) {
373 if (parts_.size() > 0)
return parts_.getRawPtr();
382 if (procs_.size() > 0)
return procs_.getRawPtr();
390 if (this->algorithm_ == Teuchos::null)
391 throw std::logic_error(
"no partitioning algorithm has been run yet");
392 return this->algorithm_->isPartitioningTreeBinary();
398 std::vector<part_t> & permPartNums,
399 std::vector<part_t> & splitRangeBeg,
400 std::vector<part_t> & splitRangeEnd,
401 std::vector<part_t> & treeVertParents)
const {
405 if (this->algorithm_ == Teuchos::null)
406 throw std::logic_error(
"no partitioning algorithm has been run yet");
407 this->algorithm_->getPartitionTree(
418 std::vector<Zoltan2::coordinateModelPartBox> &
421 return this->algorithm_->getPartBoxesView();
437 if (this->algorithm_ == Teuchos::null)
438 throw std::logic_error(
"no partitioning algorithm has been run yet");
440 p = this->algorithm_->pointAssign(dim, point);
458 void boxAssign(
int dim, scalar_t *lower, scalar_t *upper,
459 size_t &nPartsFound,
part_t **partsFound)
const
462 if (this->algorithm_ == Teuchos::null)
463 throw std::logic_error(
"no partitioning algorithm has been run yet");
465 this->algorithm_->boxAssign(dim, lower, upper, nPartsFound, partsFound);
474 ArrayRCP <part_t> &comAdj)
const
477 if (this->algorithm_ == Teuchos::null)
478 throw std::logic_error(
"no partitioning algorithm has been run yet");
480 this->algorithm_->getCommunicationGraph(
this, comXAdj, comAdj);
505 env_->localInputAssertion(__FILE__, __LINE__,
"invalid process id",
508 procToPartsMap(procId, numParts, partMin, partMax);
524 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part id",
527 partToProcsMap(partId, procMin, procMax);
531 void partToProc(
bool doCheck,
bool haveNumLocalParts,
bool haveNumGlobalParts,
532 int numLocalParts,
int numGlobalParts);
534 void procToPartsMap(
int procId,
double &numParts,
part_t &partMin,
537 void partToProcsMap(
part_t partId,
int &procMin,
int &procMax)
const;
539 void setPartDistribution();
541 void setPartSizes(ArrayView<ArrayRCP<part_t> > reqPartIds,
542 ArrayView<ArrayRCP<scalar_t> > reqPartSizes);
544 void computePartSizes(
int widx, ArrayView<part_t> ids,
545 ArrayView<scalar_t> sizes);
547 void broadcastPartSizes(
int widx);
550 RCP<const Environment> env_;
551 const RCP<const Comm<int> > comm_;
554 RCP<std::vector<Zoltan2::coordinateModelPartBox> > partBoxes;
559 scalar_t localFraction_;
591 bool onePartPerProc_;
592 std::vector<int> partDist_;
593 std::vector<part_t> procDist_;
594 bool procDistEquallySpread_;
630 ArrayRCP<bool> pSizeUniform_;
631 ArrayRCP<ArrayRCP<unsigned char> > pCompactIndex_;
632 ArrayRCP<ArrayRCP<scalar_t> > pSize_;
637 ArrayRCP<part_t> parts_;
641 part_t nGlobalPartsSolution_;
647 ArrayRCP<int> procs_;
652 const RCP<Algorithm<Adapter> > algorithm_;
659 template <
typename Adapter>
661 const RCP<const Environment> &env,
662 const RCP<
const Comm<int> > &comm,
665 : env_(env), comm_(comm),
667 nGlobalParts_(0), nLocalParts_(0),
668 localFraction_(0), nWeightsPerObj_(),
669 onePartPerProc_(false), partDist_(), procDist_(),
670 procDistEquallySpread_(false),
671 pSizeUniform_(), pCompactIndex_(), pSize_(),
672 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
673 procs_(), algorithm_(algorithm)
675 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);
677 setPartDistribution();
682 ArrayRCP<part_t> *noIds =
new ArrayRCP<part_t> [nWeightsPerObj_];
683 ArrayRCP<scalar_t> *noSizes =
new ArrayRCP<scalar_t> [nWeightsPerObj_];
684 ArrayRCP<ArrayRCP<part_t> > ids(noIds, 0, nWeightsPerObj_,
true);
685 ArrayRCP<ArrayRCP<scalar_t> > sizes(noSizes, 0, nWeightsPerObj_,
true);
687 setPartSizes(ids.view(0, nWeightsPerObj_), sizes.view(0, nWeightsPerObj_));
689 env_->memory(
"After construction of solution");
692 template <
typename Adapter>
694 const RCP<const Environment> &env,
695 const RCP<
const Comm<int> > &comm,
697 ArrayView<ArrayRCP<part_t> > reqPartIds,
698 ArrayView<ArrayRCP<scalar_t> > reqPartSizes,
700 : env_(env), comm_(comm),
702 nGlobalParts_(0), nLocalParts_(0),
703 localFraction_(0), nWeightsPerObj_(),
704 onePartPerProc_(false), partDist_(), procDist_(),
705 procDistEquallySpread_(false),
706 pSizeUniform_(), pCompactIndex_(), pSize_(),
707 parts_(), haveSolution_(false), nGlobalPartsSolution_(0),
708 procs_(), algorithm_(algorithm)
710 nWeightsPerObj_ = (nUserWeights ? nUserWeights : 1);
712 setPartDistribution();
714 setPartSizes(reqPartIds, reqPartSizes);
716 env_->memory(
"After construction of solution");
719 template <
typename Adapter>
724 const ParameterList &pl = env_->getParameters();
725 size_t haveGlobalNumParts=0, haveLocalNumParts=0;
726 int numLocal=0, numGlobal=0;
728 const Teuchos::ParameterEntry *pe = pl.getEntryPtr(
"num_global_parts");
731 haveGlobalNumParts = 1;
732 nGlobalParts_ =
part_t(pe->getValue(&nGlobalParts_));
733 numGlobal = nGlobalParts_;
736 pe = pl.getEntryPtr(
"num_local_parts");
739 haveLocalNumParts = 1;
740 nLocalParts_ =
part_t(pe->getValue(&nLocalParts_));
741 numLocal = nLocalParts_;
747 partToProc(
true, haveLocalNumParts, haveGlobalNumParts,
748 numLocal, numGlobal);
752 int nprocs = comm_->getSize();
753 int rank = comm_->getRank();
755 if (onePartPerProc_){
756 nGlobalParts_ = nprocs;
759 else if (partDist_.size() > 0){
760 nGlobalParts_ = partDist_.size() - 1;
761 int pstart = partDist_[0];
762 for (
part_t i=1; i <= nGlobalParts_; i++){
763 int pend = partDist_[i];
764 if (rank >= pstart && rank < pend){
765 int numOwners = pend - pstart;
767 localFraction_ = 1.0 / numOwners;
773 else if (procDist_.size() > 0){
774 nGlobalParts_ = procDist_[nprocs];
775 nLocalParts_ = procDist_[rank+1] - procDist_[rank];
778 throw std::logic_error(
"partToProc error");
783 template <
typename Adapter>
784 void PartitioningSolution<Adapter>::setPartSizes(
785 ArrayView<ArrayRCP<part_t> > ids, ArrayView<ArrayRCP<scalar_t> > sizes)
787 int widx = nWeightsPerObj_;
790 size_t *countBuf =
new size_t [widx*2];
791 ArrayRCP<size_t> counts(countBuf, 0, widx*2,
true);
793 fail = ((ids.size() != widx) || (sizes.size() != widx));
795 for (
int w=0; !fail && w < widx; w++){
796 counts[w] = ids[w].size();
797 if (ids[w].size() != sizes[w].size()) fail=
true;
800 env_->globalBugAssertion(__FILE__, __LINE__,
"bad argument arrays", fail==0,
805 ArrayRCP<scalar_t> *emptySizes=
new ArrayRCP<scalar_t> [widx];
806 pSize_ = arcp(emptySizes, 0, widx);
808 ArrayRCP<unsigned char> *emptyIndices=
new ArrayRCP<unsigned char> [widx];
809 pCompactIndex_ = arcp(emptyIndices, 0, widx);
811 bool *info =
new bool [widx];
812 pSizeUniform_ = arcp(info, 0, widx);
813 for (
int w=0; w < widx; w++)
814 pSizeUniform_[w] =
true;
816 if (nGlobalParts_ == 1){
820 size_t *ptr1 = counts.getRawPtr();
821 size_t *ptr2 = counts.getRawPtr() + widx;
824 reduceAll<int, size_t>(*comm_, Teuchos::REDUCE_MAX, widx, ptr1, ptr2);
830 for (
int w=0; w < widx; w++)
831 if (counts[widx+w] > 0){
833 pSizeUniform_[w] =
false;
842 int nprocs = comm_->getSize();
843 int rank = comm_->getRank();
845 for (
int w=0; w < widx; w++){
846 if (pSizeUniform_[w])
continue;
851 part_t length = ids[w].size();
853 Teuchos::gatherAll<int, part_t>(*comm_, 1, &length,
858 for (
int i=0; i < nprocs; i++)
859 total += allLength[i];
862 scalar_t *partSizes =
new scalar_t [total];
864 ArrayView<part_t> idArray(partNums, total);
865 ArrayView<scalar_t> sizeArray(partSizes, total);
868 for (
int i=0; i < length; i++){
869 *partNums++ = ids[w][i];
870 *partSizes++ = sizes[w][i];
874 for (
int p=1; p < nprocs; p++){
875 if (allLength[p] > 0){
876 Teuchos::receive<int, part_t>(*comm_, p,
877 allLength[p], partNums);
878 Teuchos::receive<int, scalar_t>(*comm_, p,
879 allLength[p], partSizes);
880 partNums += allLength[p];
881 partSizes += allLength[p];
888 computePartSizes(w, idArray, sizeArray);
892 delete [] idArray.getRawPtr();
893 delete [] sizeArray.getRawPtr();
898 Teuchos::send<int, part_t>(*comm_, length, ids[w].getRawPtr(), 0);
899 Teuchos::send<int, scalar_t>(*comm_, length, sizes[w].getRawPtr(), 0);
903 broadcastPartSizes(w);
907 template <
typename Adapter>
908 void PartitioningSolution<Adapter>::broadcastPartSizes(
int widx)
910 env_->localBugAssertion(__FILE__, __LINE__,
"preallocations",
911 pSize_.size()>widx &&
912 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
915 int rank = comm_->getRank();
916 int nprocs = comm_->getSize();
917 part_t nparts = nGlobalParts_;
925 if (pSizeUniform_[widx] ==
true)
927 else if (pCompactIndex_[widx].size() > 0)
934 Teuchos::broadcast<int, char>(*comm_, 0, 1, &flag);
940 pSizeUniform_[widx] =
true;
949 unsigned char *idxbuf = NULL;
952 idxbuf =
new unsigned char [nparts];
953 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, idxbuf);
956 env_->localBugAssertion(__FILE__, __LINE__,
"index list size",
958 idxbuf = pCompactIndex_[widx].getRawPtr();
963 Teuchos::broadcast<int, char>(*comm_, 0, nparts,
964 reinterpret_cast<char *
>(idxbuf));
969 pCompactIndex_[widx] = arcp(idxbuf, 0, nparts,
true);
973 unsigned char maxIdx=0;
974 for (
part_t p=0; p < nparts; p++)
975 if (idxbuf[p] > maxIdx) maxIdx = idxbuf[p];
977 int numSizes = maxIdx + 1;
979 scalar_t *sizeList = NULL;
982 sizeList =
new scalar_t [numSizes];
983 env_->localMemoryAssertion(__FILE__, __LINE__, numSizes, sizeList);
986 env_->localBugAssertion(__FILE__, __LINE__,
"wrong number of sizes",
989 sizeList = pSize_[widx].getRawPtr();
993 Teuchos::broadcast<int, scalar_t>(*comm_, 0, numSizes, sizeList);
998 pSize_[widx] = arcp(sizeList, 0, numSizes,
true);
1007 scalar_t *sizeList = NULL;
1010 sizeList =
new scalar_t [nparts];
1011 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, sizeList);
1014 env_->localBugAssertion(__FILE__, __LINE__,
"wrong number of sizes",
1017 sizeList = pSize_[widx].getRawPtr();
1021 Teuchos::broadcast<int, scalar_t >(*comm_, 0, nparts, sizeList);
1026 pSize_[widx] = arcp(sizeList, 0, nparts);
1032 template <
typename Adapter>
1033 void PartitioningSolution<Adapter>::computePartSizes(
int widx,
1034 ArrayView<part_t> ids, ArrayView<scalar_t> sizes)
1036 int len = ids.size();
1039 pSizeUniform_[widx] =
true;
1043 env_->localBugAssertion(__FILE__, __LINE__,
"bad array sizes",
1046 env_->localBugAssertion(__FILE__, __LINE__,
"bad index",
1049 env_->localBugAssertion(__FILE__, __LINE__,
"preallocations",
1050 pSize_.size()>widx &&
1051 pSizeUniform_.size()>widx && pCompactIndex_.size()>widx,
1057 part_t nparts = nGlobalParts_;
1058 unsigned char *buf =
new unsigned char [nparts];
1059 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, buf);
1060 memset(buf, 0, nparts);
1061 ArrayRCP<unsigned char> partIdx(buf, 0, nparts,
true);
1063 scalar_t
epsilon = 10e-5 / nparts;
1064 scalar_t min=sizes[0], max=sizes[0], sum=0;
1066 for (
int i=0; i < len; i++){
1068 scalar_t size = sizes[i];
1070 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part id",
1073 env_->localInputAssertion(__FILE__, __LINE__,
"invalid part size", size>=0,
1080 env_->localInputAssertion(__FILE__, __LINE__,
1081 "multiple sizes provided for one part", partIdx[
id]==0,
BASIC_ASSERTION);
1085 if (size < min) min = size;
1086 if (size > max) max = size;
1095 scalar_t *allSizes =
new scalar_t [2];
1096 env_->localMemoryAssertion(__FILE__, __LINE__, 2, allSizes);
1098 ArrayRCP<scalar_t> sizeArray(allSizes, 0, 2,
true);
1100 int numNonZero = nparts - len;
1103 allSizes[1] = 1.0 / numNonZero;
1105 for (
part_t p=0; p < nparts; p++)
1108 for (
int i=0; i < len; i++)
1111 pSize_[widx] = sizeArray;
1112 pCompactIndex_[widx] = partIdx;
1117 if (max - min <= epsilon){
1118 pSizeUniform_[widx] =
true;
1123 scalar_t avg = sum / nparts;
1130 scalar_t *tmp =
new scalar_t [len];
1131 env_->localMemoryAssertion(__FILE__, __LINE__, len, tmp);
1132 memcpy(tmp, sizes.getRawPtr(),
sizeof(scalar_t) * len);
1133 ArrayRCP<scalar_t> partSizes(tmp, 0, len,
true);
1135 std::sort(partSizes.begin(), partSizes.end());
1139 Array<scalar_t> nextUniqueSize;
1140 nextUniqueSize.push_back(partSizes[len-1]);
1141 scalar_t curr = partSizes[len-1];
1143 bool haveAvg =
false;
1144 if (curr - avg <= epsilon)
1147 for (
int i=len-2; i >= 0; i--){
1148 scalar_t val = partSizes[i];
1149 if (curr - val > epsilon){
1150 nextUniqueSize.push_back(val);
1152 if (avgIndex==len && val > avg && val - avg <= epsilon){
1154 avgIndex = nextUniqueSize.size() - 1;
1162 size_t numSizes = nextUniqueSize.size();
1163 int sizeArrayLen = numSizes;
1170 if (!haveAvg) sizeArrayLen++;
1172 scalar_t *allSizes =
new scalar_t [sizeArrayLen];
1173 env_->localMemoryAssertion(__FILE__, __LINE__, sizeArrayLen, allSizes);
1174 ArrayRCP<scalar_t> sizeArray(allSizes, 0, sizeArrayLen,
true);
1176 int newAvgIndex = sizeArrayLen;
1178 for (
int i=numSizes-1, idx=0; i >= 0; i--){
1180 if (newAvgIndex == sizeArrayLen){
1182 if (haveAvg && i==avgIndex)
1185 else if (!haveAvg && avg < nextUniqueSize[i]){
1187 allSizes[idx++] = avg;
1191 allSizes[idx++] = nextUniqueSize[i];
1194 env_->localBugAssertion(__FILE__, __LINE__,
"finding average in list",
1197 for (
int i=0; i < nparts; i++){
1198 buf[i] = newAvgIndex;
1201 sum = (nparts - len) * allSizes[newAvgIndex];
1203 for (
int i=0; i < len; i++){
1205 scalar_t size = sizes[i];
1210 if (size < avg && avg - size <= epsilon)
1211 index = newAvgIndex;
1213 typename ArrayRCP<scalar_t>::iterator found =
1214 std::lower_bound(sizeArray.begin(), sizeArray.end(), size);
1216 env_->localBugAssertion(__FILE__, __LINE__,
"size array",
1219 index = found - sizeArray.begin();
1223 sum += allSizes[index];
1226 for (
int i=0; i < sizeArrayLen; i++){
1227 sizeArray[i] /= sum;
1230 pCompactIndex_[widx] = partIdx;
1231 pSize_[widx] = sizeArray;
1237 tmp =
new scalar_t [nparts];
1238 env_->localMemoryAssertion(__FILE__, __LINE__, nparts, tmp);
1240 sum += ((nparts - len) * avg);
1242 for (
int i=0; i < nparts; i++){
1246 for (
int i=0; i < len; i++){
1247 tmp[ids[i]] = sizes[i]/sum;
1250 pSize_[widx] = arcp(tmp, 0, nparts);
1254 template <
typename Adapter>
1259 size_t len = partList.size();
1268 part_t lMin = (len > 0 ? std::numeric_limits<part_t>::max() : 0);
1271 for (
size_t i = 0; i < len; i++) {
1272 if (partList[i] < lMin) lMin = partList[i];
1273 if (partList[i] > lMax) lMax = partList[i];
1275 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MIN, 1, &lMin, &gMin);
1276 Teuchos::reduceAll<int, part_t>(*comm_, Teuchos::REDUCE_MAX, 1, &lMax, &gMax);
1278 nGlobalPartsSolution_ = gMax - gMin + 1;
1283 if (!onePartPerProc_){
1285 int *procs =
new int [len];
1286 env_->localMemoryAssertion(__FILE__, __LINE__, len, procs);
1287 procs_ = arcp<int>(procs, 0, len);
1290 part_t *parts = partList.getRawPtr();
1292 if (procDist_.size() > 0){
1295 for (
size_t i=0; i < len; i++){
1296 partToProcsMap(parts[i], procs[i], procId);
1301 lno_t *partCounter =
new lno_t [nGlobalPartsSolution_];
1302 env_->localMemoryAssertion(__FILE__, __LINE__, nGlobalPartsSolution_,
1305 int numProcs = comm_->getSize();
1309 memset(partCounter, 0,
sizeof(lno_t) * nGlobalPartsSolution_);
1311 for (
typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++)
1312 partCounter[parts[i]]++;
1314 lno_t *procCounter =
new lno_t [numProcs];
1315 env_->localMemoryAssertion(__FILE__, __LINE__, numProcs, procCounter);
1318 int proc2 = partDist_[0];
1320 for (
part_t part=1; part < nGlobalParts_; part++){
1322 proc2 = partDist_[part+1];
1323 int numprocs = proc2 - proc1;
1325 double dNum = partCounter[part];
1326 double dProcs = numprocs;
1329 double each = floor(dNum/dProcs);
1330 double extra = fmod(dNum,dProcs);
1332 for (
int proc=proc1, i=0; proc<proc2; proc++, i++){
1334 procCounter[proc] = lno_t(each) + 1;
1336 procCounter[proc] = lno_t(each);
1340 delete [] partCounter;
1342 for (
typename ArrayRCP<part_t>::size_type i=0; i < partList.size(); i++){
1343 if (partList[i] >= nGlobalParts_){
1346 procs[i] = comm_->getRank();
1349 part_t partNum = parts[i];
1350 proc1 = partDist_[partNum];
1351 proc2 = partDist_[partNum + 1];
1354 for (proc=proc1; proc < proc2; proc++){
1355 if (procCounter[proc] > 0){
1357 procCounter[proc]--;
1361 env_->localBugAssertion(__FILE__, __LINE__,
"part to proc",
1365 delete [] procCounter;
1375 bool doRemap =
false;
1376 const Teuchos::ParameterEntry *pe =
1377 env_->getParameters().getEntryPtr(
"remap_parts");
1378 if (pe) doRemap = pe->getValue(&doRemap);
1379 if (doRemap) RemapParts();
1381 haveSolution_ =
true;
1383 env_->memory(
"After Solution has processed algorithm's answer");
1388 template <
typename Adapter>
1390 double &numParts,
part_t &partMin,
part_t &partMax)
const
1392 if (onePartPerProc_){
1394 partMin = partMax = procId;
1396 else if (procDist_.size() > 0){
1397 partMin = procDist_[procId];
1398 partMax = procDist_[procId+1] - 1;
1399 numParts = procDist_[procId+1] - partMin;
1404 std::vector<int>::const_iterator entry;
1405 entry = std::upper_bound(partDist_.begin(), partDist_.end(), procId);
1407 size_t partIdx = entry - partDist_.begin();
1408 int numProcs = partDist_[partIdx] - partDist_[partIdx-1];
1409 partMin = partMax = int(partIdx) - 1;
1410 numParts = 1.0 / numProcs;
1414 template <
typename Adapter>
1415 void PartitioningSolution<Adapter>::partToProcsMap(
part_t partId,
1416 int &procMin,
int &procMax)
const
1418 if (partId >= nGlobalParts_){
1422 procMin = procMax = comm_->getRank();
1424 else if (onePartPerProc_){
1425 procMin = procMax = int(partId);
1427 else if (procDist_.size() > 0){
1428 if (procDistEquallySpread_) {
1430 double fProcs = comm_->getSize();
1431 double fParts = nGlobalParts_;
1432 double each = fParts / fProcs;
1433 procMin = int(partId / each);
1434 while (procDist_[procMin] > partId) procMin--;
1435 while (procDist_[procMin+1] <= partId) procMin++;
1442 typename std::vector<part_t>::const_iterator entry;
1443 entry = std::upper_bound(procDist_.begin(), procDist_.end(), partId);
1445 size_t procIdx = entry - procDist_.begin();
1446 procMin = procMax = int(procIdx) - 1;
1450 procMin = partDist_[partId];
1451 procMax = partDist_[partId+1] - 1;
1455 template <
typename Adapter>
1457 int c1,
int c2)
const
1459 if (c1 < 0 || c1 >= nWeightsPerObj_ || c2 < 0 || c2 >= nWeightsPerObj_ )
1460 throw std::logic_error(
"criteriaHaveSamePartSizes error");
1462 bool theSame =
false;
1467 else if (pSizeUniform_[c1] ==
true && pSizeUniform_[c2] ==
true)
1470 else if (pCompactIndex_[c1].size() == pCompactIndex_[c2].size()){
1472 bool useIndex = pCompactIndex_[c1].size() > 0;
1474 for (
part_t p=0; theSame && p < nGlobalParts_; p++)
1475 if (pSize_[c1][pCompactIndex_[c1][p]] !=
1476 pSize_[c2][pCompactIndex_[c2][p]])
1480 for (
part_t p=0; theSame && p < nGlobalParts_; p++)
1481 if (pSize_[c1][p] != pSize_[c2][p])
1504 template <
typename Adapter>
1506 bool doCheck,
bool haveNumLocalParts,
bool haveNumGlobalParts,
1507 int numLocalParts,
int numGlobalParts)
1510 typedef SSIZE_T ssize_t;
1512 int nprocs = comm_->getSize();
1513 ssize_t reducevals[4];
1514 ssize_t sumHaveGlobal=0, sumHaveLocal=0;
1515 ssize_t sumGlobal=0, sumLocal=0;
1516 ssize_t maxGlobal=0, maxLocal=0;
1517 ssize_t
vals[4] = {haveNumGlobalParts, haveNumLocalParts,
1518 numGlobalParts, numLocalParts};
1526 reduceAll<int, ssize_t>(*comm_, Teuchos::REDUCE_SUM, 4,
vals, reducevals);
1530 sumHaveGlobal = reducevals[0];
1531 sumHaveLocal = reducevals[1];
1532 sumGlobal = reducevals[2];
1533 sumLocal = reducevals[3];
1535 env_->localInputAssertion(__FILE__, __LINE__,
1536 "Either all procs specify num_global/local_parts or none do",
1537 (sumHaveGlobal == 0 || sumHaveGlobal == nprocs) &&
1538 (sumHaveLocal == 0 || sumHaveLocal == nprocs),
1542 if (haveNumLocalParts)
1543 sumLocal = numLocalParts * nprocs;
1544 if (haveNumGlobalParts)
1545 sumGlobal = numGlobalParts * nprocs;
1547 sumHaveGlobal = haveNumGlobalParts ? nprocs : 0;
1548 sumHaveLocal = haveNumLocalParts ? nprocs : 0;
1550 maxLocal = numLocalParts;
1551 maxGlobal = numGlobalParts;
1554 if (!haveNumLocalParts && !haveNumGlobalParts){
1555 onePartPerProc_ =
true;
1559 if (haveNumGlobalParts){
1561 vals[0] = numGlobalParts;
1562 vals[1] = numLocalParts;
1564 reduceAll<int, ssize_t>(
1565 *comm_, Teuchos::REDUCE_MAX, 2,
vals, reducevals);
1569 maxGlobal = reducevals[0];
1570 maxLocal = reducevals[1];
1572 env_->localInputAssertion(__FILE__, __LINE__,
1573 "Value for num_global_parts is different on different processes.",
1578 env_->localInputAssertion(__FILE__, __LINE__,
1579 "Sum of num_local_parts does not equal requested num_global_parts",
1582 if (sumLocal == nprocs && maxLocal == 1){
1583 onePartPerProc_ =
true;
1588 if (maxGlobal == nprocs){
1589 onePartPerProc_ =
true;
1597 if (sumHaveLocal == nprocs){
1603 procDist_.resize(nprocs+1);
1605 catch (std::exception &e){
1606 throw(std::bad_alloc());
1609 part_t *procArray = &procDist_[0];
1613 gatherAll<int, part_t>(*comm_, 1, &tmp, nprocs, procArray + 1);
1619 for (
int proc=0; proc < nprocs; proc++)
1620 procArray[proc+1] += procArray[proc];
1626 double fParts = numGlobalParts;
1627 double fProcs = nprocs;
1629 if (fParts < fProcs){
1632 partDist_.resize(
size_t(fParts+1));
1634 catch (std::exception &e){
1635 throw(std::bad_alloc());
1638 int *partArray = &partDist_[0];
1640 double each = floor(fProcs / fParts);
1641 double extra = fmod(fProcs, fParts);
1644 for (
part_t part=0; part < numGlobalParts; part++){
1645 int numOwners = int(each + ((part<extra) ? 1 : 0));
1646 partArray[part+1] = partArray[part] + numOwners;
1649 env_->globalBugAssertion(__FILE__, __LINE__,
"#parts != #procs",
1652 else if (fParts > fProcs){
1657 procDistEquallySpread_ =
true;
1660 procDist_.resize(
size_t(fProcs+1));
1662 catch (std::exception &e){
1663 throw(std::bad_alloc());
1666 part_t *procArray = &procDist_[0];
1668 double each = floor(fParts / fProcs);
1669 double extra = fmod(fParts, fProcs);
1672 for (
int proc=0; proc < nprocs; proc++){
1673 part_t numParts =
part_t(each + ((proc<extra) ? 1 : 0));
1674 procArray[proc+1] = procArray[proc] + numParts;
1677 env_->globalBugAssertion(__FILE__, __LINE__,
"#parts != #procs",
1681 env_->globalBugAssertion(__FILE__, __LINE__,
1699 template <
typename Adapter>
1702 size_t len = parts_.size();
1704 part_t me = comm_->getRank();
1705 int np = comm_->getSize();
1707 if (np < nGlobalParts_) {
1709 std::cout <<
"Remapping not yet supported for "
1710 <<
"num_global_parts " << nGlobalParts_
1711 <<
" > num procs " << np << std::endl;
1724 std::map<part_t, long> edges;
1729 for (
size_t i = 0; i < len; i++) {
1731 if (parts_[i] == me) lstaying++;
1734 Teuchos::reduceAll<int, long>(*comm_, Teuchos::REDUCE_SUM, 1,
1735 &lstaying, &gstaying);
1740 int nedges = edges.size();
1743 part_t tnVtx = np + nGlobalParts_;
1747 idx =
new int[tnVtx+1];
1748 sizes =
new int[np];
1752 Teuchos::gather<int, int>(&nedges, 1, sizes, 1, 0, *comm_);
1757 for (
int i = 0; i < np; i++)
1758 idx[i+1] = idx[i] + sizes[i];
1766 bufv =
new part_t[nedges];
1767 bufw =
new long[nedges];
1769 for (
typename std::map<part_t, long>::iterator it = edges.begin();
1770 it != edges.end(); it++) {
1771 bufv[cnt] = it->first;
1772 bufw[cnt] = it->second;
1783 adj =
new part_t[idx[np]];
1784 wgt =
new long[idx[np]];
1787 Teuchos::gatherv<int, part_t>(bufv, cnt, adj, sizes, idx, 0, *comm_);
1788 Teuchos::gatherv<int, long>(bufw, cnt, wgt, sizes, idx, 0, *comm_);
1799 for (
int i = 0; i < idx[np]; i++) {
1804 for (
part_t i = np; i < tnVtx; i++) {
1809 std::cout <<
"IDX ";
1810 for (
part_t i = 0; i <= tnVtx; i++) std::cout << idx[i] <<
" ";
1811 std::cout << std::endl;
1813 std::cout <<
"ADJ ";
1814 for (
part_t i = 0; i < idx[tnVtx]; i++) std::cout << adj[i] <<
" ";
1815 std::cout << std::endl;
1817 std::cout <<
"WGT ";
1818 for (
part_t i = 0; i < idx[tnVtx]; i++) std::cout << wgt[i] <<
" ";
1819 std::cout << std::endl;
1824 for (
part_t i = 0; i < tnVtx; i++) match[i] = i;
1826 Zoltan2::GreedyMWM<part_t, long>(idx, adj, wgt, tnVtx, match);
1829 std::cout <<
"After matching: " << nmatches <<
" found" << std::endl;
1830 for (
part_t i = 0; i < tnVtx; i++)
1831 std::cout <<
"match[" << i <<
"] = " << match[i]
1832 << ((match[i] != i &&
1833 (i < np && match[i] != i+np))
1839 bool nontrivial =
false;
1841 for (
part_t i = 0; i < np; i++) {
1842 if ((match[i] != i) && (match[i] != (i+np))) {
1851 remap =
new part_t[nGlobalParts_];
1852 for (
part_t i = 0; i < nGlobalParts_; i++) remap[i] = -1;
1854 bool *used =
new bool[np];
1855 for (
part_t i = 0; i < np; i++) used[i] =
false;
1858 for (
part_t i = 0; i < nGlobalParts_; i++) {
1860 if (match[tmp] != tmp) {
1861 remap[i] = match[tmp];
1862 used[match[tmp]] =
true;
1867 for (
part_t i = 0; i < nGlobalParts_; i++) {
1868 if (remap[i] > -1)
continue;
1876 for (
part_t i = 0, uidx = 0; i < nGlobalParts_; i++) {
1877 if (remap[i] > -1)
continue;
1878 while (used[uidx]) uidx++;
1887 std::cout <<
"Remap vector: ";
1888 for (
part_t i = 0; i < nGlobalParts_; i++) std::cout << remap[i] <<
" ";
1889 std::cout << std::endl;
1892 long newgstaying = measure_stays(remap, idx, adj, wgt,
1894 doRemap = (newgstaying > gstaying);
1895 std::cout <<
"gstaying " << gstaying <<
" measure(input) "
1896 << measure_stays(NULL, idx, adj, wgt, nGlobalParts_, np)
1897 <<
" newgstaying " << newgstaying
1898 <<
" nontrivial " << nontrivial
1899 <<
" doRemap " << doRemap << std::endl;
1906 Teuchos::broadcast<int, int>(*comm_, 0, 1, &doRemap);
1909 if (me != 0) remap =
new part_t[nGlobalParts_];
1910 Teuchos::broadcast<int, part_t>(*comm_, 0, nGlobalParts_, remap);
1911 for (
size_t i = 0; i < len; i++) {
1912 parts_[i] = remap[parts_[i]];
const int * getProcListView() const
Returns the process list corresponding to the global ID list.
scalar_t getCriteriaPartSize(int idx, part_t part) const
Get the size for a given weight index and a given part.
Defines the Solution base class.
fast typical checks for valid arguments
#define Z2_FORWARD_EXCEPTIONS
Forward an exception back through call stack.
part_t pointAssign(int dim, scalar_t *point) const
Return the part overlapping a given point in space;.
void RemapParts()
Remap a new partition for maximum overlap with an input partition.
Just a placeholder for now.
more involved, like validate a graph
size_t getLocalNumberOfParts() const
Returns the number of parts to be assigned to this process.
virtual bool isPartitioningTreeBinary() const
calculate if partition tree is binary.
void getPartsForProc(int procId, double &numParts, part_t &partMin, part_t &partMax) const
Get the parts belonging to a process.
void getProcsForPart(part_t partId, part_t &procMin, part_t &procMax) const
Get the processes containing a part.
sub-steps, each method's entry and exit
bool oneToOnePartDistribution() const
Is the part-to-process distribution is one-to-one.
SparseMatrixAdapter_t::part_t part_t
long measure_stays(part_t *remap, int *idx, part_t *adj, long *wgt, part_t nrhs, part_t nlhs)
scalar_t getLocalFractionOfPart() const
If parts are divided across processes, return the fraction of a part on this process.
A PartitioningSolution is a solution to a partitioning problem.
const RCP< const Comm< int > > & getCommunicator() const
Return the communicator associated with the solution.
void boxAssign(int dim, scalar_t *lower, scalar_t *upper, size_t &nPartsFound, part_t **partsFound) const
Return an array of all parts overlapping a given box in space.
const part_t * getPartListView() const
Returns the part list corresponding to the global ID list.
PartitioningSolution(const RCP< const Environment > &env, const RCP< const Comm< int > > &comm, int nUserWeights, const RCP< Algorithm< Adapter > > &algorithm=Teuchos::null)
Constructor when part sizes are not supplied.
Algorithm defines the base class for all algorithms.
void getPartitionTree(part_t &numTreeVerts, std::vector< part_t > &permPartNums, std::vector< part_t > &splitRangeBeg, std::vector< part_t > &splitRangeEnd, std::vector< part_t > &treeVertParents) const
get the partition tree - fill the relevant arrays
Greedy Maximal Weight Matching.
static const std::string fail
void setParts(ArrayRCP< part_t > &partList)
The algorithm uses setParts to set the solution.
std::vector< Zoltan2::coordinateModelPartBox > & getPartBoxesView() const
returns the part box boundary list.
Defines the Environment class.
const part_t * getProcDistribution() const
Return a distribution by process.
void getCommunicationGraph(ArrayRCP< part_t > &comXAdj, ArrayRCP< part_t > &comAdj) const
returns communication graph resulting from geometric partitioning.
int getNumberOfCriteria() const
Get the number of criteria (object weights)
const int * getPartDistribution() const
Return a distribution by part.
#define Z2_THROW_OUTSIDE_ERROR(env)
Throw an error returned from outside the Zoltan2 library.
size_t getActualGlobalNumberOfParts() const
Returns the actual global number of parts provided in setParts().
bool criteriaHasUniformPartSizes(int idx) const
Determine if balancing criteria has uniform part sizes. (User can specify differing part sizes...
bool criteriaHaveSamePartSizes(int c1, int c2) const
Return true if the two weight indices have the same part size information.
size_t getTargetGlobalNumberOfParts() const
Returns the global number of parts desired in the solution.
const RCP< const Environment > & getEnvironment() const
Return the environment associated with the solution.
Zoltan2::BasicUserTypes< zscalar_t, zlno_t, zgno_t > user_t