10 #include "Teko_BlockingTpetra.hpp"
12 #include "Tpetra_Details_makeColMap_decl.hpp"
14 #include "Tpetra_Vector.hpp"
15 #include "Tpetra_Map.hpp"
16 #include "Kokkos_StdAlgorithms.hpp"
17 #include "Kokkos_Sort.hpp"
18 #include "Kokkos_NestedSort.hpp"
19 #include "KokkosSparse_SortCrs.hpp"
25 namespace TpetraHelpers {
41 const MapPair buildSubMap(
const std::vector<GO>& gid,
const Teuchos::Comm<int>& comm) {
42 using GST = Tpetra::global_size_t;
43 const GST invalid = Teuchos::OrdinalTraits<GST>::invalid();
44 Teuchos::RCP<Tpetra::Map<LO, GO, NT>> gidMap = rcp(
45 new Tpetra::Map<LO, GO, NT>(invalid, Teuchos::ArrayView<const GO>(gid), 0, rcpFromRef(comm)));
46 Teuchos::RCP<Tpetra::Map<LO, GO, NT>> contigMap =
47 rcp(
new Tpetra::Map<LO, GO, NT>(invalid, gid.size(), 0, rcpFromRef(comm)));
49 return std::make_pair(gidMap, contigMap);
61 const ImExPair buildExportImport(
const Tpetra::Map<LO, GO, NT>& baseMap,
const MapPair& maps) {
62 return std::make_pair(rcp(
new Tpetra::Import<LO, GO, NT>(rcpFromRef(baseMap), maps.first)),
63 rcp(
new Tpetra::Export<LO, GO, NT>(maps.first, rcpFromRef(baseMap))));
73 void buildSubVectors(
const std::vector<MapPair>& maps,
74 std::vector<RCP<Tpetra::MultiVector<ST, LO, GO, NT>>>& vectors,
int count) {
75 std::vector<MapPair>::const_iterator mapItr;
78 for (mapItr = maps.begin(); mapItr != maps.end(); mapItr++) {
80 RCP<Tpetra::MultiVector<ST, LO, GO, NT>> mv =
81 rcp(
new Tpetra::MultiVector<ST, LO, GO, NT>((*mapItr).second, count));
82 vectors.push_back(mv);
93 void one2many(std::vector<RCP<Tpetra::MultiVector<ST, LO, GO, NT>>>& many,
94 const Tpetra::MultiVector<ST, LO, GO, NT>& single,
95 const std::vector<RCP<Tpetra::Import<LO, GO, NT>>>& subImport) {
96 std::vector<RCP<Tpetra::MultiVector<ST, LO, GO, NT>>>::const_iterator vecItr;
97 std::vector<RCP<Tpetra::Import<LO, GO, NT>>>::const_iterator impItr;
100 for (vecItr = many.begin(), impItr = subImport.begin(); vecItr != many.end();
101 ++vecItr, ++impItr) {
103 RCP<Tpetra::MultiVector<ST, LO, GO, NT>> destVec = *vecItr;
106 const Tpetra::Map<LO, GO, NT>& globalMap = *(*impItr)->getTargetMap();
109 GO lda = destVec->getStride();
110 GO destSize = destVec->getGlobalLength() * destVec->getNumVectors();
111 std::vector<ST> destArray(destSize);
112 Teuchos::ArrayView<ST> destVals(destArray);
113 destVec->get1dCopy(destVals, lda);
114 Tpetra::MultiVector<ST, LO, GO, NT> importVector(rcpFromRef(globalMap), destVals, lda,
115 destVec->getNumVectors());
118 importVector.doImport(single, **impItr, Tpetra::INSERT);
120 Tpetra::Import<LO, GO, NT> importer(destVec->getMap(), destVec->getMap());
121 importVector.replaceMap(destVec->getMap());
122 destVec->doExport(importVector, importer, Tpetra::INSERT);
136 void many2one(Tpetra::MultiVector<ST, LO, GO, NT>& one,
137 const std::vector<RCP<
const Tpetra::MultiVector<ST, LO, GO, NT>>>& many,
138 const std::vector<RCP<Tpetra::Export<LO, GO, NT>>>& subExport) {
139 std::vector<RCP<const Tpetra::MultiVector<ST, LO, GO, NT>>>::const_iterator vecItr;
140 std::vector<RCP<Tpetra::Export<LO, GO, NT>>>::const_iterator expItr;
143 for (vecItr = many.begin(), expItr = subExport.begin(); vecItr != many.end();
144 ++vecItr, ++expItr) {
146 RCP<const Tpetra::MultiVector<ST, LO, GO, NT>> srcVec = *vecItr;
149 const Tpetra::Map<LO, GO, NT>& globalMap = *(*expItr)->getSourceMap();
152 GO lda = srcVec->getStride();
153 GO srcSize = srcVec->getGlobalLength() * srcVec->getNumVectors();
154 std::vector<ST> srcArray(srcSize);
155 Teuchos::ArrayView<ST> srcVals(srcArray);
156 srcVec->get1dCopy(srcVals, lda);
157 Tpetra::MultiVector<ST, LO, GO, NT> exportVector(rcpFromRef(globalMap), srcVals, lda,
158 srcVec->getNumVectors());
161 one.doExport(exportVector, **expItr, Tpetra::INSERT);
170 RCP<Tpetra::Vector<GO, LO, GO, NT>> getSubBlockColumnGIDs(
171 const Tpetra::CrsMatrix<ST, LO, GO, NT>& A,
const MapPair& mapPair) {
172 RCP<const Tpetra::Map<LO, GO, NT>> blkGIDMap = mapPair.first;
173 RCP<const Tpetra::Map<LO, GO, NT>> blkContigMap = mapPair.second;
176 Tpetra::Vector<GO, LO, GO, NT> rIndex(A.getRowMap(),
true);
177 const auto invalidLO = Teuchos::OrdinalTraits<LO>::invalid();
178 for (
size_t i = 0; i < A.getLocalNumRows(); i++) {
180 LO lid = blkGIDMap->getLocalElement(
181 A.getRowMap()->getGlobalElement(i));
182 if (lid != invalidLO)
183 rIndex.replaceLocalValue(i, blkContigMap->getGlobalElement(lid));
185 rIndex.replaceLocalValue(i, invalidLO);
190 Tpetra::Import<LO, GO, NT>
import(A.getRowMap(), A.getColMap());
191 RCP<Tpetra::Vector<GO, LO, GO, NT>> cIndex =
192 rcp(
new Tpetra::Vector<GO, LO, GO, NT>(A.getColMap(),
true));
193 cIndex->doImport(rIndex,
import, Tpetra::INSERT);
198 RCP<Tpetra::CrsMatrix<ST, LO, GO, NT>> buildSubBlock(
199 int i,
int j,
const RCP<
const Tpetra::CrsMatrix<ST, LO, GO, NT>>& A,
200 const std::vector<MapPair>& subMaps,
201 Teuchos::RCP<Tpetra::Vector<GO, LO, GO, NT>> plocal2ContigGIDs) {
203 int numVarFamily = subMaps.size();
205 TEUCHOS_ASSERT(i >= 0 && i < numVarFamily);
206 TEUCHOS_ASSERT(j >= 0 && j < numVarFamily);
207 const RCP<const Tpetra::Map<LO, GO, NT>> gRowMap = subMaps[i].first;
208 const RCP<const Tpetra::Map<LO, GO, NT>> rangeMap = subMaps[i].second;
209 const RCP<const Tpetra::Map<LO, GO, NT>> domainMap = subMaps[j].second;
210 const RCP<const Tpetra::Map<LO, GO, NT>> rowMap = rangeMap;
212 if (!plocal2ContigGIDs) {
213 plocal2ContigGIDs = Blocking::getSubBlockColumnGIDs(*A, subMaps[j]);
215 Tpetra::Vector<GO, LO, GO, NT>& local2ContigGIDs = *plocal2ContigGIDs;
218 LO numMyRows = rangeMap->getLocalNumElements();
219 const auto invalidLO = Teuchos::OrdinalTraits<LO>::invalid();
221 auto nEntriesPerRow =
222 Kokkos::View<LO*>(Kokkos::ViewAllocateWithoutInitializing(
"nEntriesPerRow"), numMyRows);
223 Kokkos::deep_copy(nEntriesPerRow, invalidLO);
225 using local_matrix_type = Tpetra::CrsMatrix<ST, LO, GO, NT>::local_matrix_device_type;
226 using row_map_type = local_matrix_type::row_map_type::non_const_type;
227 using values_type = local_matrix_type::values_type::non_const_type;
228 using index_type = local_matrix_type::index_type::non_const_type;
229 auto prefixSumEntriesPerRow = row_map_type(
230 Kokkos::ViewAllocateWithoutInitializing(
"prefixSumEntriesPerRow"), numMyRows + 1);
232 const auto invalid = Teuchos::OrdinalTraits<GO>::invalid();
234 auto A_dev = A->getLocalMatrixDevice();
235 auto gRowMap_dev = gRowMap->getLocalMap();
236 auto A_rowmap_dev = A->getRowMap()->getLocalMap();
237 auto data = local2ContigGIDs.getLocalViewDevice(Tpetra::Access::ReadOnly);
239 using matrix_execution_space =
240 typename Tpetra::CrsMatrix<ST, LO, GO, NT>::local_matrix_device_type::execution_space;
242 LO totalNumOwnedCols = 0;
243 Kokkos::parallel_scan(
244 Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>, matrix_execution_space>(0, numMyRows),
245 KOKKOS_LAMBDA(
const LO localRow, LO& sumNumEntries,
bool finalPass) {
246 auto numOwnedCols = nEntriesPerRow(localRow);
248 if (numOwnedCols == invalidLO) {
249 GO globalRow = gRowMap_dev.getGlobalElement(localRow);
250 LO lid = A_rowmap_dev.getLocalElement(globalRow);
251 const auto sparseRowView = A_dev.row(lid);
254 for (
auto localCol = 0; localCol < sparseRowView.length; localCol++) {
255 GO gid = data(sparseRowView.colidx(localCol), 0);
256 if (gid == invalid)
continue;
259 nEntriesPerRow(localRow) = numOwnedCols;
263 prefixSumEntriesPerRow(localRow) = sumNumEntries;
264 if (localRow == (numMyRows - 1)) {
265 prefixSumEntriesPerRow(numMyRows) = prefixSumEntriesPerRow(localRow) + numOwnedCols;
268 sumNumEntries += numOwnedCols;
272 using device_type =
typename NT::device_type;
273 auto columnIndices = Kokkos::View<GO*, device_type>(
274 Kokkos::ViewAllocateWithoutInitializing(
"columnIndices"), totalNumOwnedCols);
275 auto values = values_type(Kokkos::ViewAllocateWithoutInitializing(
"values"), totalNumOwnedCols);
277 LO maxNumEntriesSubblock = 0;
278 Kokkos::parallel_reduce(
279 Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>, matrix_execution_space>(0, numMyRows),
280 KOKKOS_LAMBDA(
const LO localRow, LO& maxNumEntries) {
281 GO globalRow = gRowMap_dev.getGlobalElement(localRow);
282 LO lid = A_rowmap_dev.getLocalElement(globalRow);
283 const auto sparseRowView = A_dev.row(lid);
286 LO colIdStart = prefixSumEntriesPerRow[localRow];
287 for (
auto localCol = 0; localCol < sparseRowView.length; localCol++) {
288 GO gid = data(sparseRowView.colidx(localCol), 0);
289 if (gid == invalid)
continue;
290 auto value = sparseRowView.value(localCol);
291 columnIndices(colId + colIdStart) = gid;
292 values(colId + colIdStart) = value;
295 const auto numOwnedCols = nEntriesPerRow(localRow);
296 maxNumEntries = Kokkos::max(maxNumEntries, numOwnedCols);
298 Kokkos::Max<LO>(maxNumEntriesSubblock));
300 Teuchos::RCP<const Tpetra::Map<LO, GO, NT>> colMap;
301 Tpetra::Details::makeColMap<LO, GO, NT>(colMap, domainMap, columnIndices);
302 TEUCHOS_ASSERT(colMap);
304 auto colMap_dev = colMap->getLocalMap();
305 auto localColumnIndices =
306 index_type(Kokkos::ViewAllocateWithoutInitializing(
"localColumnIndices"), totalNumOwnedCols);
307 Kokkos::parallel_for(
308 Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>, matrix_execution_space>(
309 0, totalNumOwnedCols),
310 KOKKOS_LAMBDA(
const LO index) {
311 localColumnIndices(index) = colMap_dev.getLocalElement(columnIndices(index));
314 KokkosSparse::sort_crs_matrix<matrix_execution_space, row_map_type, index_type, values_type>(
315 prefixSumEntriesPerRow, localColumnIndices, values);
317 auto lcl_mat = Tpetra::CrsMatrix<ST, LO, GO, NT>::local_matrix_device_type(
318 "localMat", numMyRows, maxNumEntriesSubblock, totalNumOwnedCols, values,
319 prefixSumEntriesPerRow, localColumnIndices);
321 RCP<Tpetra::CrsMatrix<ST, LO, GO, NT>> mat =
322 rcp(
new Tpetra::CrsMatrix<ST, LO, GO, NT>(lcl_mat, rowMap, colMap, domainMap, rangeMap));
327 void rebuildSubBlock(
int i,
int j,
const RCP<
const Tpetra::CrsMatrix<ST, LO, GO, NT>>& A,
328 const std::vector<MapPair>& subMaps, Tpetra::CrsMatrix<ST, LO, GO, NT>& mat,
329 Teuchos::RCP<Tpetra::Vector<GO, LO, GO, NT>> plocal2ContigGIDs) {
331 int numVarFamily = subMaps.size();
333 TEUCHOS_ASSERT(i >= 0 && i < numVarFamily);
334 TEUCHOS_ASSERT(j >= 0 && j < numVarFamily);
336 const Tpetra::Map<LO, GO, NT>& gRowMap = *subMaps[i].first;
337 const Tpetra::Map<LO, GO, NT>& rangeMap = *subMaps[i].second;
338 const Tpetra::Map<LO, GO, NT>& domainMap = *subMaps[j].second;
339 const Tpetra::Map<LO, GO, NT>& rowMap = *subMaps[i].second;
340 const auto& colMap = mat.getColMap();
342 if (!plocal2ContigGIDs) {
343 plocal2ContigGIDs = Blocking::getSubBlockColumnGIDs(*A, subMaps[j]);
345 Tpetra::Vector<GO, LO, GO, NT>& local2ContigGIDs = *plocal2ContigGIDs;
348 mat.setAllToScalar(0.0);
350 LO numMyRows = rowMap.getLocalNumElements();
351 using matrix_execution_space =
352 typename Tpetra::CrsMatrix<ST, LO, GO, NT>::local_matrix_device_type::execution_space;
353 auto A_dev = A->getLocalMatrixDevice();
354 auto mat_dev = mat.getLocalMatrixDevice();
355 auto gRowMap_dev = gRowMap.getLocalMap();
356 auto A_rowmap_dev = A->getRowMap()->getLocalMap();
357 auto colMap_dev = colMap->getLocalMap();
358 auto data = local2ContigGIDs.getLocalViewDevice(Tpetra::Access::ReadOnly);
359 const auto invalid = Teuchos::OrdinalTraits<GO>::invalid();
361 Kokkos::parallel_for(
362 Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>, matrix_execution_space>(0, numMyRows),
363 KOKKOS_LAMBDA(
const LO localRow) {
364 GO globalRow = gRowMap_dev.getGlobalElement(localRow);
365 LO lid = A_rowmap_dev.getLocalElement(globalRow);
366 const auto sparseRowView = A_dev.row(lid);
368 for (
auto localCol = 0; localCol < sparseRowView.length; localCol++) {
369 GO gid = data(sparseRowView.colidx(localCol), 0);
370 if (gid == invalid)
continue;
372 auto lidCol = colMap_dev.getLocalElement(gid);
373 auto value = sparseRowView.value(localCol);
374 mat_dev.sumIntoValues(localRow, &lidCol, 1, &value,
true,
false);
378 mat.fillComplete(rcpFromRef(domainMap), rcpFromRef(rangeMap));