MatrixMarket_TpetraNew.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef __MATRIXMARKET_TPETRA_NEW_HPP
11 #define __MATRIXMARKET_TPETRA_NEW_HPP
12 
14 // Functions to read a MatrixMarket file and load it into a matrix.
15 // Adapted from
16 // Trilinos/packages/epetraext/src/inout/EpetraExt_CrsMatrixIn.cpp
17 // Modified by Jon Berry and Karen Devine to make matrix reallocations
18 // more efficient; rather than insert each non-zero separately, we
19 // collect rows of non-zeros for insertion.
20 // Modified by Karen Devine and Steve Plimpton to prevent all processors
21 // from reading the same file at the same time (ouch!); chunks of the file
22 // are read and broadcast by processor zero; each processor then extracts
23 // its nonzeros from the chunk, sorts them by row, and inserts them into
24 // the matrix.
25 // The variable "chunkSize" can be changed to modify the size of the chunks
26 // read from the file. Larger values of chunkSize lead to faster execution
27 // and greater memory use.
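//
// Example usage (a minimal sketch, assuming this file is included into the
// usual Tpetra::MatrixMarket reader class, so "reader_type" and
// "sparse_matrix_type" below are assumed aliases for that reader and its
// matrix type; the file name "matrix.mtx" is a placeholder):
//
//   Teuchos::RCP<const Teuchos::Comm<int> > comm = Tpetra::getDefaultComm();
//   Teuchos::ParameterList params;
//   params.set("distribution", "1D");        // "1D", "2D", "LowerTriangularBlock", or "MMFile"
//   params.set("chunkSize", size_t(500000)); // lines read and broadcast per chunk
//   params.set("verbose", true);             // print progress while reading
//   Teuchos::RCP<sparse_matrix_type> A =
//       reader_type::readSparseFile("matrix.mtx", comm, params);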
29 
30 private:
31 template <typename global_ordinal_type, typename scalar_type>
32 static Teuchos::RCP<Distribution<global_ordinal_type, scalar_type> >
33 buildDistribution(
34  std::string &distribution, // string indicating whether to use 1D, 2D or
35  // file-based matrix distribution
36  size_t nRow, // Number of global matrix rows
37  size_t nCol, // Number of global matrix columns
38  const Teuchos::ParameterList &params, // Parameters controlling how the file is read
39  const Teuchos::RCP<const Teuchos::Comm<int> > &comm
40  // communicator to be used in maps
41 ) {
42  // Function to build the sets of GIDs for 1D or 2D matrix distribution.
43  // Output is a Distribution object.
44 
45  int me = comm->getRank();
46 
47  using basedist_t = Distribution<global_ordinal_type, scalar_type>;
48  basedist_t *retval;
49 
50  bool randomize = false; // Randomly permute rows and columns before storing
51  {
52  const Teuchos::ParameterEntry *pe = params.getEntryPtr("randomize");
53  if (pe != NULL)
54  randomize = pe->getValue<bool>(&randomize);
55  }
56 
57  std::string partitionFile = ""; // File to reassign rows to parts
58  {
59  const Teuchos::ParameterEntry *pe = params.getEntryPtr("partitionFile");
60  if (pe != NULL)
61  partitionFile = pe->getValue<std::string>(&partitionFile);
62  }
63 
64  if (distribution == "2D") { // Generate 2D distribution
65  if (partitionFile != "") {
66  // Generate 2D distribution with vector partition file
67  TEUCHOS_TEST_FOR_EXCEPTION(randomize, std::logic_error,
68  "Randomization not available for 2DVec distributions.");
69  Distribution2DVec<global_ordinal_type, scalar_type> *dist =
70  new Distribution2DVec<global_ordinal_type, scalar_type>(
71  nRow, comm, params, partitionFile);
72  retval = dynamic_cast<basedist_t *>(dist);
73  } else if (randomize) {
74  // Randomly assign rows/columns to processors
75  Distribution2DRandom<global_ordinal_type, scalar_type> *dist =
76  new Distribution2DRandom<global_ordinal_type, scalar_type>(
77  nRow, comm, params);
78  retval = dynamic_cast<basedist_t *>(dist);
79  } else {
80  // The vector will be distributed linearly, with extra vector entries
81  // spread among processors to maintain balance in rows and columns.
82  Distribution2DLinear<global_ordinal_type, scalar_type> *dist =
83  new Distribution2DLinear<global_ordinal_type, scalar_type>(
84  nRow, comm, params);
85  retval = dynamic_cast<basedist_t *>(dist);
86  }
87  } else if (distribution == "1D") {
88  if (partitionFile != "") {
89  // Generate 1D row-based distribution with vector partition file
90  Distribution1DVec<global_ordinal_type, scalar_type> *dist =
91  new Distribution1DVec<global_ordinal_type, scalar_type>(
92  nRow, comm, params, partitionFile);
93  retval = dynamic_cast<basedist_t *>(dist);
94  } else if (randomize) {
95  // Randomly assign rows to processors.
96  Distribution1DRandom<global_ordinal_type, scalar_type> *dist =
97  new Distribution1DRandom<global_ordinal_type, scalar_type>(
98  nRow, comm, params);
99  retval = dynamic_cast<basedist_t *>(dist);
100  } else {
101  // Linear map similar to Trilinos default.
102  Distribution1DLinear<global_ordinal_type, scalar_type> *dist =
103  new Distribution1DLinear<global_ordinal_type, scalar_type>(
104  nRow, comm, params);
105  retval = dynamic_cast<basedist_t *>(dist);
106  }
107  } else if (distribution == "LowerTriangularBlock") {
108  if (randomize && me == 0) {
109  std::cout << "WARNING: Randomization not available for "
110  << "LowerTriangularBlock distributions." << std::endl;
111  }
112 
113  DistributionLowerTriangularBlock<global_ordinal_type, scalar_type> *dist =
114  new DistributionLowerTriangularBlock<global_ordinal_type, scalar_type>(
115  nRow, comm, params);
116  retval = dynamic_cast<basedist_t *>(dist);
117  } else if (distribution == "MMFile") {
118  // Generate user-defined distribution from Matrix-Market file
119  if (randomize && me == 0) {
120  std::cout << "WARNING: Randomization not available for MMFile "
121  << "distributions." << std::endl;
122  }
123  DistributionMMFile<global_ordinal_type, scalar_type> *dist =
124  new DistributionMMFile<global_ordinal_type, scalar_type>(
125  nRow, comm, params);
126  retval = dynamic_cast<basedist_t *>(dist);
127  }
128 
129  else {
130  std::cout << "ERROR: Invalid distribution method " << distribution
131  << std::endl;
132  exit(-1);
133  }
134  return Teuchos::rcp<basedist_t>(retval);
135 }
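// A sketch of the parameter keys consulted by buildDistribution itself (only
// "distribution", "randomize", and "partitionFile" are read here; individual
// Distribution constructors may read further keys; "parts.txt" is a
// hypothetical file name):
//
//   Teuchos::ParameterList params;
//   params.set("distribution", "2D");         // "1D", "2D", "LowerTriangularBlock", or "MMFile"
//   params.set("randomize", true);            // random row/column assignment (1D and 2D only)
//   params.set("partitionFile", "parts.txt"); // reassign rows via a partition file
//                                             // (not combinable with randomize for 2D)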
136 
137 static void
138 readMatrixMarket(
139  const std::string &filename, // MatrixMarket file to read
140  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
141  const Teuchos::ParameterList &params,
142  size_t &nRow,
143  size_t &nCol,
144  typename Distribution<global_ordinal_type, scalar_type>::LocalNZmap_t &localNZ,
145  Teuchos::RCP<Distribution<global_ordinal_type, scalar_type> > &dist) {
146  bool useTimers = false; // should we time various parts of the reader?
147  {
148  const Teuchos::ParameterEntry *pe = params.getEntryPtr("useTimers");
149  if (pe != NULL)
150  useTimers = pe->getValue<bool>(&useTimers);
151  }
152 
153  Teuchos::RCP<Teuchos::TimeMonitor> timer = Teuchos::null;
154  if (useTimers)
155  timer = rcp(new Teuchos::TimeMonitor(
156  *Teuchos::TimeMonitor::getNewTimer("RMM parameterSetup")));
157 
158  int me = comm->getRank();
159 
160  bool verbose = false; // Print status as reading
161  {
162  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
163  if (pe != NULL)
164  verbose = pe->getValue<bool>(&verbose);
165  }
166 
167  size_t chunkSize = 500000; // Number of lines to read / broadcast at once
168  {
169  const Teuchos::ParameterEntry *pe = params.getEntryPtr("chunkSize");
170  if (pe != NULL)
171  chunkSize = pe->getValue<size_t>(&chunkSize);
172  }
173 
174  bool symmetrize = false; // Symmetrize the matrix
175  {
176  const Teuchos::ParameterEntry *pe = params.getEntryPtr("symmetrize");
177  if (pe != NULL)
178  symmetrize = pe->getValue<bool>(&symmetrize);
179  }
180 
181  bool transpose = false; // Store the transpose
182  {
183  const Teuchos::ParameterEntry *pe = params.getEntryPtr("transpose");
184  if (pe != NULL)
185  transpose = pe->getValue<bool>(&transpose);
186  }
187 
188  std::string diagonal = ""; // Are diagonal entries required (so add them)
189  // or ignored (so delete them) or neither?
190  // Default is neither.
191  {
192  const Teuchos::ParameterEntry *pe = params.getEntryPtr("diagonal");
193  if (pe != NULL)
194  diagonal = pe->getValue<std::string>(&diagonal);
195  }
196  bool ignoreDiagonal = (diagonal == "exclude");
197  bool requireDiagonal = (diagonal == "require");
198 
199  std::string distribution = "1D"; // Default distribution is 1D row-based
200  {
201  const Teuchos::ParameterEntry *pe = params.getEntryPtr("distribution");
202  if (pe != NULL)
203  distribution = pe->getValue<std::string>(&distribution);
204  }
205 
206  if (useTimers) {
207  timer = Teuchos::null;
208  timer = rcp(new Teuchos::TimeMonitor(
209  *Teuchos::TimeMonitor::getNewTimer("RMM header")));
210  }
211 
212  if (verbose && me == 0)
213  std::cout << "Reading matrix market file... " << filename << std::endl;
214 
215  FILE *fp = NULL;
216  size_t dim[3] = {0, 0, 0}; // nRow, nCol, nNz as read from MatrixMarket
217  MM_typecode mmcode;
218 
219  if (me == 0) {
220  fp = fopen(filename.c_str(), "r");
221 
222  if (fp == NULL) {
223  std::cout << "Error: cannot open file " << filename << std::endl;
224  } else {
225  // Read MatrixMarket banner to get type of matrix
226  if (mm_read_banner(fp, &mmcode) != 0) {
227  std::cout << "Error: bad MatrixMarket banner " << std::endl;
228  } else {
229  // Error check for file types that this function can read
230  if (!mm_is_valid(mmcode) ||
231  mm_is_dense(mmcode) || mm_is_array(mmcode) ||
232  mm_is_complex(mmcode) ||
233  mm_is_skew(mmcode) || mm_is_hermitian(mmcode)) {
234  std::cout << "Error: matrix type is not supported by this reader\n "
235  << "This reader supports sparse, coordinate, "
236  << "real, pattern, integer, symmetric, general"
237  << std::endl;
238  } else {
239  // Read nRow, nCol, nNz from MatrixMarket file
240  if (mm_read_mtx_crd_size(fp, &dim[0], &dim[1], &dim[2]) != 0) {
241  std::cout << "Error: bad matrix dimensions " << std::endl;
242  dim[0] = dim[1] = dim[2] = 0;
243  }
244  }
245  }
246  }
247  }
248 
249  // Broadcast relevant info
250  // Bad input if dim[0] or dim[1] is still zero after the broadcast;
251  // all procs throw together
252  Teuchos::broadcast<int, size_t>(*comm, 0, 3, dim);
253  if (dim[0] == 0 || dim[1] == 0) {
254  throw std::runtime_error("Error: bad matrix header information");
255  }
256  Teuchos::broadcast<int, char>(*comm, 0, sizeof(MM_typecode), mmcode);
257 
258  nRow = dim[0];
259  nCol = dim[1];
260 
261  TEUCHOS_TEST_FOR_EXCEPTION(nRow != nCol, std::logic_error,
262  "This overload of readSparseFile requires nRow == nCol "
263  << "(nRow = " << nRow << ", nCol = " << nCol << "); "
264  << "For now, use a different overload of readSparseFile until this "
265  << "overload is fixed to read rectangular matrices. "
266  << "See Trilinos github issues #7045 and #8472.");
267 
268  size_t nNz = dim[2];
269  bool patternInput = mm_is_pattern(mmcode);
270  bool symmetricInput = mm_is_symmetric(mmcode);
271  if (symmetricInput) symmetrize = true;
272 
273  if (verbose && me == 0)
274  std::cout << "Matrix market file... "
275  << "\n symmetrize = " << symmetrize
276  << "\n transpose = " << transpose
277  << "\n change diagonal = " << diagonal
278  << "\n distribution = " << distribution
279  << std::endl;
280 
281  if (useTimers) {
282  timer = Teuchos::null;
283  timer = rcp(new Teuchos::TimeMonitor(
284  *Teuchos::TimeMonitor::getNewTimer("RMM distribution")));
285  }
286 
287  // Create distribution based on nRow, nCol, npRow, npCol
288  dist = buildDistribution<global_ordinal_type, scalar_type>(distribution,
289  nRow, nCol, params,
290  comm);
291  if (useTimers) {
292  timer = Teuchos::null;
293  timer = rcp(new Teuchos::TimeMonitor(
294  *Teuchos::TimeMonitor::getNewTimer("RMM readChunks")));
295  }
296 
297  std::set<global_ordinal_type> diagset;
298  // If diagonal == require, this set keeps track of
299  // which diagonal entries already exist so we can
300  // add those that don't
301 
302  using nzindex_t =
303  typename Distribution<global_ordinal_type, scalar_type>::NZindex_t;
304 
305  // Chunk information and buffers
306  const int maxLineLength = 81;
307  char *buffer = new char[chunkSize * maxLineLength + 1];
308  size_t nChunk;
309  size_t nMillion = 0;
310  size_t nRead = 0;
311  size_t rlen;
312 
313  auto timerRead = Teuchos::TimeMonitor::getNewTimer("RMM readNonzeros");
314  auto timerSelect = Teuchos::TimeMonitor::getNewTimer("RMM selectNonzeros");
315  // Read chunks until the entire file is read
316  Teuchos::RCP<Teuchos::TimeMonitor> innerTimer = Teuchos::null;
317  while (nRead < nNz) {
318  innerTimer = rcp(new Teuchos::TimeMonitor(*timerRead));
319 
320  if (nNz - nRead > chunkSize)
321  nChunk = chunkSize;
322  else
323  nChunk = (nNz - nRead);
324 
325  // Processor 0 reads a chunk
326  if (me == 0) {
327  char *eof;
328  rlen = 0;
329  for (size_t i = 0; i < nChunk; i++) {
330  eof = fgets(&buffer[rlen], maxLineLength, fp);
331  if (eof == NULL) {
332  std::cout << "Unexpected end of matrix file." << std::endl;
333  std::cout.flush();
334  exit(-1);
335  }
336  rlen += strlen(&buffer[rlen]);
337  }
338  buffer[rlen++] = '\n';
339  }
340 
341  // Processor 0 broadcasts the chunk
342  Teuchos::broadcast<int, size_t>(*comm, 0, 1, &rlen);
343  Teuchos::broadcast<int, char>(*comm, 0, rlen, buffer);
344 
345  buffer[rlen++] = '\0';
346  nRead += nChunk;
347 
348  innerTimer = Teuchos::null;
349  innerTimer = rcp(new Teuchos::TimeMonitor(*timerSelect));
350 
351  // All processors check the received data, saving nonzeros belonging to them
352  char *lineptr = buffer;
353  for (rlen = 0; rlen < nChunk; rlen++) {
354  char *next = strchr(lineptr, '\n');
355  global_ordinal_type I = atol(strtok(lineptr, " \t\n")) - 1; // since MatrixMarket files are one-based
356  global_ordinal_type J = atol(strtok(NULL, " \t\n")) - 1; // since MatrixMarket files are one-based
357 
358  scalar_type V = (patternInput ? -1. : atof(strtok(NULL, " \t\n")));
359  lineptr = next + 1;
360 
361  // Special processing of nonzero
362  if ((I == J) && ignoreDiagonal) continue;
363 
364  if (transpose) std::swap<global_ordinal_type>(I, J);
365 
366  // Add nonzero (I,J) to the map if it should be on this processor
367  // Some file-based distributions have processor assignment stored as
368  // the non-zero's value, so pass the value to Mine.
369  if (dist->Mine(I, J, int(V))) {
370  nzindex_t idx = std::make_pair(I, J);
371  localNZ[idx] = V;
372  if (requireDiagonal && (I == J)) diagset.insert(I);
373  }
374 
375  // If symmetrizing, add (J,I) to the map if it should be on this processor
376  // Some file-based distributions have processor assignment stored as
377  // the non-zero's value, so pass the value to Mine.
378  if (symmetrize && (I != J) && dist->Mine(J, I, int(V))) {
379  // Add entry (J, I) if need to symmetrize
380  // This processor keeps this non-zero.
381  nzindex_t idx = std::make_pair(J, I);
382  localNZ[idx] = V;
383  }
384  }
385 
386  // Status check
387  if (verbose) {
388  if (nRead / 1000000 > nMillion) {
389  nMillion++;
390  if (me == 0) std::cout << nMillion << "M ";
391  }
392  }
393 
394  innerTimer = Teuchos::null;
395  }
396 
397  if (verbose && me == 0)
398  std::cout << std::endl
399  << nRead << " nonzeros read " << std::endl;
400 
401  if (fp != NULL) fclose(fp);
402  delete[] buffer;
403 
404  if (useTimers) {
405  timer = Teuchos::null;
406  timer = rcp(new Teuchos::TimeMonitor(
407  *Teuchos::TimeMonitor::getNewTimer("RMM diagonal")));
408  }
409 
410  // Add diagonal entries if they are required.
411  // Check to make sure they are all here; add them if they are not.
412  if (requireDiagonal) {
413  if (dist->DistType() == MMFile) {
414  // All diagonal entries should be present in the file; we cannot create
415  // them for file-based data distributions.
416  // Do an error check to ensure they are all there.
417  size_t localss = diagset.size();
418  size_t globalss;
419  Teuchos::reduceAll<int, size_t>(*comm, Teuchos::REDUCE_SUM, 1,
420  &localss, &globalss);
421  TEUCHOS_TEST_FOR_EXCEPTION(globalss != nRow, std::logic_error,
422  "File-based nonzero distribution requires all diagonal "
423  << "entries to be present in the file. \n"
424  << "Number of diagonal entries in file = " << globalss << "\n"
425  << "Number of matrix rows = " << nRow << "\n");
426  } else {
427  for (global_ordinal_type i = 0;
428  i < static_cast<global_ordinal_type>(nRow); i++) {
429  if (dist->Mine(i, i)) {
430  if (diagset.find(i) == diagset.end()) {
431  nzindex_t idx = std::make_pair(i, i);
432  localNZ[idx] = 0;
433  }
434  }
435  }
436  }
437  }
438  // Done with diagset; free its memory
439  std::set<global_ordinal_type>().swap(diagset);
440 
441  if (useTimers)
442  timer = Teuchos::null;
443 }
444 
447 // Description of the binary file format read by the readBinary function below.
449 //
450 // File format:
451 // #vertices #edges src_1 dst_1 src_2 dst_2 ... src_#edges dst_#edges
452 //
453 // Types:
454 // #edges: unsigned long long
455 // everything else: unsigned int
456 //
457 // Base of indexing:
458 // 1
459 //
461 //
462 // A code example that converts MatrixMarket format into this format:
463 //
464 // typedef unsigned int ord_type;
465 // typedef unsigned long long size_type;
466 //
467 // std::string line;
468 // std::ifstream in(infilename);
469 // std::ofstream out(outfilename, std::ios::out | std::ios::binary);
470 //
471 // ord_type nv, dummy, edge[2];
472 // size_type ne;
473 //
474 // do
475 // std::getline (in, line);
476 // while(line[0] == '%');
477 //
478 // std::stringstream ss(line);
479 // ss >> nv >> dummy >> ne;
480 // out.write((char *)&nv, sizeof(ord_type));
481 // out.write((char *)&ne, sizeof(size_type));
482 //
483 // while(std::getline(in, line)) {
484 // std::stringstream ss2(line);
485 // ss2 >> edge[0] >> edge[1];
486 // out.write((char *)edge, sizeof(ord_type)*2);
487 //
488 // }
489 // in.close();
490 // out.close();
491 //
493 
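// Example usage of the binary reader (a sketch; "graph.bin" is a hypothetical
// file written in the format described above, and "reader_type" /
// "sparse_matrix_type" are assumed aliases as in the example near the top of
// this file):
//
//   Teuchos::ParameterList params;
//   params.set("binary", true);      // use readBinary instead of readMatrixMarket
//   params.set("symmetrize", true);  // optionally also insert (J,I) for every (I,J)
//   Teuchos::RCP<sparse_matrix_type> A =
//       reader_type::readSparseFile("graph.bin", comm, params);
//
// Because the binary format stores only coordinates, every stored nonzero
// value is set to one.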
494 static void
495 readBinary(
496  const std::string &filename, // MatrixMarket file to read
497  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
498  const Teuchos::ParameterList &params,
499  size_t &nRow,
500  size_t &nCol,
501  typename Distribution<global_ordinal_type, scalar_type>::LocalNZmap_t &localNZ,
502  Teuchos::RCP<Distribution<global_ordinal_type, scalar_type> > &dist) {
503  int me = comm->getRank();
504 
505  bool verbose = false; // Print status as reading
506  {
507  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
508  if (pe != NULL)
509  verbose = pe->getValue<bool>(&verbose);
510  }
511 
512  size_t chunkSize = 500000; // Number of lines to read / broadcast at once
513  {
514  const Teuchos::ParameterEntry *pe = params.getEntryPtr("chunkSize");
515  if (pe != NULL)
516  chunkSize = pe->getValue<size_t>(&chunkSize);
517  }
518 
519  bool symmetrize = false; // Symmetrize the matrix
520  {
521  const Teuchos::ParameterEntry *pe = params.getEntryPtr("symmetrize");
522  if (pe != NULL)
523  symmetrize = pe->getValue<bool>(&symmetrize);
524  }
525 
526  bool transpose = false; // Store the transpose
527  {
528  const Teuchos::ParameterEntry *pe = params.getEntryPtr("transpose");
529  if (pe != NULL)
530  transpose = pe->getValue<bool>(&transpose);
531  }
532 
533  std::string diagonal = ""; // Are diagonal entries required (so add them)
534  // or ignored (so delete them) or neither?
535  // Default is neither.
536  {
537  const Teuchos::ParameterEntry *pe = params.getEntryPtr("diagonal");
538  if (pe != NULL)
539  diagonal = pe->getValue<std::string>(&diagonal);
540  }
541  bool ignoreDiagonal = (diagonal == "exclude");
542  bool requireDiagonal = (diagonal == "require");
543 
544  std::string distribution = "1D"; // Default distribution is 1D row-based
545  {
546  const Teuchos::ParameterEntry *pe = params.getEntryPtr("distribution");
547  if (pe != NULL)
548  distribution = pe->getValue<std::string>(&distribution);
549  }
550 
551  if (verbose && me == 0)
552  std::cout << "Reading binary file... " << filename << std::endl;
553 
554  FILE *fp = NULL;
555  size_t dim[3] = {0, 0, 0}; // Expected to read nRow and nNz, nCol = nRow
556 
557  if (me == 0) {
558  fp = fopen(filename.c_str(), "rb");
559 
560  if (fp == NULL) {
561  std::cout << "Error: cannot open file " << filename << std::endl;
562  } else {
563  // The header in the binary file contains only numVertices and numEdges
564  unsigned int nv = 0;
565  unsigned long long ne = 0;
566  if (fread(&nv, sizeof(unsigned int), 1, fp) != 1)
567  throw std::runtime_error("Error: bad number of read values.");
568  if (fread(&ne, sizeof(unsigned long long), 1, fp) != 1)
569  throw std::runtime_error("Error: bad number of read values.");
570  dim[0] = nv;
571  dim[1] = dim[0];
572  dim[2] = ne;
573  }
574  }
575 
576  // Broadcast relevant info
577  // Bad input if dim[0] or dim[1] is still zero after the broadcast;
578  // all procs throw together
579  Teuchos::broadcast<int, size_t>(*comm, 0, 3, dim);
580  if (dim[0] == 0 || dim[1] == 0) {
581  throw std::runtime_error("Error: bad matrix header information");
582  }
583 
584  nRow = dim[0];
585  nCol = dim[1];
586  size_t nNz = dim[2];
587 
588  if (verbose && me == 0)
589  std::cout << "Binary file... "
590  << "\n symmetrize = " << symmetrize
591  << "\n transpose = " << transpose
592  << "\n change diagonal = " << diagonal
593  << "\n distribution = " << distribution
594  << std::endl;
595 
596  // Create distribution based on nRow, nCol, npRow, npCol
597  dist = buildDistribution<global_ordinal_type, scalar_type>(distribution,
598  nRow, nCol, params,
599  comm);
600 
601  std::set<global_ordinal_type> diagset;
602  // If diagonal == require, this set keeps track of
603  // which diagonal entries already exist so we can
604  // add those that don't
605 
606  using nzindex_t =
607  typename Distribution<global_ordinal_type, scalar_type>::NZindex_t;
608 
609  // Chunk information and buffers
610  unsigned int *buffer = new unsigned int[chunkSize * 2];
611  size_t nChunk;
612  size_t nMillion = 0;
613  size_t nRead = 0;
614  size_t rlen;
615  const scalar_type ONE = Teuchos::ScalarTraits<scalar_type>::one();
616 
617  // Read chunks until the entire file is read
618  while (nRead < nNz) {
619  if (nNz - nRead > chunkSize)
620  nChunk = chunkSize;
621  else
622  nChunk = (nNz - nRead);
623 
624  // Processor 0 reads a chunk
625  if (me == 0) {
626  size_t ret = 0;
627  rlen = 0;
628  for (size_t i = 0; i < nChunk; i++) {
629  ret = fread(&buffer[rlen], sizeof(unsigned int), 2, fp);
630  if (ret == 0) {
631  std::cout << "Unexpected end of matrix file." << std::endl;
632  std::cout.flush();
633  exit(-1);
634  }
635  rlen += 2;
636  }
637  }
638 
639  // Processor 0 broadcasts the chunk
640  Teuchos::broadcast<int, size_t>(*comm, 0, 1, &rlen);
641  Teuchos::broadcast<int, unsigned int>(*comm, 0, rlen, buffer);
642 
643  nRead += nChunk;
644 
645  // All processors check the received data, saving nonzeros belonging to them
646  for (rlen = 0; rlen < nChunk; rlen++) {
647  global_ordinal_type I = buffer[2 * rlen] - 1;
648  global_ordinal_type J = buffer[2 * rlen + 1] - 1;
649 
650  // Special processing of nonzero
651  if ((I == J) && ignoreDiagonal) continue;
652 
653  if (transpose) std::swap<global_ordinal_type>(I, J);
654 
655  // Add nonzero (I,J) to the map if it should be on this processor
656  // Some file-based distributions have processor assignment stored as
657  // the non-zero's value, so pass the value to Mine.
658  if (dist->Mine(I, J, ONE)) {
659  nzindex_t idx = std::make_pair(I, J);
660  localNZ[idx] = ONE; // For now, the input binary format does not
661  // support numeric values, so we insert a value of one.
662  if (requireDiagonal && (I == J)) diagset.insert(I);
663  }
664 
665  // If symmetrizing, add (J,I) to the map if it should be on this processor
666  // Some file-based distributions have processor assignment stored as
667  // the non-zero's value, so pass the value to Mine.
668  if (symmetrize && (I != J) && dist->Mine(J, I, ONE)) {
669  // Add entry (J, I) if need to symmetrize
670  // This processor keeps this non-zero.
671  nzindex_t idx = std::make_pair(J, I);
672  localNZ[idx] = ONE;
673  }
674  }
675 
676  // Status check
677  if (verbose) {
678  if (nRead / 1000000 > nMillion) {
679  nMillion++;
680  if (me == 0) std::cout << nMillion << "M ";
681  }
682  }
683  }
684 
685  if (verbose && me == 0)
686  std::cout << std::endl
687  << nRead << " nonzeros read " << std::endl;
688 
689  if (fp != NULL) fclose(fp);
690  delete[] buffer;
691 
692  // Add diagonal entries if they are required.
693  // Check to make sure they are all here; add them if they are not.
694  if (requireDiagonal) {
695  if (dist->DistType() == MMFile) {
696  // All diagonal entries should be present in the file; we cannot create
697  // them for file-based data distributions.
698  // Do an error check to ensure they are all there.
699  size_t localss = diagset.size();
700  size_t globalss;
701  Teuchos::reduceAll<int, size_t>(*comm, Teuchos::REDUCE_SUM, 1,
702  &localss, &globalss);
703  TEUCHOS_TEST_FOR_EXCEPTION(globalss != nRow, std::logic_error,
704  "File-based nonzero distribution requires all diagonal "
705  << "entries to be present in the file. \n"
706  << "Number of diagonal entries in file = " << globalss << "\n"
707  << "Number of matrix rows = " << nRow << "\n");
708  } else {
709  for (global_ordinal_type i = 0;
710  i < static_cast<global_ordinal_type>(nRow); i++) {
711  if (dist->Mine(i, i)) {
712  if (diagset.find(i) == diagset.end()) {
713  nzindex_t idx = std::make_pair(i, i);
714  localNZ[idx] = 0;
715  }
716  }
717  }
718  }
719  }
720  // Done with diagset; free its memory
721  std::set<global_ordinal_type>().swap(diagset);
722 }
723 
726 // Description of the per-process binary file format read by the readPerProcessBinary function below.
728 //
729 //
730 // FILE FORMAT:
731 // globalNumRows globalNumCols localNumNnzs row_1 col_1 ... row_n col_n,
732 // where n = localNumNnzs
733 //
734 //
735 // ASSUMPTIONS:
736 // - The nonzeros should be sorted by their row indices within each file.
737 // - Nonzeros have global row and column indices.
738 // - The user specifies the base <filename>, where rank i reads <filename>.i.cooBin.
739 //
740 //
741 // TYPES:
742 // localNumNnzs: unsigned long long
743 // everything else: unsigned int
744 //
745 //
746 // BASE OF INDEXING: 1
747 //
748 //
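// Example usage of the per-process binary reader (a sketch; "graph" is a
// hypothetical base name, so rank i opens "graph.<i>.cooBin"; aliases as in
// the examples above):
//
//   Teuchos::ParameterList params;
//   params.set("binary", true);
//   params.set("readPerProcess", true);
//   Teuchos::RCP<sparse_matrix_type> A =
//       reader_type::readSparseFile("graph", comm, params);
//
// As with the plain binary format, all nonzero values are set to one, and no
// redistribution is performed after reading.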
750 static void
751 readPerProcessBinary(
752  const std::string &filename,
753  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
754  const Teuchos::ParameterList &params,
755  size_t &nRow,
756  size_t &nCol,
757  typename Distribution<global_ordinal_type, scalar_type>::LocalNZmap_t &localNZ,
758  Teuchos::RCP<Distribution<global_ordinal_type, scalar_type> > &dist,
759  unsigned int *&buffer,
760  size_t &nNz) {
761  int me = comm->getRank();
762 
763  bool verbose = false; // Print status as reading
764  {
765  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
766  if (pe != NULL)
767  verbose = pe->getValue<bool>(&verbose);
768  }
769 
770  std::string distribution = "1D"; // Default distribution is 1D row-based
771  {
772  const Teuchos::ParameterEntry *pe = params.getEntryPtr("distribution");
773  if (pe != NULL)
774  distribution = pe->getValue<std::string>(&distribution);
775  }
776 
777  if (verbose && me == 0)
778  std::cout << "Reading per-process binary files... " << filename << std::endl;
779 
780  std::string rankFileName = filename + "." + std::to_string(me) + ".cooBin";
781  FILE *fp = NULL;
782 
783  fp = fopen(rankFileName.c_str(), "rb");
784  if (fp == NULL) {
785  std::cout << "Error: cannot open file " << rankFileName << std::endl;
786  throw std::runtime_error("Error: non-existing input file: " + rankFileName);
787  }
788 
789  // The header in each per-process file: globalNumRows globalNumCols localNumNonzeros
790  unsigned int globalNumRows = 0, globalNumCols = 0;
791  unsigned long long localNumNonzeros = 0;
792  if (fread(&globalNumRows, sizeof(unsigned int), 1, fp) != 1)
793  throw std::runtime_error("Error: bad number of read values.");
794  if (fread(&globalNumCols, sizeof(unsigned int), 1, fp) != 1)
795  throw std::runtime_error("Error: bad number of read values.");
796  if (fread(&localNumNonzeros, sizeof(unsigned long long), 1, fp) != 1)
797  throw std::runtime_error("Error: bad number of read values.");
798 
799  nRow = static_cast<size_t>(globalNumRows);
800  nCol = static_cast<size_t>(globalNumCols);
801  nNz = static_cast<size_t>(localNumNonzeros);
802 
803  // Fill the simple buffer array instead of a std::map
804  // S. Acer: With large graphs, we can't afford std::map
805  buffer = new unsigned int[nNz * 2];
806 
807  if (nNz > 0) {
808  size_t ret = fread(buffer, sizeof(unsigned int), 2 * nNz, fp);
809  if (ret == 0) {
810  std::cout << "Unexpected end of matrix file: " << rankFileName << std::endl;
811  std::cout.flush();
812  delete[] buffer;
813  exit(-1);
814  }
815  }
816  if (fp != NULL) fclose(fp);
817 
818  // This barrier is not necessary but useful for debugging
819  comm->barrier();
820  if (verbose && me == 0)
821  std::cout << "All ranks finished reading their nonzeros from their individual files\n";
822 
823  // Create distribution based on nRow, nCol, npRow, npCol
824  dist = buildDistribution<global_ordinal_type, scalar_type>(distribution,
825  nRow, nCol, params,
826  comm);
827 }
828 
829 public:
830 // This is the default interface.
831 static Teuchos::RCP<sparse_matrix_type>
832 readSparseFile(
833  const std::string &filename, // MatrixMarket file to read
834  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
835  const Teuchos::ParameterList &params) {
836  Teuchos::RCP<Distribution<global_ordinal_type, scalar_type> > dist;
837  return readSparseFile(filename, comm, params, dist);
838 }
839 
840 // This version has the Distribution object as an output parameter.
841 // S. Acer needs the distribution object to get the chunk cuts from the
842 // LowerTriangularBlock distribution.
843 static Teuchos::RCP<sparse_matrix_type>
844 readSparseFile(
845  const std::string &filename, // MatrixMarket file to read
846  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
847  const Teuchos::ParameterList &params,
848  Teuchos::RCP<Distribution<global_ordinal_type, scalar_type> > &dist) {
849  bool useTimers = false; // should we time various parts of the reader?
850  {
851  const Teuchos::ParameterEntry *pe = params.getEntryPtr("useTimers");
852  if (pe != NULL)
853  useTimers = pe->getValue<bool>(&useTimers);
854  }
855 
856  Teuchos::RCP<Teuchos::TimeMonitor> timer = Teuchos::null;
857  if (useTimers)
858  timer = rcp(new Teuchos::TimeMonitor(
859  *Teuchos::TimeMonitor::getNewTimer("RSF parameterSetup")));
860 
861  int me = comm->getRank();
862 
863  // Check parameters to determine how to process the matrix while reading
864  // TODO: Add validators for the parameters
865 
866  bool verbose = false; // Print status as reading
867  {
868  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
869  if (pe != NULL)
870  verbose = pe->getValue<bool>(&verbose);
871  }
872 
873  bool callFillComplete = true; // should we fillComplete the new CrsMatrix?
874  {
875  const Teuchos::ParameterEntry *pe = params.getEntryPtr("callFillComplete");
876  if (pe != NULL)
877  callFillComplete = pe->getValue<bool>(&callFillComplete);
878  }
879 
880  // We don't want to use MatrixMarket's coordinate reader, because we don't
881  // want the entire matrix on one processor.
882  // Instead, Proc 0 reads nonzeros in chunks and broadcasts chunks to all
883  // processors.
884  // All processors insert nonzeros they own into a std::map
885 
886  // Storage for this processor's nonzeros.
887  using localNZmap_t =
888  typename Distribution<global_ordinal_type, scalar_type>::LocalNZmap_t;
889  localNZmap_t localNZ;
890 
891  bool binary = false; // should we read a binary file?
892  {
893  const Teuchos::ParameterEntry *pe = params.getEntryPtr("binary");
894  if (pe != NULL)
895  binary = pe->getValue<bool>(&binary);
896  }
897 
898  bool readPerProcess = false; // should we read a separate file per process?
899  {
900  const Teuchos::ParameterEntry *pe = params.getEntryPtr("readPerProcess");
901  if (pe != NULL)
902  readPerProcess = pe->getValue<bool>(&readPerProcess);
903  }
904 
905  if (useTimers) {
906  const char *timername = (binary ? "RSF readBinary" : "RSF readMatrixMarket");
907  timer = Teuchos::null;
908  timer = rcp(new Teuchos::TimeMonitor(
909  *Teuchos::TimeMonitor::getNewTimer(timername)));
910  }
911 
912  // Read nonzeros from the given file(s)
913  size_t nRow = 0, nCol = 0;
914  unsigned int *buffer = 0;
915  size_t nNz = 0;
916  if (binary) {
917  if (readPerProcess)
918  readPerProcessBinary(filename, comm, params, nRow, nCol, localNZ, dist, buffer, nNz);
919  else
920  readBinary(filename, comm, params, nRow, nCol, localNZ, dist);
921  } else
922  readMatrixMarket(filename, comm, params, nRow, nCol, localNZ, dist);
923 
924  if (readPerProcess == false) {
925  // Redistribute nonzeros as needed to satisfy the Distribution
926  // For most Distributions, this is a no-op
927  if (useTimers) {
928  timer = Teuchos::null;
929  timer = rcp(new Teuchos::TimeMonitor(
930  *Teuchos::TimeMonitor::getNewTimer("RSF redistribute")));
931  }
932 
933  dist->Redistribute(localNZ);
934  }
935 
936  if (useTimers) {
937  timer = Teuchos::null;
938  timer = rcp(new Teuchos::TimeMonitor(
939  *Teuchos::TimeMonitor::getNewTimer("RSF nonzerosConstruction")));
940  }
941 
942  // Now construct the matrix.
943  // Count number of entries in each row for best use of StaticProfile
944  if (verbose && me == 0)
945  std::cout << std::endl
946  << "Constructing the matrix" << std::endl;
947 
948  Teuchos::Array<global_ordinal_type> rowIdx;
949  Teuchos::Array<size_t> nnzPerRow;
950  Teuchos::Array<global_ordinal_type> colIdx;
951  Teuchos::Array<scalar_type> val;
952  Teuchos::Array<size_t> offsets;
953 
954  if (readPerProcess) {
955  global_ordinal_type prevI = std::numeric_limits<global_ordinal_type>::max();
956  for (size_t it = 0; it < nNz; it++) {
957  global_ordinal_type I = buffer[2 * it] - 1;
958  global_ordinal_type J = buffer[2 * it + 1] - 1;
959  if (prevI != I) {
960  prevI = I;
961  rowIdx.push_back(I);
962  nnzPerRow.push_back(0);
963  }
964  nnzPerRow.back()++;
965  colIdx.push_back(J);
966  }
967  delete[] buffer;
968  } else {
969  // Exploit fact that map has entries sorted by I, then J
970  global_ordinal_type prevI = std::numeric_limits<global_ordinal_type>::max();
971  for (auto it = localNZ.begin(); it != localNZ.end(); it++) {
972  global_ordinal_type I = it->first.first;
973  global_ordinal_type J = it->first.second;
974  scalar_type V = it->second;
975  if (prevI != I) {
976  prevI = I;
977  rowIdx.push_back(I);
978  nnzPerRow.push_back(0);
979  }
980  nnzPerRow.back()++;
981  colIdx.push_back(J);
982  val.push_back(V);
983  }
984 
985  // Done with localNZ map; free it.
986  localNZmap_t().swap(localNZ);
987  }
988 
989  // Compute prefix sum in offsets array
990  offsets.resize(rowIdx.size() + 1);
991  offsets[0] = 0;
992  for (size_t row = 0; row < static_cast<size_t>(rowIdx.size()); row++)
993  offsets[row + 1] = offsets[row] + nnzPerRow[row];
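 // Worked example: nnzPerRow = {2, 1, 3} yields offsets = {0, 2, 3, 6}, so
 // row r's entries occupy colIdx[offsets[r] .. offsets[r+1]-1] (and val
 // likewise when values were read).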
994 
995  if (useTimers) {
996  timer = Teuchos::null;
997  timer = rcp(new Teuchos::TimeMonitor(
998  *Teuchos::TimeMonitor::getNewTimer("RSF insertNonzeros")));
999  }
1000 
1001  // Create a new RowMap with only rows having non-zero entries.
1002  size_t dummy = Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
1003  Teuchos::RCP<const Tpetra::Map<> > rowMap =
1004  Teuchos::rcp(new Tpetra::Map<>(dummy, rowIdx(), 0, comm));
1005 
1006  Teuchos::RCP<sparse_matrix_type> A =
1007  rcp(new sparse_matrix_type(rowMap, nnzPerRow()));
1008 
1009  // Insert the global values into the matrix row-by-row.
1010  if (verbose && me == 0)
1011  std::cout << "Inserting global values" << std::endl;
1012 
1013  if (readPerProcess) {
1014  const scalar_type ONE = Teuchos::ScalarTraits<scalar_type>::one();
1015  for (int i = 0; i < rowIdx.size(); i++) {
1016  size_t nnz = nnzPerRow[i];
1017  size_t off = offsets[i];
1018  val.assign(nnz, ONE);
1019  // The readPerProcessBinary routine does not read any numeric values from
1020  // the file, so we insert dummy values here.
1021  A->insertGlobalValues(rowIdx[i], colIdx(off, nnz), val());
1022  }
1023  } else {
1024  for (int i = 0; i < rowIdx.size(); i++) {
1025  size_t nnz = nnzPerRow[i];
1026  size_t off = offsets[i];
1027  A->insertGlobalValues(rowIdx[i], colIdx(off, nnz), val(off, nnz));
1028  }
1029  }
1030 
1031  // free memory that is no longer needed
1032  Teuchos::Array<size_t>().swap(nnzPerRow);
1033  Teuchos::Array<size_t>().swap(offsets);
1034  Teuchos::Array<global_ordinal_type>().swap(rowIdx);
1035  Teuchos::Array<global_ordinal_type>().swap(colIdx);
1036  Teuchos::Array<scalar_type>().swap(val);
1037 
1038  if (useTimers)
1039  timer = Teuchos::null;
1040 
1041  if (callFillComplete) {
1042  if (verbose && me == 0)
1043  std::cout << "Building vector maps" << std::endl;
1044 
1045  if (useTimers) {
1046  timer = Teuchos::null;
1047  timer = rcp(new Teuchos::TimeMonitor(
1048  *Teuchos::TimeMonitor::getNewTimer("RSF buildVectorMaps")));
1049  }
1050 
1051  // Build domain map that corresponds to distribution
1052  Teuchos::Array<global_ordinal_type> vectorSet;
1053  for (global_ordinal_type i = 0;
1054  i < static_cast<global_ordinal_type>(nCol); i++)
1055  if (dist->VecMine(i)) vectorSet.push_back(i);
1056 
1057  dummy = Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
1058  Teuchos::RCP<const Tpetra::Map<> > domainMap =
1059  Teuchos::rcp(new Tpetra::Map<>(dummy, vectorSet(), 0, comm));
1060 
1061  Teuchos::Array<global_ordinal_type>().swap(vectorSet);
1062 
1063  // Build range map that corresponds to distribution
1064  for (global_ordinal_type i = 0;
1065  i < static_cast<global_ordinal_type>(nRow); i++)
1066  if (dist->VecMine(i)) vectorSet.push_back(i);
1067 
1068  dummy = Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
1069  Teuchos::RCP<const Tpetra::Map<> > rangeMap =
1070  Teuchos::rcp(new Tpetra::Map<>(dummy, vectorSet(), 0, comm));
1071 
1072  Teuchos::Array<global_ordinal_type>().swap(vectorSet);
1073 
1074  // FillComplete the matrix
1075  if (useTimers) {
1076  timer = Teuchos::null;
1077  timer = rcp(new Teuchos::TimeMonitor(
1078  *Teuchos::TimeMonitor::getNewTimer("RSF fillComplete")));
1079  }
1080 
1081  if (verbose && me == 0)
1082  std::cout << "Calling fillComplete" << std::endl;
1083 
1084  A->fillComplete(domainMap, rangeMap);
1085 
1086  if (useTimers)
1087  timer = Teuchos::null;
1088 
1089  if (verbose) {
1090  std::cout << "\nRank " << A->getComm()->getRank()
1091  << " nRows " << A->getLocalNumRows()
1092  << " minRow " << A->getRowMap()->getMinGlobalIndex()
1093  << " maxRow " << A->getRowMap()->getMaxGlobalIndex()
1094  << "\nRank " << A->getComm()->getRank()
1095  << " nCols " << A->getLocalNumCols()
1096  << " minCol " << A->getColMap()->getMinGlobalIndex()
1097  << " maxCol " << A->getColMap()->getMaxGlobalIndex()
1098  << "\nRank " << A->getComm()->getRank()
1099  << " nDomain " << A->getDomainMap()->getLocalNumElements()
1100  << " minDomain " << A->getDomainMap()->getMinGlobalIndex()
1101  << " maxDomain " << A->getDomainMap()->getMaxGlobalIndex()
1102  << "\nRank " << A->getComm()->getRank()
1103  << " nRange " << A->getRangeMap()->getLocalNumElements()
1104  << " minRange " << A->getRangeMap()->getMinGlobalIndex()
1105  << " maxRange " << A->getRangeMap()->getMaxGlobalIndex()
1106  << "\nRank " << A->getComm()->getRank()
1107  << " nEntries " << A->getLocalNumEntries()
1108  << std::endl;
1109  }
1110  }
1111 
1112  return A;
1113 }
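// Example of the overload that also returns the Distribution object (a
// sketch; useful, e.g., when the caller needs the resulting
// "LowerTriangularBlock" distribution after reading; aliases as above):
//
//   Teuchos::RCP<Distribution<global_ordinal_type, scalar_type> > dist;
//   Teuchos::ParameterList params;
//   params.set("distribution", "LowerTriangularBlock");
//   Teuchos::RCP<sparse_matrix_type> A =
//       reader_type::readSparseFile("matrix.mtx", comm, params, dist);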
1114 
1115 #endif