Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MatrixMarket_TpetraNew.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef __MATRIXMARKET_TPETRA_NEW_HPP
11 #define __MATRIXMARKET_TPETRA_NEW_HPP
12 
14 // Functions to read a MatrixMarket file and load it into a matrix.
15 // Adapted from
16 // Trilinos/packages/epetraext/src/inout/EpetraExt_CrsMatrixIn.cpp
17 // Modified by Jon Berry and Karen Devine to make matrix reallocations
18 // more efficient; rather than insert each non-zero separately, we
19 // collect rows of non-zeros for insertion.
20 // Modified by Karen Devine and Steve Plimpton to prevent all processors
21 // from reading the same file at the same time (ouch!); chunks of the file
22 // are read and broadcast by processor zero; each processor then extracts
23 // its nonzeros from the chunk, sorts them by row, and inserts them into
24 // the matrix.
25 // The variable "chunkSize" can be changed to modify the size of the chunks
26 // read from the file. Larger values of chunkSize lead to faster execution
27 // and greater memory use.
29 
30 private:
31 
32 template <typename global_ordinal_type, typename scalar_type>
33 static
34 Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> >
35 buildDistribution(
36  std::string &distribution, // string indicating whether to use 1D, 2D or
37  // file-based matrix distribution
38  size_t nRow, // Number of global matrix rows
39  size_t nCol, // Number of global matrix rows
40  const Teuchos::ParameterList &params, // Parameters to the file reading
41  const Teuchos::RCP<const Teuchos::Comm<int> > &comm
42  // communicator to be used in maps
43 )
44 {
45  // Function to build the sets of GIDs for 1D or 2D matrix distribution.
46  // Output is a Distribution object.
47 
48  int me = comm->getRank();
49 
50  using basedist_t = Distribution<global_ordinal_type,scalar_type>;
51  basedist_t *retval;
52 
53  bool randomize = false; // Randomly permute rows and columns before storing
54  {
55  const Teuchos::ParameterEntry *pe = params.getEntryPtr("randomize");
56  if (pe != NULL)
57  randomize = pe->getValue<bool>(&randomize);
58  }
59 
60  std::string partitionFile = ""; // File to reassign rows to parts
61  {
62  const Teuchos::ParameterEntry *pe = params.getEntryPtr("partitionFile");
63  if (pe != NULL)
64  partitionFile = pe->getValue<std::string>(&partitionFile);
65  }
66 
67  if (distribution == "2D") { // Generate 2D distribution
68  if (partitionFile != "") {
69  // Generate 2D distribution with vector partition file
70  TEUCHOS_TEST_FOR_EXCEPTION(randomize, std::logic_error,
71  "Randomization not available for 2DVec distributions.");
72  Distribution2DVec<global_ordinal_type,scalar_type> *dist =
73  new Distribution2DVec<global_ordinal_type, scalar_type>(
74  nRow, comm, params, partitionFile);
75  retval = dynamic_cast<basedist_t *>(dist);
76  }
77  else if (randomize) {
78  // Randomly assign rows/columns to processors
79  Distribution2DRandom<global_ordinal_type,scalar_type> *dist =
80  new Distribution2DRandom<global_ordinal_type,scalar_type>(
81  nRow, comm, params);
82  retval = dynamic_cast<basedist_t *>(dist);
83  }
84  else {
85  // The vector will be distributed linearly, with extra vector entries
86  // spread among processors to maintain balance in rows and columns.
87  Distribution2DLinear<global_ordinal_type,scalar_type> *dist =
88  new Distribution2DLinear<global_ordinal_type,scalar_type>(
89  nRow, comm, params);
90  retval = dynamic_cast<basedist_t *>(dist);
91  }
92  }
93  else if (distribution == "1D") {
94  if (partitionFile != "") {
95  // Generate 1D row-based distribution with vector partition file
96  Distribution1DVec<global_ordinal_type,scalar_type> *dist =
97  new Distribution1DVec<global_ordinal_type,scalar_type>(
98  nRow, comm, params, partitionFile);
99  retval = dynamic_cast<basedist_t*>(dist);
100  }
101  else if (randomize) {
102  // Randomly assign rows to processors.
103  Distribution1DRandom<global_ordinal_type,scalar_type> *dist =
104  new Distribution1DRandom<global_ordinal_type,scalar_type>(
105  nRow, comm, params);
106  retval = dynamic_cast<basedist_t *>(dist);
107  }
108  else {
109  // Linear map similar to Trilinos default.
110  Distribution1DLinear<global_ordinal_type,scalar_type> *dist =
111  new Distribution1DLinear<global_ordinal_type,scalar_type>(
112  nRow, comm, params);
113  retval = dynamic_cast<basedist_t *>(dist);
114  }
115  }
116  else if (distribution == "LowerTriangularBlock") {
117  if (randomize && me == 0) {
118  std::cout << "WARNING: Randomization not available for "
119  << "LowerTriangularBlock distributions." << std::endl;
120  }
121 
122  DistributionLowerTriangularBlock<global_ordinal_type,scalar_type> *dist =
123  new DistributionLowerTriangularBlock<global_ordinal_type,scalar_type>(
124  nRow, comm, params);
125  retval = dynamic_cast<basedist_t*>(dist);
126  }
127  else if (distribution == "MMFile") {
128  // Generate user-defined distribution from Matrix-Market file
129  if (randomize && me == 0) {
130  std::cout << "WARNING: Randomization not available for MMFile "
131  << "distributions." << std::endl;
132  }
133  DistributionMMFile<global_ordinal_type,scalar_type> *dist =
134  new DistributionMMFile<global_ordinal_type,scalar_type>(
135  nRow, comm, params);
136  retval = dynamic_cast<basedist_t*>(dist);
137  }
138 
139  else {
140  std::cout << "ERROR: Invalid distribution method " << distribution
141  << std::endl;
142  exit(-1);
143  }
144  return Teuchos::rcp<basedist_t>(retval);
145 }
146 
147 static
148 void
149 readMatrixMarket(
150  const std::string &filename, // MatrixMarket file to read
151  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
152  const Teuchos::ParameterList &params,
153  size_t &nRow,
154  size_t &nCol,
155  typename Distribution<global_ordinal_type,scalar_type>::LocalNZmap_t &localNZ,
156  Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> > &dist
157 )
158 {
159  bool useTimers = false; // should we time various parts of the reader?
160  {
161  const Teuchos::ParameterEntry *pe = params.getEntryPtr("useTimers");
162  if (pe != NULL)
163  useTimers = pe->getValue<bool>(&useTimers);
164  }
165 
166  Teuchos::RCP<Teuchos::TimeMonitor> timer = Teuchos::null;
167  if (useTimers)
168  timer = rcp(new Teuchos::TimeMonitor(
169  *Teuchos::TimeMonitor::getNewTimer("RMM parameterSetup")));
170 
171  int me = comm->getRank();
172 
173  bool verbose = false; // Print status as reading
174  {
175  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
176  if (pe != NULL)
177  verbose = pe->getValue<bool>(&verbose);
178  }
179 
180  size_t chunkSize = 500000; // Number of lines to read / broadcast at once
181  {
182  const Teuchos::ParameterEntry *pe = params.getEntryPtr("chunkSize");
183  if (pe != NULL)
184  chunkSize = pe->getValue<size_t>(&chunkSize);
185  }
186 
187  bool symmetrize = false; // Symmetrize the matrix
188  {
189  const Teuchos::ParameterEntry *pe = params.getEntryPtr("symmetrize");
190  if (pe != NULL)
191  symmetrize = pe->getValue<bool>(&symmetrize);
192  }
193 
194  bool transpose = false; // Store the transpose
195  {
196  const Teuchos::ParameterEntry *pe = params.getEntryPtr("transpose");
197  if (pe != NULL)
198  transpose = pe->getValue<bool>(&transpose);
199  }
200 
201  std::string diagonal = ""; // Are diagonal entries required (so add them)
202  // or ignored (so delete them) or neither?
203  // Default is neither.
204  {
205  const Teuchos::ParameterEntry *pe = params.getEntryPtr("diagonal");
206  if (pe != NULL)
207  diagonal = pe->getValue<std::string>(&diagonal);
208  }
209  bool ignoreDiagonal = (diagonal == "exclude");
210  bool requireDiagonal = (diagonal == "require");
211 
212  std::string distribution = "1D"; // Default distribution is 1D row-based
213  {
214  const Teuchos::ParameterEntry *pe = params.getEntryPtr("distribution");
215  if (pe != NULL)
216  distribution = pe->getValue<std::string>(&distribution);
217  }
218 
219  if (useTimers) {
220  timer = Teuchos::null;
221  timer = rcp(new Teuchos::TimeMonitor(
222  *Teuchos::TimeMonitor::getNewTimer("RMM header")));
223  }
224 
225  if (verbose && me == 0)
226  std::cout << "Reading matrix market file... " << filename << std::endl;
227 
228  FILE *fp = NULL;
229  size_t dim[3] = {0,0,0}; // nRow, nCol, nNz as read from MatrixMarket
230  MM_typecode mmcode;
231 
232  if (me == 0) {
233 
234  fp = fopen(filename.c_str(), "r");
235 
236  if (fp == NULL) {
237  std::cout << "Error: cannot open file " << filename << std::endl;
238  }
239  else {
240  // Read MatrixMarket banner to get type of matrix
241  if (mm_read_banner(fp, &mmcode) != 0) {
242  std::cout << "Error: bad MatrixMarket banner " << std::endl;
243  }
244  else {
245  // Error check for file types that this function can read
246  if (!mm_is_valid(mmcode) ||
247  mm_is_dense(mmcode) || mm_is_array(mmcode) ||
248  mm_is_complex(mmcode) ||
249  mm_is_skew(mmcode) || mm_is_hermitian(mmcode)) {
250  std::cout << "Error: matrix type is not supported by this reader\n "
251  << "This reader supports sparse, coordinate, "
252  << "real, pattern, integer, symmetric, general"
253  << std::endl;
254  }
255  else {
256  // Read nRow, nCol, nNz from MatrixMarket file
257  if (mm_read_mtx_crd_size(fp, &dim[0], &dim[1], &dim[2]) != 0) {
258  std::cout << "Error: bad matrix dimensions " << std::endl;
259  dim[0] = dim[1] = dim[2] = 0;
260  }
261  }
262  }
263  }
264  }
265 
266  // Broadcast relevant info
267  // Bad input if dim[0] or dim[1] still is zero after broadcast;
268  // all procs throw together
269  Teuchos::broadcast<int, size_t>(*comm, 0, 3, dim);
270  if (dim[0] == 0 || dim[1] == 0) {
271  throw std::runtime_error("Error: bad matrix header information");
272  }
273  Teuchos::broadcast<int, char>(*comm, 0, sizeof(MM_typecode), mmcode);
274 
275  nRow = dim[0];
276  nCol = dim[1];
277 
278  TEUCHOS_TEST_FOR_EXCEPTION(nRow != nCol, std::logic_error,
279  "This overload of readSparseFile requires nRow == nCol "
280  << "(nRow = " << nRow << ", nCol = " << nCol << "); "
281  << "For now, use a different overload of readSparseFile until this "
282  << "overload is fixed to read rectangular matrices. "
283  << "See Trilinos github issues #7045 and #8472.");
284 
285  size_t nNz = dim[2];
286  bool patternInput = mm_is_pattern(mmcode);
287  bool symmetricInput = mm_is_symmetric(mmcode);
288  if (symmetricInput) symmetrize = true;
289 
290  if (verbose && me == 0)
291  std::cout << "Matrix market file... "
292  << "\n symmetrize = " << symmetrize
293  << "\n transpose = " << transpose
294  << "\n change diagonal = " << diagonal
295  << "\n distribution = " << distribution
296  << std::endl;
297 
298  if (useTimers) {
299  timer = Teuchos::null;
300  timer = rcp(new Teuchos::TimeMonitor(
301  *Teuchos::TimeMonitor::getNewTimer("RMM distribution")));
302  }
303 
304  // Create distribution based on nRow, nCol, npRow, npCol
305  dist = buildDistribution<global_ordinal_type,scalar_type>(distribution,
306  nRow, nCol, params,
307  comm);
308  if (useTimers) {
309  timer = Teuchos::null;
310  timer = rcp(new Teuchos::TimeMonitor(
311  *Teuchos::TimeMonitor::getNewTimer("RMM readChunks")));
312  }
313 
314  std::set<global_ordinal_type> diagset;
315  // If diagonal == require, this set keeps track of
316  // which diagonal entries already existing so we can
317  // add those that don't
318 
319  using nzindex_t =
320  typename Distribution<global_ordinal_type,scalar_type>::NZindex_t;
321 
322  // Chunk information and buffers
323  const int maxLineLength = 81;
324  char *buffer = new char[chunkSize*maxLineLength+1];
325  size_t nChunk;
326  size_t nMillion = 0;
327  size_t nRead = 0;
328  size_t rlen;
329 
330  auto timerRead = Teuchos::TimeMonitor::getNewTimer("RMM readNonzeros");
331  auto timerSelect = Teuchos::TimeMonitor::getNewTimer("RMM selectNonzeros");
332  // Read chunks until the entire file is read
333  Teuchos::RCP<Teuchos::TimeMonitor> innerTimer = Teuchos::null;
334  while (nRead < nNz) {
335 
336  innerTimer = rcp(new Teuchos::TimeMonitor(*timerRead));
337 
338  if (nNz-nRead > chunkSize) nChunk = chunkSize;
339  else nChunk = (nNz - nRead);
340 
341  // Processor 0 reads a chunk
342  if (me == 0) {
343  char *eof;
344  rlen = 0;
345  for (size_t i = 0; i < nChunk; i++) {
346  eof = fgets(&buffer[rlen],maxLineLength,fp);
347  if (eof == NULL) {
348  std::cout << "Unexpected end of matrix file." << std::endl;
349  std::cout.flush();
350  exit(-1);
351  }
352  rlen += strlen(&buffer[rlen]);
353  }
354  buffer[rlen++]='\n';
355  }
356 
357  // Processor 0 broadcasts the chunk
358  Teuchos::broadcast<int, size_t>(*comm, 0, 1, &rlen);
359  Teuchos::broadcast<int, char>(*comm, 0, rlen, buffer);
360 
361  buffer[rlen++] = '\0';
362  nRead += nChunk;
363 
364  innerTimer = Teuchos::null;
365  innerTimer = rcp(new Teuchos::TimeMonitor(*timerSelect));
366 
367  // All processors check the received data, saving nonzeros belonging to them
368  char *lineptr = buffer;
369  for (rlen = 0; rlen < nChunk; rlen++) {
370 
371  char *next = strchr(lineptr,'\n');
372  global_ordinal_type I = atol(strtok(lineptr," \t\n"))
373  - 1; // since MatrixMarket files are one-based
374  global_ordinal_type J = atol(strtok(NULL," \t\n"))
375  - 1; // since MatrixMarket files are one-based
376 
377  scalar_type V = (patternInput ? -1. : atof(strtok(NULL," \t\n")));
378  lineptr = next + 1;
379 
380  // Special processing of nonzero
381  if ((I == J) && ignoreDiagonal) continue;
382 
383  if (transpose) std::swap<global_ordinal_type>(I,J);
384 
385  // Add nonzero (I,J) to the map if it should be on this processor
386  // Some file-based distributions have processor assignment stored as
387  // the non-zero's value, so pass the value to Mine.
388  if (dist->Mine(I,J,int(V))) {
389  nzindex_t idx = std::make_pair(I,J);
390  localNZ[idx] = V;
391  if (requireDiagonal && (I == J)) diagset.insert(I);
392  }
393 
394  // If symmetrizing, add (J,I) to the map if it should be on this processor
395  // Some file-based distributions have processor assignment stored as
396  // the non-zero's value, so pass the value to Mine.
397  if (symmetrize && (I != J) && dist->Mine(J,I,int(V))) {
398  // Add entry (J, I) if need to symmetrize
399  // This processor keeps this non-zero.
400  nzindex_t idx = std::make_pair(J,I);
401  localNZ[idx] = V;
402  }
403  }
404 
405  // Status check
406  if (verbose) {
407  if (nRead / 1000000 > nMillion) {
408  nMillion++;
409  if (me == 0) std::cout << nMillion << "M ";
410  }
411  }
412 
413  innerTimer = Teuchos::null;
414  }
415 
416  if (verbose && me == 0)
417  std::cout << std::endl << nRead << " nonzeros read " << std::endl;
418 
419  if (fp != NULL) fclose(fp);
420  delete [] buffer;
421 
422  if (useTimers) {
423  timer = Teuchos::null;
424  timer = rcp(new Teuchos::TimeMonitor(
425  *Teuchos::TimeMonitor::getNewTimer("RMM diagonal")));
426  }
427 
428  // Add diagonal entries if they are required.
429  // Check to make sure they are all here; add them if they are not.
430  if (requireDiagonal) {
431  if (dist->DistType() == MMFile) {
432  // All diagonal entries should be present in the file; we cannot create
433  // them for file-based data distributions.
434  // Do an error check to ensure they are all there.
435  size_t localss = diagset.size();
436  size_t globalss;
437  Teuchos::reduceAll<int, size_t>(*comm, Teuchos::REDUCE_SUM, 1,
438  &localss, &globalss);
439  TEUCHOS_TEST_FOR_EXCEPTION(globalss != nRow, std::logic_error,
440  "File-based nonzero distribution requires all diagonal "
441  << "entries to be present in the file. \n"
442  << "Number of diagonal entries in file = " << globalss << "\n"
443  << "Number of matrix rows = " << nRow << "\n");
444  }
445  else {
446  for (global_ordinal_type i = 0;
447  i < static_cast<global_ordinal_type>(nRow); i++)
448  {
449  if (dist->Mine(i,i)) {
450  if (diagset.find(i) == diagset.end()) {
451  nzindex_t idx = std::make_pair(i,i);
452  localNZ[idx] = 0;
453  }
454  }
455  }
456  }
457  }
458  // Done with diagset; free its memory
459  std::set<global_ordinal_type>().swap(diagset);
460 
461  if (useTimers)
462  timer = Teuchos::null;
463 }
464 
467 // (This format is used by the upcoming readBinary function) //
469 //
470 // File format:
471 // #vertices #edges src_1 dst_1 src_2 dst_2 ... src_#edges dst_#edges
472 //
473 // Types:
474 // #edges: unsigned long long
475 // everything else: unsigned int
476 //
477 // Base of indexing:
478 // 1
479 //
481 //
482 // A code example that converts MatrixMarket format into this format:
483 //
484 // typedef unsigned int ord_type;
485 // typedef unsigned long long size_type;
486 //
487 // std::string line;
488 // std::ifstream in(infilename);
489 // std::ofstream out(outfilename, std::ios::out | std::ios::binary);
490 //
491 // ord_type nv, dummy, edge[2];
492 // size_type ne;
493 //
494 // do
495 // std::getline (in, line);
496 // while(line[0] == '%');
497 //
498 // std::stringstream ss(line);
499 // ss >> nv >> dummy >> ne;
500 // out.write((char *)&nv, sizeof(ord_type));
501 // out.write((char *)&ne, sizeof(size_type));
502 //
503 // while(std::getline(in, line)) {
504 // std::stringstream ss2(line);
505 // ss2 >> edge[0] >> edge[1];
506 // out.write((char *)edge, sizeof(ord_type)*2);
507 //
508 // }
509 // in.close();
510 // out.close();
511 //
513 
514 static
515 void
516 readBinary(
517  const std::string &filename, // MatrixMarket file to read
518  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
519  const Teuchos::ParameterList &params,
520  size_t &nRow,
521  size_t &nCol,
522  typename Distribution<global_ordinal_type,scalar_type>::LocalNZmap_t &localNZ,
523  Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> > &dist
524 )
525 {
526 
527  int me = comm->getRank();
528 
529  bool verbose = false; // Print status as reading
530  {
531  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
532  if (pe != NULL)
533  verbose = pe->getValue<bool>(&verbose);
534  }
535 
536  size_t chunkSize = 500000; // Number of lines to read / broadcast at once
537  {
538  const Teuchos::ParameterEntry *pe = params.getEntryPtr("chunkSize");
539  if (pe != NULL)
540  chunkSize = pe->getValue<size_t>(&chunkSize);
541  }
542 
543  bool symmetrize = false; // Symmetrize the matrix
544  {
545  const Teuchos::ParameterEntry *pe = params.getEntryPtr("symmetrize");
546  if (pe != NULL)
547  symmetrize = pe->getValue<bool>(&symmetrize);
548  }
549 
550  bool transpose = false; // Store the transpose
551  {
552  const Teuchos::ParameterEntry *pe = params.getEntryPtr("transpose");
553  if (pe != NULL)
554  transpose = pe->getValue<bool>(&transpose);
555  }
556 
557  std::string diagonal = ""; // Are diagonal entries required (so add them)
558  // or ignored (so delete them) or neither?
559  // Default is neither.
560  {
561  const Teuchos::ParameterEntry *pe = params.getEntryPtr("diagonal");
562  if (pe != NULL)
563  diagonal = pe->getValue<std::string>(&diagonal);
564  }
565  bool ignoreDiagonal = (diagonal == "exclude");
566  bool requireDiagonal = (diagonal == "require");
567 
568  std::string distribution = "1D"; // Default distribution is 1D row-based
569  {
570  const Teuchos::ParameterEntry *pe = params.getEntryPtr("distribution");
571  if (pe != NULL)
572  distribution = pe->getValue<std::string>(&distribution);
573  }
574 
575  if (verbose && me == 0)
576  std::cout << "Reading binary file... " << filename << std::endl;
577 
578  FILE *fp = NULL;
579  size_t dim[3] = {0,0,0}; // Expected to read nRow and nNz, nCol = nRow
580 
581  if (me == 0) {
582 
583  fp = fopen(filename.c_str(), "rb");
584 
585  if (fp == NULL) {
586  std::cout << "Error: cannot open file " << filename << std::endl;
587  }
588  else {
589  // The header in the binary file contains only numVertices and numEdges
590  unsigned int nv = 0;
591  unsigned long long ne = 0;
592  if (fread(&nv, sizeof(unsigned int), 1, fp) != 1)
593  throw std::runtime_error("Error: bad number of read values.");
594  if (fread(&ne, sizeof(unsigned long long), 1, fp) != 1)
595  throw std::runtime_error("Error: bad number of read values.");
596  dim[0] = nv;
597  dim[1] = dim[0];
598  dim[2] = ne;
599  }
600  }
601 
602  // Broadcast relevant info
603  // Bad input if dim[0] or dim[1] still is zero after broadcast;
604  // all procs throw together
605  Teuchos::broadcast<int, size_t>(*comm, 0, 3, dim);
606  if (dim[0] == 0 || dim[1] == 0) {
607  throw std::runtime_error("Error: bad matrix header information");
608  }
609 
610  nRow = dim[0];
611  nCol = dim[1];
612  size_t nNz = dim[2];
613 
614  if (verbose && me == 0)
615  std::cout << "Binary file... "
616  << "\n symmetrize = " << symmetrize
617  << "\n transpose = " << transpose
618  << "\n change diagonal = " << diagonal
619  << "\n distribution = " << distribution
620  << std::endl;
621 
622  // Create distribution based on nRow, nCol, npRow, npCol
623  dist = buildDistribution<global_ordinal_type,scalar_type>(distribution,
624  nRow, nCol, params,
625  comm);
626 
627  std::set<global_ordinal_type> diagset;
628  // If diagonal == require, this set keeps track of
629  // which diagonal entries already existing so we can
630  // add those that don't
631 
632  using nzindex_t =
633  typename Distribution<global_ordinal_type,scalar_type>::NZindex_t;
634 
635  // Chunk information and buffers
636  unsigned int *buffer = new unsigned int[chunkSize*2];
637  size_t nChunk;
638  size_t nMillion = 0;
639  size_t nRead = 0;
640  size_t rlen;
641  const scalar_type ONE = Teuchos::ScalarTraits<scalar_type>::one();
642 
643 
644  // Read chunks until the entire file is read
645  while (nRead < nNz) {
646  if (nNz-nRead > chunkSize) nChunk = chunkSize;
647  else nChunk = (nNz - nRead);
648 
649  // Processor 0 reads a chunk
650  if (me == 0) {
651  size_t ret = 0;
652  rlen = 0;
653  for (size_t i = 0; i < nChunk; i++) {
654  ret = fread(&buffer[rlen], sizeof(unsigned int), 2, fp);
655  if (ret == 0) {
656  std::cout << "Unexpected end of matrix file." << std::endl;
657  std::cout.flush();
658  exit(-1);
659  }
660  rlen += 2;
661  }
662  }
663 
664  // Processor 0 broadcasts the chunk
665  Teuchos::broadcast<int, size_t>(*comm, 0, 1, &rlen);
666  Teuchos::broadcast<int, unsigned int>(*comm, 0, rlen, buffer);
667 
668  nRead += nChunk;
669 
670  // All processors check the received data, saving nonzeros belonging to them
671  for (rlen = 0; rlen < nChunk; rlen++) {
672 
673  global_ordinal_type I = buffer[2*rlen]-1;
674  global_ordinal_type J = buffer[2*rlen+1]-1;
675 
676  // Special processing of nonzero
677  if ((I == J) && ignoreDiagonal) continue;
678 
679  if (transpose) std::swap<global_ordinal_type>(I,J);
680 
681  // Add nonzero (I,J) to the map if it should be on this processor
682  // Some file-based distributions have processor assignment stored as
683  // the non-zero's value, so pass the value to Mine.
684  if (dist->Mine(I,J,ONE)) {
685  nzindex_t idx = std::make_pair(I,J);
686  localNZ[idx] = ONE; // For now, the input binary format does not
687  // support numeric values, so we insert one.
688  if (requireDiagonal && (I == J)) diagset.insert(I);
689  }
690 
691  // If symmetrizing, add (J,I) to the map if it should be on this processor
692  // Some file-based distributions have processor assignment stored as
693  // the non-zero's value, so pass the value to Mine.
694  if (symmetrize && (I != J) && dist->Mine(J,I,ONE)) {
695  // Add entry (J, I) if need to symmetrize
696  // This processor keeps this non-zero.
697  nzindex_t idx = std::make_pair(J,I);
698  localNZ[idx] = ONE;
699  }
700  }
701 
702  // Status check
703  if (verbose) {
704  if (nRead / 1000000 > nMillion) {
705  nMillion++;
706  if (me == 0) std::cout << nMillion << "M ";
707  }
708  }
709  }
710 
711  if (verbose && me == 0)
712  std::cout << std::endl << nRead << " nonzeros read " << std::endl;
713 
714  if (fp != NULL) fclose(fp);
715  delete [] buffer;
716 
717  // Add diagonal entries if they are required.
718  // Check to make sure they are all here; add them if they are not.
719  if (requireDiagonal) {
720  if (dist->DistType() == MMFile) {
721  // All diagonal entries should be present in the file; we cannot create
722  // them for file-based data distributions.
723  // Do an error check to ensure they are all there.
724  size_t localss = diagset.size();
725  size_t globalss;
726  Teuchos::reduceAll<int, size_t>(*comm, Teuchos::REDUCE_SUM, 1,
727  &localss, &globalss);
728  TEUCHOS_TEST_FOR_EXCEPTION(globalss != nRow, std::logic_error,
729  "File-based nonzero distribution requires all diagonal "
730  << "entries to be present in the file. \n"
731  << "Number of diagonal entries in file = " << globalss << "\n"
732  << "Number of matrix rows = " << nRow << "\n");
733  }
734  else {
735  for (global_ordinal_type i = 0;
736  i < static_cast<global_ordinal_type>(nRow); i++)
737  {
738  if (dist->Mine(i,i)) {
739  if (diagset.find(i) == diagset.end()) {
740  nzindex_t idx = std::make_pair(i,i);
741  localNZ[idx] = 0;
742  }
743  }
744  }
745  }
746  }
747  // Done with diagset; free its memory
748  std::set<global_ordinal_type>().swap(diagset);
749 
750 }
751 
754 // (This format is used by the upcoming readPerProcessBinary function) //
756 //
757 //
758 // FILE FORMAT:
759 // globalNumRows globalNumCols localNumNnzs row_1 col_1 ... row_n col_n,
760 // where n = localNumNnzs
761 //
762 //
763 // ASSUMPTIONS:
764 // - The nonzeros should be sorted by their row indices within each file.
765 // - Nonzeros have global row and column indices.
766 // - The user specifies the base <filename>, where rank i reads <filename>.i.cooBin.
767 //
768 //
769 // TYPES:
770 // localNumNnzs: unsigned long long
771 // everything else: unsigned int
772 //
773 //
774 // BASE OF INDEXING: 1
775 //
776 //
778 static
779 void
780 readPerProcessBinary(
781  const std::string &filename,
782  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
783  const Teuchos::ParameterList &params,
784  size_t &nRow,
785  size_t &nCol,
786  typename Distribution<global_ordinal_type,scalar_type>::LocalNZmap_t &localNZ,
787  Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> > &dist,
788  unsigned int* &buffer,
789  size_t &nNz
790 )
791 {
792 
793  int me = comm->getRank();
794 
795  bool verbose = false; // Print status as reading
796  {
797  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
798  if (pe != NULL)
799  verbose = pe->getValue<bool>(&verbose);
800  }
801 
802  std::string distribution = "1D"; // Default distribution is 1D row-based
803  {
804  const Teuchos::ParameterEntry *pe = params.getEntryPtr("distribution");
805  if (pe != NULL)
806  distribution = pe->getValue<std::string>(&distribution);
807  }
808 
809  if (verbose && me == 0)
810  std::cout << "Reading per-process binary files... " << filename << std::endl;
811 
812 
813  std::string rankFileName = filename + "." + std::to_string(me) + ".cooBin";
814  FILE *fp = NULL;
815 
816  fp = fopen(rankFileName.c_str(), "rb");
817  if (fp == NULL) {
818  std::cout << "Error: cannot open file " << filename << std::endl;
819  throw std::runtime_error("Error: non-existing input file: " + rankFileName);
820  }
821 
822  // The header in each per-process file: globalNumRows globalNumCols localNumNonzeros
823  unsigned int globalNumRows = 0, globalNumCols = 0;
824  unsigned long long localNumNonzeros = 0;
825  if (fread(&globalNumRows, sizeof(unsigned int), 1, fp) != 1)
826  throw std::runtime_error("Error: bad number of read values.");
827  if (fread(&globalNumCols, sizeof(unsigned int), 1, fp) != 1)
828  throw std::runtime_error("Error: bad number of read values.");
829  if (fread(&localNumNonzeros, sizeof(unsigned long long), 1, fp) != 1)
830  throw std::runtime_error("Error: bad number of read values.");
831 
832  nRow = static_cast<size_t>(globalNumRows);
833  nCol = static_cast<size_t>(globalNumCols);
834  nNz = static_cast<size_t>(localNumNonzeros);
835 
836  // Fill the simple buffer array instead of a std::map
837  // S. Acer: With large graphs, we can't afford std::map
838  buffer = new unsigned int[nNz*2];
839 
840  if(nNz > 0) {
841  size_t ret = fread(buffer, sizeof(unsigned int), 2*nNz, fp);
842  if (ret == 0) {
843  std::cout << "Unexpected end of matrix file: " << rankFileName << std::endl;
844  std::cout.flush();
845  delete [] buffer;
846  exit(-1);
847  }
848  }
849  if (fp != NULL) fclose(fp);
850 
851  // This barrier is not necessary but useful for debugging
852  comm->barrier();
853  if(verbose && me == 0)
854  std::cout << "All ranks finished reading their nonzeros from their individual files\n";
855 
856  // Create distribution based on nRow, nCol, npRow, npCol
857  dist = buildDistribution<global_ordinal_type,scalar_type>(distribution,
858  nRow, nCol, params,
859  comm);
860 }
861 
862 public:
863 
864 // This is the default interface.
865 static Teuchos::RCP<sparse_matrix_type>
866 readSparseFile(
867  const std::string &filename, // MatrixMarket file to read
868  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
869  const Teuchos::ParameterList &params
870 )
871 {
872  Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> > dist;
873  return readSparseFile(filename, comm, params, dist);
874 }
875 
876 // This version has the Distribution object as an output parameter.
877 // S. Acer needs the distribution object to get the chunk cuts from
878 // LowerTriangularBlock distribution.
879 static Teuchos::RCP<sparse_matrix_type>
880 readSparseFile(
881  const std::string &filename, // MatrixMarket file to read
882  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
883  const Teuchos::ParameterList &params,
884  Teuchos::RCP<Distribution<global_ordinal_type,scalar_type> > &dist
885 )
886 {
887  bool useTimers = false; // should we time various parts of the reader?
888  {
889  const Teuchos::ParameterEntry *pe = params.getEntryPtr("useTimers");
890  if (pe != NULL)
891  useTimers = pe->getValue<bool>(&useTimers);
892  }
893 
894  Teuchos::RCP<Teuchos::TimeMonitor> timer = Teuchos::null;
895  if (useTimers)
896  timer = rcp(new Teuchos::TimeMonitor(
897  *Teuchos::TimeMonitor::getNewTimer("RSF parameterSetup")));
898 
899  int me = comm->getRank();
900 
901  // Check parameters to determine how to process the matrix while reading
902  // TODO: Add validators for the parameters
903 
904  bool verbose = false; // Print status as reading
905  {
906  const Teuchos::ParameterEntry *pe = params.getEntryPtr("verbose");
907  if (pe != NULL)
908  verbose = pe->getValue<bool>(&verbose);
909  }
910 
911  bool callFillComplete = true; // should we fillComplete the new CrsMatrix?
912  {
913  const Teuchos::ParameterEntry *pe = params.getEntryPtr("callFillComplete");
914  if (pe != NULL)
915  callFillComplete = pe->getValue<bool>(&callFillComplete);
916  }
917 
918  // Don't want to use MatrixMarket's coordinate reader, because don't want
919  // entire matrix on one processor.
920  // Instead, Proc 0 reads nonzeros in chunks and broadcasts chunks to all
921  // processors.
922  // All processors insert nonzeros they own into a std::map
923 
924  // Storage for this processor's nonzeros.
925  using localNZmap_t =
926  typename Distribution<global_ordinal_type,scalar_type>::LocalNZmap_t;
927  localNZmap_t localNZ;
928 
929  bool binary = false; // should we read a binary file?
930  {
931  const Teuchos::ParameterEntry *pe = params.getEntryPtr("binary");
932  if (pe != NULL)
933  binary = pe->getValue<bool>(&binary);
934  }
935 
936  bool readPerProcess = false; // should we read a separate file per process?
937  {
938  const Teuchos::ParameterEntry *pe = params.getEntryPtr("readPerProcess");
939  if (pe != NULL)
940  readPerProcess = pe->getValue<bool>(&readPerProcess);
941  }
942 
943  if (useTimers) {
944  const char *timername = (binary?"RSF readBinary":"RSF readMatrixMarket");
945  timer = Teuchos::null;
946  timer = rcp(new Teuchos::TimeMonitor(
947  *Teuchos::TimeMonitor::getNewTimer(timername)));
948  }
949 
950  // Read nonzeros from the given file(s)
951  size_t nRow = 0, nCol = 0;
952  unsigned int *buffer=0; size_t nNz = 0;
953  if(binary){
954  if(readPerProcess)
955  readPerProcessBinary(filename, comm, params, nRow, nCol, localNZ, dist, buffer, nNz);
956  else
957  readBinary(filename, comm, params, nRow, nCol, localNZ, dist);
958  }
959  else
960  readMatrixMarket(filename, comm, params, nRow, nCol, localNZ, dist);
961 
962  if(readPerProcess == false){
963 
964  // Redistribute nonzeros as needed to satisfy the Distribution
965  // For most Distributions, this is a no-op
966  if (useTimers) {
967  timer = Teuchos::null;
968  timer = rcp(new Teuchos::TimeMonitor(
969  *Teuchos::TimeMonitor::getNewTimer("RSF redistribute")));
970  }
971 
972  dist->Redistribute(localNZ);
973  }
974 
975  if (useTimers) {
976  timer = Teuchos::null;
977  timer = rcp(new Teuchos::TimeMonitor(
978  *Teuchos::TimeMonitor::getNewTimer("RSF nonzerosConstruction")));
979  }
980 
981  // Now construct the matrix.
982  // Count number of entries in each row for best use of StaticProfile
983  if (verbose && me == 0)
984  std::cout << std::endl << "Constructing the matrix" << std::endl;
985 
986  Teuchos::Array<global_ordinal_type> rowIdx;
987  Teuchos::Array<size_t> nnzPerRow;
988  Teuchos::Array<global_ordinal_type> colIdx;
989  Teuchos::Array<scalar_type> val;
990  Teuchos::Array<size_t> offsets;
991 
992  if(readPerProcess) {
993  global_ordinal_type prevI = std::numeric_limits<global_ordinal_type>::max();
994  for (size_t it = 0; it < nNz; it++){
995  global_ordinal_type I = buffer[2*it]-1;
996  global_ordinal_type J = buffer[2*it+1]-1;
997  if (prevI != I) {
998  prevI = I;
999  rowIdx.push_back(I);
1000  nnzPerRow.push_back(0);
1001  }
1002  nnzPerRow.back()++;
1003  colIdx.push_back(J);
1004  }
1005  delete [] buffer;
1006  }
1007  else {
1008  // Exploit fact that map has entries sorted by I, then J
1009  global_ordinal_type prevI = std::numeric_limits<global_ordinal_type>::max();
1010  for (auto it = localNZ.begin(); it != localNZ.end(); it++) {
1011  global_ordinal_type I = it->first.first;
1012  global_ordinal_type J = it->first.second;
1013  scalar_type V = it->second;
1014  if (prevI != I) {
1015  prevI = I;
1016  rowIdx.push_back(I);
1017  nnzPerRow.push_back(0);
1018  }
1019  nnzPerRow.back()++;
1020  colIdx.push_back(J);
1021  val.push_back(V);
1022  }
1023 
1024  // Done with localNZ map; free it.
1025  localNZmap_t().swap(localNZ);
1026  }
1027 
1028  // Compute prefix sum in offsets array
1029  offsets.resize(rowIdx.size() + 1);
1030  offsets[0] = 0;
1031  for (size_t row = 0; row < static_cast<size_t>(rowIdx.size()); row++)
1032  offsets[row+1] = offsets[row] + nnzPerRow[row];
1033 
1034  if (useTimers) {
1035  timer = Teuchos::null;
1036  timer = rcp(new Teuchos::TimeMonitor(
1037  *Teuchos::TimeMonitor::getNewTimer("RSF insertNonzeros")));
1038  }
1039 
1040  // Create a new RowMap with only rows having non-zero entries.
1041  size_t dummy = Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
1042  Teuchos::RCP<const Tpetra::Map<> > rowMap =
1043  Teuchos::rcp(new Tpetra::Map<>(dummy, rowIdx(), 0, comm));
1044 
1045  Teuchos::RCP<sparse_matrix_type> A =
1046  rcp(new sparse_matrix_type(rowMap, nnzPerRow()));
1047 
1048  // Insert the global values into the matrix row-by-row.
1049  if (verbose && me == 0)
1050  std::cout << "Inserting global values" << std::endl;
1051 
1052  if(readPerProcess){
1053  const scalar_type ONE = Teuchos::ScalarTraits<scalar_type>::one();
1054  for (int i = 0; i < rowIdx.size(); i++) {
1055  size_t nnz = nnzPerRow[i];
1056  size_t off = offsets[i];
1057  val.assign(nnz, ONE);
1058  // ReadPerProcess routine does not read any numeric values from the file,
1059  // So we insert dummy values here.
1060  A->insertGlobalValues(rowIdx[i], colIdx(off, nnz), val());
1061  }
1062  }
1063  else {
1064  for (int i = 0; i < rowIdx.size(); i++) {
1065  size_t nnz = nnzPerRow[i];
1066  size_t off = offsets[i];
1067  A->insertGlobalValues(rowIdx[i], colIdx(off, nnz), val(off, nnz));
1068  }
1069  }
1070 
1071  // free memory that is no longer needed
1072  Teuchos::Array<size_t>().swap(nnzPerRow);
1073  Teuchos::Array<size_t>().swap(offsets);
1074  Teuchos::Array<global_ordinal_type>().swap(rowIdx);
1075  Teuchos::Array<global_ordinal_type>().swap(colIdx);
1076  Teuchos::Array<scalar_type>().swap(val);
1077 
1078  if (useTimers)
1079  timer = Teuchos::null;
1080 
1081  if (callFillComplete) {
1082 
1083  if (verbose && me == 0)
1084  std::cout << "Building vector maps" << std::endl;
1085 
1086  if (useTimers) {
1087  timer = Teuchos::null;
1088  timer = rcp(new Teuchos::TimeMonitor(
1089  *Teuchos::TimeMonitor::getNewTimer("RSF buildVectorMaps")));
1090  }
1091 
1092  // Build domain map that corresponds to distribution
1093  Teuchos::Array<global_ordinal_type> vectorSet;
1094  for (global_ordinal_type i = 0;
1095  i < static_cast<global_ordinal_type>(nCol); i++)
1096  if (dist->VecMine(i)) vectorSet.push_back(i);
1097 
1098  dummy = Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
1099  Teuchos::RCP<const Tpetra::Map<> > domainMap =
1100  Teuchos::rcp(new Tpetra::Map<>(dummy, vectorSet(), 0, comm));
1101 
1102  Teuchos::Array<global_ordinal_type>().swap(vectorSet);
1103 
1104  // Build range map that corresponds to distribution
1105  for (global_ordinal_type i = 0;
1106  i < static_cast<global_ordinal_type>(nRow); i++)
1107  if (dist->VecMine(i)) vectorSet.push_back(i);
1108 
1109  dummy = Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
1110  Teuchos::RCP<const Tpetra::Map<> > rangeMap =
1111  Teuchos::rcp(new Tpetra::Map<>(dummy, vectorSet(), 0, comm));
1112 
1113  Teuchos::Array<global_ordinal_type>().swap(vectorSet);
1114 
1115  // FillComplete the matrix
1116  if (useTimers) {
1117  timer = Teuchos::null;
1118  timer = rcp(new Teuchos::TimeMonitor(
1119  *Teuchos::TimeMonitor::getNewTimer("RSF fillComplete")));
1120  }
1121 
1122  if (verbose && me == 0)
1123  std::cout << "Calling fillComplete" << std::endl;
1124 
1125  A->fillComplete(domainMap, rangeMap);
1126 
1127  if (useTimers)
1128  timer = Teuchos::null;
1129 
1130  if (verbose) {
1131  std::cout << "\nRank " << A->getComm()->getRank()
1132  << " nRows " << A->getLocalNumRows()
1133  << " minRow " << A->getRowMap()->getMinGlobalIndex()
1134  << " maxRow " << A->getRowMap()->getMaxGlobalIndex()
1135  << "\nRank " << A->getComm()->getRank()
1136  << " nCols " << A->getLocalNumCols()
1137  << " minCol " << A->getColMap()->getMinGlobalIndex()
1138  << " maxCol " << A->getColMap()->getMaxGlobalIndex()
1139  << "\nRank " << A->getComm()->getRank()
1140  << " nDomain " << A->getDomainMap()->getLocalNumElements()
1141  << " minDomain " << A->getDomainMap()->getMinGlobalIndex()
1142  << " maxDomain " << A->getDomainMap()->getMaxGlobalIndex()
1143  << "\nRank " << A->getComm()->getRank()
1144  << " nRange " << A->getRangeMap()->getLocalNumElements()
1145  << " minRange " << A->getRangeMap()->getMinGlobalIndex()
1146  << " maxRange " << A->getRangeMap()->getMaxGlobalIndex()
1147  << "\nRank " << A->getComm()->getRank()
1148  << " nEntries " << A->getLocalNumEntries()
1149  << std::endl;
1150  }
1151  }
1152 
1153  return A;
1154 }
1155 
1156 
1157 
1158 #endif
1159 
A parallel distribution of indices over processes.