Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
largestComponent2Binary.cpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Zoltan2: A package of combinatorial algorithms for scientific computing
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Seher Acer (sacer@sandia.gov)
39 // Erik Boman (egboman@sandia.gov)
40 // Siva Rajamanickam (srajama@sandia.gov)
41 // ***********************************************************************
42 //
43 // @HEADER
44 //
46 // Written by Seher Acer, 2019
47 // This code reads a mtx file and writes a binary file in CRS format:
48 // [nrows nnzs rowPtr[0 nrows] colInd[0 nnzs]
49 // 1) It symmetrizes the input, regardless of it is already symmetric or not
50 // 2) It removes the diagonal entries
51 // 3) The column indices are sorted in increasing order for each row
52 // 4) Index base for the output file is 0
54 // The corresponding file reader exists in "readMatrixFromBinaryFile.hpp"
55 // Note: This is research code. We do not guarantee it works in all cases.
57 
58 #include <iostream>
59 #include <fstream>
60 #include <sstream>
61 #include <set>
62 #include <vector>
63 #include <algorithm>
64 #include <queue>
65 
66 typedef long long ord_type;
67 
69  ord_type inn, ord_type *inRowPtr, ord_type *inColInd,
70  ord_type &outn, ord_type *outRowPtr, ord_type *outColInd)
71 {
72 
73  std::vector<int> mark(inn, -1);
74  std::queue<ord_type> active;
75  ord_type i, v, lastSearch, root, popCnt = 0, ncc = 0;
76 
77  root = seed;
78  lastSearch = 0;
79 
80  while(popCnt < inn) {
81  active.push(root);
82  mark[root] = ncc;
83 
84  while(active.empty() == false) {
85 
86  i = active.front();
87  active.pop();
88  popCnt ++;
89 
90  for(ord_type j = inRowPtr[i]; j < inRowPtr[i+1]; ++j) {
91  v = inColInd[j];
92  if(mark[v] == -1) {
93  mark[v] = ncc;
94  active.push(v);
95  }
96  }
97  }
98 
99  //search for an unmarked vertex to be the next root
100  for(ord_type i_r = lastSearch; i_r < inn; ++i_r) {
101  if(mark[i_r] == -1) {
102  root = i_r;
103  lastSearch = i_r;
104  break;
105  }
106  }
107 
108  //increase component id
109  ncc++;
110  }
111 
112  // find component sizes
113  std::vector<ord_type> compSize(ncc, 0);
114  for(ord_type i_c = 0; i_c < inn; ++i_c) {
115  if(mark[i_c] == -1) {
116  std::cout << "Vertex " << i_c << " is untouched,, Exiting!\n";
117  exit(1);
118  }
119  else
120  compSize[mark[i_c]]++;
121  }
122 
123  // find the largest component
124  ord_type maxSize = 0;
125  ord_type maxId = -1;
126  for(ord_type i_l = 0; i_l < ncc; ++i_l) {
127  if(compSize[i_l] > maxSize) {
128  maxSize = compSize[i_l];
129  maxId = i_l;
130  }
131  }
132 
133  // renumber the vertices
134  outn = 0;
135  for(ord_type i_v = 0; i_v < inn; ++i_v) {
136  if(mark[i_v] == maxId)
137  mark[i_v] = outn++;
138  else
139  mark[i_v] = -1;
140  }
141 
142  if(outn != maxSize) {
143  std::cout << "The number of vertices in this component: " << outn << " does not match the maximum component size: " << maxSize << "\n";
144  exit(1);
145  }
146 
147  // write the largest component to the output arrays
148  ord_type ptr = 0;
149  outn = 0;
150  outRowPtr[outn] = 0;
151  for(ord_type ii = 0; ii < inn; ii++) {
152  if(mark[ii] > -1){
153  for(ord_type j = inRowPtr[ii]; j < inRowPtr[ii+1]; ++j) {
154  v = inColInd[j];
155  if(mark[v] == -1) {
156  std::cout << "Neighbor " << v << " of " << ii << " is found to be absent in the component\n";
157  exit(1);
158  }
159  outColInd[ptr++] = mark[v];
160  }
161  outn++;
162  outRowPtr[outn] = ptr;
163  }
164  }
165 
166  if(outn != maxSize) {
167  std::cout << "The number of vertices written: " << outn << " does not match the maximum component size: " << maxSize << "\n";
168  exit(1);
169  }
170 
171 }
172 
173 int main(int argc, char* argv[])
174 {
175 
176  std::string line;
177  ord_type nrows, nnzs, r, c = 0;
178 
179  std::ifstream in(argv[1]);
180 
181  do{
182  getline(in, line);
183  }
184  while(line[0] == '%');
185 
186  std::stringstream ss1(line);
187  ss1 >> nrows >> nrows >> nnzs;
188  std::cout << "Number of Rows " << nrows << " Number of Columns " << nrows << std::endl;
189  std::vector<bool> diag(nrows, false);
190 
191  ord_type *rowPtr = new ord_type[nrows+2];
192  for(ord_type i = 0; i < nrows+2; i++)
193  rowPtr[i] = 0;
194 
195  while(getline(in, line)) {
196 
197  std::stringstream ss2(line);
198  ss2 >> r >> c;
199  r--;
200  c--;
201 
202  if(r != c) {
203  rowPtr[r+2]++;
204  rowPtr[c+2]++;
205  }
206  }
207  in.close();
208 
209  for(ord_type j0 = 0; j0 < nrows; j0++)
210  rowPtr[j0+2]++;
211 
212 
213  for(ord_type k = 2; k < nrows+2; k++)
214  rowPtr[k] += rowPtr[k-1];
215 
216  ord_type *colInd = new ord_type[rowPtr[nrows+1]];
217 
218  // re-read from the beginning, skip the intro
219  in.open(argv[1]);
220  do {
221  getline(in, line);
222  }
223  while(line[0] == '%');
224 
225 
226  while(getline(in, line)) {
227 
228  std::stringstream ss2(line);
229  ss2 >> r >> c;
230  r--;
231  c--;
232 
233  if(r != c) {
234  colInd[rowPtr[r+1]++] = c;
235  colInd[rowPtr[c+1]++] = r;
236  }
237 
238  }
239  in.close();
240 
241 
242  for(ord_type i0 = 0; i0 < nrows; i0++)
243  colInd[rowPtr[i0+1]++] = i0;
244 
245 
246  ord_type *rowPtrNew = new ord_type[nrows+1];
247  ord_type *colIndNew = new ord_type[rowPtr[nrows+1]];
248 
249  rowPtrNew[0] = 0;
250  ord_type ptr = 0;
251  ord_type prev = -1;
252  for(ord_type i1 = 0; i1 < nrows; i1++) {
253 
254  ord_type deg = rowPtr[i1+1] - rowPtr[i1];
255  if(deg > 0) {
256 
257  std::sort(&colInd[rowPtr[i1]], &colInd[rowPtr[i1+1]]);
258  colIndNew[ptr++] = colInd[rowPtr[i1]];
259  prev = colInd[rowPtr[i1]];
260  for(ord_type j1 = rowPtr[i1]+1; j1 < rowPtr[i1+1]; j1++) {
261  if(colInd[j1] != prev) {
262  colIndNew[ptr++] = colInd[j1];
263  prev = colInd[j1];
264  }
265  }
266  }
267 
268  rowPtrNew[i1+1] = ptr;
269  }
270 
271  for(ord_type i2 = 0; i2 < nrows; i2++) {
272  for(ord_type j2 = rowPtrNew[i2]; j2 < rowPtrNew[i2+1]; ++j2)
273  if(colIndNew[j2] == i2)
274  diag[i2] = true;
275 
276  if(diag[i2] == false)
277  std::cout << "ROW " << i2 << " misses diagonal\n";
278  }
279 
280  std::cout << argv[1] << " " << nrows << " " << ptr << " ";
281 
282  ord_type newnrows = -1;
283  findLargestComponent(0, //seed
284  nrows, rowPtrNew, colIndNew,
285  newnrows, rowPtr, colInd );
286  ptr = rowPtr[newnrows]; //new number of nonzeros
287 
288  ord_type deg, max = 0;
289  for(ord_type i3 = 0; i3 < nrows; i3++) {
290  deg = rowPtrNew[i3+1] - rowPtrNew[i3];
291  if(deg > max)
292  max = deg;
293  }
294 
295  // write into the output file
296  std::ofstream out(argv[2], std::ios::out | std::ios::binary);
297  out.write((char*)&newnrows, sizeof(ord_type));
298  out.write((char*)&ptr, sizeof(ord_type));
299 
300  out.write((char*)rowPtr, sizeof(ord_type)*(newnrows+1));
301  out.write((char*)colInd, sizeof(ord_type)*(ptr));
302 
303  out.close();
304 
305  std::cout << newnrows << " " << ptr << " " << max << "\n";
306 
307  delete [] rowPtr;
308  delete [] colInd;
309 
310  delete [] rowPtrNew;
311  delete [] colIndNew;
312 
313  return 0;
314 }
int main(int narg, char **arg)
Definition: coloring1.cpp:199
long long ord_type
tuple root
Definition: validXML.py:24
void findLargestComponent(ord_type seed, ord_type inn, ord_type *inRowPtr, ord_type *inColInd, ord_type &outn, ord_type *outRowPtr, ord_type *outColInd)