Zoltan2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
largestComponent2Binary.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Zoltan2: A package of combinatorial algorithms for scientific computing
4 //
5 // Copyright 2012 NTESS and the Zoltan2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 //
12 // Written by Seher Acer, 2019
13 // This code reads a mtx file and writes a binary file in CRS format:
14 // [nrows nnzs rowPtr[0 nrows] colInd[0 nnzs]
15 // 1) It symmetrizes the input, regardless of it is already symmetric or not
16 // 2) It removes the diagonal entries
17 // 3) The column indices are sorted in increasing order for each row
18 // 4) Index base for the output file is 0
20 // The corresponding file reader exists in "readMatrixFromBinaryFile.hpp"
21 // Note: This is research code. We do not guarantee it works in all cases.
23 
24 #include <iostream>
25 #include <fstream>
26 #include <sstream>
27 #include <set>
28 #include <vector>
29 #include <algorithm>
30 #include <queue>
31 
32 typedef long long ord_type;
33 
35  ord_type inn, ord_type *inRowPtr, ord_type *inColInd,
36  ord_type &outn, ord_type *outRowPtr, ord_type *outColInd)
37 {
38 
39  std::vector<int> mark(inn, -1);
40  std::queue<ord_type> active;
41  ord_type i, v, lastSearch, root, popCnt = 0, ncc = 0;
42 
43  root = seed;
44  lastSearch = 0;
45 
46  while(popCnt < inn) {
47  active.push(root);
48  mark[root] = ncc;
49 
50  while(active.empty() == false) {
51 
52  i = active.front();
53  active.pop();
54  popCnt ++;
55 
56  for(ord_type j = inRowPtr[i]; j < inRowPtr[i+1]; ++j) {
57  v = inColInd[j];
58  if(mark[v] == -1) {
59  mark[v] = ncc;
60  active.push(v);
61  }
62  }
63  }
64 
65  //search for an unmarked vertex to be the next root
66  for(ord_type i_r = lastSearch; i_r < inn; ++i_r) {
67  if(mark[i_r] == -1) {
68  root = i_r;
69  lastSearch = i_r;
70  break;
71  }
72  }
73 
74  //increase component id
75  ncc++;
76  }
77 
78  // find component sizes
79  std::vector<ord_type> compSize(ncc, 0);
80  for(ord_type i_c = 0; i_c < inn; ++i_c) {
81  if(mark[i_c] == -1) {
82  std::cout << "Vertex " << i_c << " is untouched,, Exiting!\n";
83  exit(1);
84  }
85  else
86  compSize[mark[i_c]]++;
87  }
88 
89  // find the largest component
90  ord_type maxSize = 0;
91  ord_type maxId = -1;
92  for(ord_type i_l = 0; i_l < ncc; ++i_l) {
93  if(compSize[i_l] > maxSize) {
94  maxSize = compSize[i_l];
95  maxId = i_l;
96  }
97  }
98 
99  // renumber the vertices
100  outn = 0;
101  for(ord_type i_v = 0; i_v < inn; ++i_v) {
102  if(mark[i_v] == maxId)
103  mark[i_v] = outn++;
104  else
105  mark[i_v] = -1;
106  }
107 
108  if(outn != maxSize) {
109  std::cout << "The number of vertices in this component: " << outn << " does not match the maximum component size: " << maxSize << "\n";
110  exit(1);
111  }
112 
113  // write the largest component to the output arrays
114  ord_type ptr = 0;
115  outn = 0;
116  outRowPtr[outn] = 0;
117  for(ord_type ii = 0; ii < inn; ii++) {
118  if(mark[ii] > -1){
119  for(ord_type j = inRowPtr[ii]; j < inRowPtr[ii+1]; ++j) {
120  v = inColInd[j];
121  if(mark[v] == -1) {
122  std::cout << "Neighbor " << v << " of " << ii << " is found to be absent in the component\n";
123  exit(1);
124  }
125  outColInd[ptr++] = mark[v];
126  }
127  outn++;
128  outRowPtr[outn] = ptr;
129  }
130  }
131 
132  if(outn != maxSize) {
133  std::cout << "The number of vertices written: " << outn << " does not match the maximum component size: " << maxSize << "\n";
134  exit(1);
135  }
136 
137 }
138 
139 int main(int argc, char* argv[])
140 {
141 
142  std::string line;
143  ord_type nrows, nnzs, r, c = 0;
144 
145  std::ifstream in(argv[1]);
146 
147  do{
148  getline(in, line);
149  }
150  while(line[0] == '%');
151 
152  std::stringstream ss1(line);
153  ss1 >> nrows >> nrows >> nnzs;
154  std::cout << "Number of Rows " << nrows << " Number of Columns " << nrows << std::endl;
155  std::vector<bool> diag(nrows, false);
156 
157  ord_type *rowPtr = new ord_type[nrows+2];
158  for(ord_type i = 0; i < nrows+2; i++)
159  rowPtr[i] = 0;
160 
161  while(getline(in, line)) {
162 
163  std::stringstream ss2(line);
164  ss2 >> r >> c;
165  r--;
166  c--;
167 
168  if(r != c) {
169  rowPtr[r+2]++;
170  rowPtr[c+2]++;
171  }
172  }
173  in.close();
174 
175  for(ord_type j0 = 0; j0 < nrows; j0++)
176  rowPtr[j0+2]++;
177 
178 
179  for(ord_type k = 2; k < nrows+2; k++)
180  rowPtr[k] += rowPtr[k-1];
181 
182  ord_type *colInd = new ord_type[rowPtr[nrows+1]];
183 
184  // re-read from the beginning, skip the intro
185  in.open(argv[1]);
186  do {
187  getline(in, line);
188  }
189  while(line[0] == '%');
190 
191 
192  while(getline(in, line)) {
193 
194  std::stringstream ss2(line);
195  ss2 >> r >> c;
196  r--;
197  c--;
198 
199  if(r != c) {
200  colInd[rowPtr[r+1]++] = c;
201  colInd[rowPtr[c+1]++] = r;
202  }
203 
204  }
205  in.close();
206 
207 
208  for(ord_type i0 = 0; i0 < nrows; i0++)
209  colInd[rowPtr[i0+1]++] = i0;
210 
211 
212  ord_type *rowPtrNew = new ord_type[nrows+1];
213  ord_type *colIndNew = new ord_type[rowPtr[nrows+1]];
214 
215  rowPtrNew[0] = 0;
216  ord_type ptr = 0;
217  ord_type prev = -1;
218  for(ord_type i1 = 0; i1 < nrows; i1++) {
219 
220  ord_type deg = rowPtr[i1+1] - rowPtr[i1];
221  if(deg > 0) {
222 
223  std::sort(&colInd[rowPtr[i1]], &colInd[rowPtr[i1+1]]);
224  colIndNew[ptr++] = colInd[rowPtr[i1]];
225  prev = colInd[rowPtr[i1]];
226  for(ord_type j1 = rowPtr[i1]+1; j1 < rowPtr[i1+1]; j1++) {
227  if(colInd[j1] != prev) {
228  colIndNew[ptr++] = colInd[j1];
229  prev = colInd[j1];
230  }
231  }
232  }
233 
234  rowPtrNew[i1+1] = ptr;
235  }
236 
237  for(ord_type i2 = 0; i2 < nrows; i2++) {
238  for(ord_type j2 = rowPtrNew[i2]; j2 < rowPtrNew[i2+1]; ++j2)
239  if(colIndNew[j2] == i2)
240  diag[i2] = true;
241 
242  if(diag[i2] == false)
243  std::cout << "ROW " << i2 << " misses diagonal\n";
244  }
245 
246  std::cout << argv[1] << " " << nrows << " " << ptr << " ";
247 
248  ord_type newnrows = -1;
249  findLargestComponent(0, //seed
250  nrows, rowPtrNew, colIndNew,
251  newnrows, rowPtr, colInd );
252  ptr = rowPtr[newnrows]; //new number of nonzeros
253 
254  ord_type deg, max = 0;
255  for(ord_type i3 = 0; i3 < nrows; i3++) {
256  deg = rowPtrNew[i3+1] - rowPtrNew[i3];
257  if(deg > max)
258  max = deg;
259  }
260 
261  // write into the output file
262  std::ofstream out(argv[2], std::ios::out | std::ios::binary);
263  out.write((char*)&newnrows, sizeof(ord_type));
264  out.write((char*)&ptr, sizeof(ord_type));
265 
266  out.write((char*)rowPtr, sizeof(ord_type)*(newnrows+1));
267  out.write((char*)colInd, sizeof(ord_type)*(ptr));
268 
269  out.close();
270 
271  std::cout << newnrows << " " << ptr << " " << max << "\n";
272 
273  delete [] rowPtr;
274  delete [] colInd;
275 
276  delete [] rowPtrNew;
277  delete [] colIndNew;
278 
279  return 0;
280 }
int main(int narg, char **arg)
Definition: coloring1.cpp:164
long long ord_type
tuple root
Definition: validXML.py:24
void findLargestComponent(ord_type seed, ord_type inn, ord_type *inRowPtr, ord_type *inColInd, ord_type &outn, ord_type *outRowPtr, ord_type *outColInd)