Amesos Package Browser (Single Doxygen Collection)  Development
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
amesos_btf_l_maxtrans.c
Go to the documentation of this file.
1 /* ========================================================================== */
2 /* === BTF_MAXTRANS ========================================================= */
3 /* ========================================================================== */
4 
5 /* Finds a column permutation that maximizes the number of entries on the
6  * diagonal of a sparse matrix. See btf.h for more information.
7  *
8  * This function is identical to cs_maxtrans in CSparse, with the following
9  * exceptions:
10  *
11  * (1) cs_maxtrans finds both jmatch and imatch, where jmatch [i] = j and
12  * imatch [j] = i if row i is matched to column j. This function returns
13  * just jmatch (the Match array). The MATLAB interface to cs_maxtrans
14  * (the single-output cs_dmperm) returns imatch, not jmatch to the MATLAB
15  * caller.
16  *
17  * (2) cs_maxtrans includes a pre-pass that counts the number of non-empty
18  * rows and columns (m2 and n2, respectively), and computes the matching
19  * using the transpose of A if m2 < n2. cs_maxtrans also returns quickly
20  * if the diagonal of the matrix is already zero-free. This pre-pass
21  * allows cs_maxtrans to be much faster than maxtrans, if the use of the
22  * transpose is warranted.
23  *
24  * However, for square structurally non-singular matrices with one or more
25  * zeros on the diagonal, the pre-pass is a waste of time, and for these
26  * matrices, maxtrans can be twice as fast as cs_maxtrans. Since the
27  * maxtrans function is intended primarily for square matrices that are
28  * typically structurally nonsingular, the pre-pass is not included here.
29  * If this maxtrans function is used on a matrix with many more columns
30  * than rows, consider passing the transpose to this function, or use
31  * cs_maxtrans instead.
32  *
33  * (3) cs_maxtrans can operate as a randomized algorithm, to help avoid
34  * rare cases of excessive run-time.
35  *
36  * (4) this maxtrans function includes an option that limits the total work
37  * performed. If this limit is reached, the maximum transveral might not
38  * be found.
39  *
40  * Thus, for general usage, cs_maxtrans is preferred. For square matrices that
41  * are typically structurally non-singular, maxtrans is preferred. A partial
42  * maxtrans can still be very useful when solving a sparse linear system.
43  *
44  * Copyright (c) 2004-2007. Tim Davis, University of Florida,
45  * with support from Sandia National Laboratories. All Rights Reserved.
46  */
47 
48 /* This file should make the long int version of BTF */
49 #define DLONG 1
50 
51 #include "amesos_btf_decl.h"
52 #include "amesos_btf_internal.h"
53 
54 
55 /* ========================================================================== */
56 /* === augment ============================================================== */
57 /* ========================================================================== */
58 
59 /* Perform a depth-first-search starting at column k, to find an augmenting
60  * path. An augmenting path is a sequence of row/column pairs (i1,k), (i2,j1),
61  * (i3,j2), ..., (i(s+1), js), such that all of the following properties hold:
62  *
63  * * column k is not matched to any row
64  * * entries in the path are nonzero
65  * * the pairs (i1,j1), (i2,j2), (i3,j3) ..., (is,js) have been
66  * previously matched to each other
67  * * (i(s+1), js) is nonzero, and row i(s+1) is not matched to any column
68  *
69  * Once this path is found, the matching can be changed to the set of pairs
70  * path. An augmenting path is a sequence of row/column pairs
71  *
72  * (i1,k), (i2,j1), (i3,j2), ..., (i(s+1), js)
73  *
74  * Once a row is matched with a column it remains matched with some column, but
75  * not necessarily the column it was first matched with.
76  *
77  * In the worst case, this function can examine every nonzero in A. Since it
78  * is called n times by maxtrans, the total time of maxtrans can be as high as
79  * O(n*nnz(A)). To limit this work, pass a value of maxwork > 0. Then at
80  * most O((maxwork+1)*nnz(A)) work will be performed; the maximum matching might
81  * not be found, however.
82  *
83  * This routine is very similar to the dfs routine in klu_kernel.c, in the
84  * KLU sparse LU factorization package. It is essentially identical to the
85  * cs_augment routine in CSparse, and its recursive version (augment function
86  * in cs_maxtransr_mex.c), except that this routine allows for the search to be
87  * terminated early if too much work is being performed.
88  *
89  * The algorithm is based on the paper "On Algorithms for obtaining a maximum
90  * transversal" by Iain Duff, ACM Trans. Mathematical Software, vol 7, no. 1,
91  * pp. 315-330, and "Algorithm 575: Permutations for a zero-free diagonal",
92  * same issue, pp. 387-390. The code here is a new implementation of that
93  * algorithm, with different data structures and control flow. After writing
94  * this code, I carefully compared my algorithm with MC21A/B (ACM Algorithm 575)
95  * Some of the comparisons are partial because I didn't dig deeply into all of
96  * the details of MC21A/B, such as how the stack is maintained. The following
97  * arguments are essentially identical between this code and MC21A:
98  *
99  * maxtrans MC21A,B
100  * -------- -------
101  * n N identical
102  * k JORD identical
103  * Ap IP column / row pointers
104  * Ai ICN row / column indices
105  * Ap[n] LICN length of index array (# of nonzeros in A)
106  * Match IPERM output column / row permutation
107  * nmatch NUMNZ # of nonzeros on diagonal of permuted matrix
108  * Flag CV mark a node as visited by the depth-first-search
109  *
110  * The following are different, but analogous:
111  *
112  * Cheap ARP indicates what part of the a column / row has
113  * already been matched.
114  *
115  * The following arguments are very different:
116  *
117  * - LENR # of entries in each row/column (unused in maxtrans)
118  * Pstack OUT Pstack keeps track of where we are in the depth-
119  * first-search scan of column j. I think that OUT
120  * plays a similar role in MC21B, but I'm unsure.
121  * Istack PR keeps track of the rows in the path. PR is a link
122  * list, though, whereas Istack is a stack. Maxtrans
123  * does not use any link lists.
124  * Jstack OUT? PR? the stack for nodes in the path (unsure)
125  *
126  * The following control structures are roughly comparable:
127  *
128  * maxtrans MC21B
129  * -------- -----
130  * for (k = 0 ; k < n ; k++) DO 100 JORD=1,N
131  * while (head >= 0) DO 70 K=1,JORD
132  * for (p = Cheap [j] ; ...) DO 20 II=IN1,IN2
133  * for (p = head ; ...) DO 90 K=1,JORD
134  */
135 
136 static Int amesos_augment
137 (
138  Int k, /* which stage of the main loop we're in */
139  Int Ap [ ], /* column pointers, size n+1 */
140  Int Ai [ ], /* row indices, size nz = Ap [n] */
141  Int Match [ ], /* size n, Match [i] = j if col j matched to i */
142  Int Cheap [ ], /* rows Ai [Ap [j] .. Cheap [j]-1] alread matched */
143  Int Flag [ ], /* Flag [j] = k if j already visited this stage */
144  Int Istack [ ], /* size n. Row index stack. */
145  Int Jstack [ ], /* size n. Column index stack. */
146  Int Pstack [ ], /* size n. Keeps track of position in adjacency list */
147  double *work, /* work performed by the depth-first-search */
148  double maxwork /* maximum work allowed */
149 )
150 {
151  /* local variables, but "global" to all DFS levels: */
152  Int found ; /* true if match found. */
153  Int head ; /* top of stack */
154 
155  /* variables that are purely local to any one DFS level: */
156  Int j2 ; /* the next DFS goes to node j2 */
157  Int pend ; /* one past the end of the adjacency list for node j */
158  Int pstart ;
159  Int quick ;
160 
161  /* variables that need to be pushed then popped from the stack: */
162  Int i ; /* the row tentatively matched to i if DFS successful */
163  Int j ; /* the DFS is at the current node j */
164  Int p ; /* current index into the adj. list for node j */
165  /* the variables i, j, and p are stacked in Istack, Jstack, and Pstack */
166 
167  quick = (maxwork > 0) ;
168 
169  /* start a DFS to find a match for column k */
170  found = FALSE ;
171  i = EMPTY ;
172  head = 0 ;
173  Jstack [0] = k ;
174  ASSERT (Flag [k] != k) ;
175 
176  while (head >= 0)
177  {
178  j = Jstack [head] ;
179  pend = Ap [j+1] ;
180 
181  if (Flag [j] != k) /* a node is not yet visited */
182  {
183 
184  /* -------------------------------------------------------------- */
185  /* prework for node j */
186  /* -------------------------------------------------------------- */
187 
188  /* first time that j has been visited */
189  Flag [j] = k ;
190  /* cheap assignment: find the next unmatched row in col j. This
191  * loop takes at most O(nnz(A)) time for the sum total of all
192  * calls to augment. */
193  for (p = Cheap [j] ; p < pend && !found ; p++)
194  {
195  i = Ai [p] ;
196  found = (Match [i] == EMPTY) ;
197  }
198  Cheap [j] = p ;
199 
200  /* -------------------------------------------------------------- */
201 
202  /* prepare for DFS */
203  if (found)
204  {
205  /* end of augmenting path, column j matched with row i */
206  Istack [head] = i ;
207  break ;
208  }
209  /* set Pstack [head] to the first entry in column j to scan */
210  Pstack [head] = Ap [j] ;
211  }
212 
213  /* ------------------------------------------------------------------ */
214  /* quick return if too much work done */
215  /* ------------------------------------------------------------------ */
216 
217  if (quick && *work > maxwork)
218  {
219  /* too much work has been performed; abort the search */
220  return (EMPTY) ;
221  }
222 
223  /* ------------------------------------------------------------------ */
224  /* DFS for nodes adjacent to j */
225  /* ------------------------------------------------------------------ */
226 
227  /* If cheap assignment not made, continue the depth-first search. All
228  * rows in column j are already matched. Add the adjacent nodes to the
229  * stack by iterating through until finding another non-visited node.
230  *
231  * It is the following loop that can force maxtrans to take
232  * O(n*nnz(A)) time. */
233 
234  pstart = Pstack [head] ;
235  for (p = pstart ; p < pend ; p++)
236  {
237  i = Ai [p] ;
238  j2 = Match [i] ;
239  ASSERT (j2 != EMPTY) ;
240  if (Flag [j2] != k)
241  {
242  /* Node j2 is not yet visited, start a depth-first search on
243  * node j2. Keep track of where we left off in the scan of adj
244  * list of node j so we can restart j where we left off. */
245  Pstack [head] = p + 1 ;
246  /* Push j2 onto the stack and immediately break so we can
247  * recurse on node j2. Also keep track of row i which (if this
248  * search for an augmenting path works) will be matched with the
249  * current node j. */
250  Istack [head] = i ;
251  Jstack [++head] = j2 ;
252  break ;
253  }
254  }
255 
256  /* ------------------------------------------------------------------ */
257  /* determine how much work was just performed */
258  /* ------------------------------------------------------------------ */
259 
260  *work += (p - pstart + 1) ;
261 
262  /* ------------------------------------------------------------------ */
263  /* node j is done, but the postwork is postponed - see below */
264  /* ------------------------------------------------------------------ */
265 
266  if (p == pend)
267  {
268  /* If all adjacent nodes of j are already visited, pop j from
269  * stack and continue. We failed to find a match. */
270  head-- ;
271  }
272  }
273 
274  /* postwork for all nodes j in the stack */
275  /* unwind the path and make the corresponding matches */
276  if (found)
277  {
278  for (p = head ; p >= 0 ; p--)
279  {
280  j = Jstack [p] ;
281  i = Istack [p] ;
282 
283  /* -------------------------------------------------------------- */
284  /* postwork for node j */
285  /* -------------------------------------------------------------- */
286  /* if found, match row i with column j */
287  Match [i] = j ;
288  }
289  }
290  return (found) ;
291 }
292 
293 
294 /* ========================================================================== */
295 /* === maxtrans ============================================================= */
296 /* ========================================================================== */
297 
298 Int BTF(maxtrans) /* returns # of columns in the matching */
299 (
300  /* --- input --- */
301  Int nrow, /* A is nrow-by-ncol in compressed column form */
302  Int ncol,
303  Int Ap [ ], /* size ncol+1 */
304  Int Ai [ ], /* size nz = Ap [ncol] */
305  double maxwork, /* do at most maxwork*nnz(A) work; no limit if <= 0. This
306  * work limit excludes the O(nnz(A)) cheap-match phase. */
307 
308  /* --- output --- */
309  double *work, /* work = -1 if maxwork > 0 and the total work performed
310  * reached the maximum of maxwork*nnz(A)).
311  * Otherwise, work = the total work performed. */
312 
313  Int Match [ ], /* size nrow. Match [i] = j if column j matched to row i */
314 
315  /* --- workspace --- */
316  Int Work [ ] /* size 5*ncol */
317 )
318 {
319  Int *Cheap, *Flag, *Istack, *Jstack, *Pstack ;
320  Int i, j, k, nmatch, work_limit_reached, result ;
321 
322  /* ---------------------------------------------------------------------- */
323  /* get workspace and initialize */
324  /* ---------------------------------------------------------------------- */
325 
326  Cheap = Work ; Work += ncol ;
327  Flag = Work ; Work += ncol ;
328 
329  /* stack for non-recursive depth-first search in augment function */
330  Istack = Work ; Work += ncol ;
331  Jstack = Work ; Work += ncol ;
332  Pstack = Work ;
333 
334  /* in column j, rows Ai [Ap [j] .. Cheap [j]-1] are known to be matched */
335  for (j = 0 ; j < ncol ; j++)
336  {
337  Cheap [j] = Ap [j] ;
338  Flag [j] = EMPTY ;
339  }
340 
341  /* all rows and columns are currently unmatched */
342  for (i = 0 ; i < nrow ; i++)
343  {
344  Match [i] = EMPTY ;
345  }
346 
347  if (maxwork > 0)
348  {
349  maxwork *= Ap [ncol] ;
350  }
351  *work = 0 ;
352 
353  /* ---------------------------------------------------------------------- */
354  /* find a matching row for each column k */
355  /* ---------------------------------------------------------------------- */
356 
357  nmatch = 0 ;
358  work_limit_reached = FALSE ;
359  for (k = 0 ; k < ncol ; k++)
360  {
361  /* find an augmenting path to match some row i to column k */
362  result = amesos_augment (k, Ap, Ai, Match, Cheap, Flag, Istack, Jstack, Pstack,
363  work, maxwork) ;
364  if (result == TRUE)
365  {
366  /* we found it. Match [i] = k for some row i has been done. */
367  nmatch++ ;
368  }
369  else if (result == EMPTY)
370  {
371  /* augment gave up because of too much work, and no match found */
372  work_limit_reached = TRUE ;
373  }
374  }
375 
376  /* ---------------------------------------------------------------------- */
377  /* return the Match, and the # of matches made */
378  /* ---------------------------------------------------------------------- */
379 
380  /* At this point, row i is matched to j = Match [i] if j >= 0. i is an
381  * unmatched row if Match [i] == EMPTY. */
382 
383  if (work_limit_reached)
384  {
385  /* return -1 if the work limit of maxwork*nnz(A) was reached */
386  *work = EMPTY ;
387  }
388 
389  return (nmatch) ;
390 }
#define EMPTY
#define Int
#define FALSE
#define ASSERT(expression)
static Int amesos_augment(Int k, Int Ap[], Int Ai[], Int Match[], Int Cheap[], Int Flag[], Int Istack[], Int Jstack[], Int Pstack[], double *work, double maxwork)
Int BTF(maxtrans)
#define TRUE