Anasazi  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
mypdsaup2.f
1 c @HEADER
2 c *****************************************************************************
3 c Anasazi: Block Eigensolvers Package
4 c
5 c Copyright 2004 NTESS and the Anasazi contributors.
6 c SPDX-License-Identifier: BSD-3-Clause
7 c *****************************************************************************
8 c @HEADER
9 
10 c This software is a result of the research described in the report
11 c
12 c "A comparison of algorithms for modal analysis in the absence
13 c of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
14 c Sandia National Laboratories, Technical report SAND2003-1028J.
15 c
16 c It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
17 c framework ( http://trilinos.org/ ).
18 c-----------------------------------------------------------------------
19 c\BeginDoc
20 c
21 c\Name: mypdsaup2
22 c
23 c Message Passing Layer: MPI
24 c
25 c\Description:
26 c Intermediate level interface called by pdsaupd.
27 c
28 c\Usage:
29 c call pdsaup2
30 c ( COMM, IDO, BMAT, N, WHICH, NEV, NP, TOL, RESID, MODE, IUPD,
31 c ISHIFT, MXITER, V, LDV, H, LDH, RITZ, BOUNDS, Q, LDQ, WORKL,
32 c IPNTR, WORKD, INFO, VERBOSE, USRCHK, UNCONV )
33 c
34 c\Arguments
35 c
36 c COMM, IDO, BMAT, N, WHICH, NEV, TOL, RESID: same as defined in pdsaupd.
37 c MODE, ISHIFT, MXITER: see the definition of IPARAM in pdsaupd.
38 c
39 c NP Integer. (INPUT/OUTPUT)
40 c Contains the number of implicit shifts to apply during
41 c each Arnoldi/Lanczos iteration.
42 c If ISHIFT=1, NP is adjusted dynamically at each iteration
43 c to accelerate convergence and prevent stagnation.
44 c This is also roughly equal to the number of matrix-vector
45 c products (involving the operator OP) per Arnoldi iteration.
46 c The logic for adjusting is contained within the current
47 c subroutine.
48 c If ISHIFT=0, NP is the number of shifts the user needs
49 c to provide via reverse comunication. 0 < NP < NCV-NEV.
50 c NP may be less than NCV-NEV since a leading block of the current
51 c upper Tridiagonal matrix has split off and contains "unwanted"
52 c Ritz values.
53 c Upon termination of the IRA iteration, NP contains the number
54 c of "converged" wanted Ritz values.
55 c
56 c IUPD Integer. (INPUT)
57 c IUPD .EQ. 0: use explicit restart instead implicit update.
58 c IUPD .NE. 0: use implicit update.
59 c
60 c V Double precision N by (NEV+NP) array. (INPUT/OUTPUT)
61 c The Lanczos basis vectors.
62 c
63 c LDV Integer. (INPUT)
64 c Leading dimension of V exactly as declared in the calling
65 c program.
66 c
67 c H Double precision (NEV+NP) by 2 array. (OUTPUT)
68 c H is used to store the generated symmetric tridiagonal matrix
69 c The subdiagonal is stored in the first column of H starting
70 c at H(2,1). The main diagonal is stored in the second column
71 c of H starting at H(1,2). If pdsaup2 converges store the
72 c B-norm of the final residual vector in H(1,1).
73 c
74 c LDH Integer. (INPUT)
75 c Leading dimension of H exactly as declared in the calling
76 c program.
77 c
78 c RITZ Double precision array of length NEV+NP. (OUTPUT)
79 c RITZ(1:NEV) contains the computed Ritz values of OP.
80 c
81 c BOUNDS Double precision array of length NEV+NP. (OUTPUT)
82 c BOUNDS(1:NEV) contain the error bounds corresponding to RITZ.
83 c
84 c Q Double precision (NEV+NP) by (NEV+NP) array. (WORKSPACE)
85 c Private (replicated) work array used to accumulate the
86 c rotation in the shift application step.
87 c
88 c LDQ Integer. (INPUT)
89 c Leading dimension of Q exactly as declared in the calling
90 c program.
91 c
92 c WORKL Double precision array of length at least 3*(NEV+NP). (INPUT/WORKSPACE)
93 c Private (replicated) array on each PE or array allocated on
94 c the front end. It is used in the computation of the
95 c tridiagonal eigenvalue problem, the calculation and
96 c application of the shifts and convergence checking.
97 c If ISHIFT .EQ. O and IDO .EQ. 3, the first NP locations
98 c of WORKL are used in reverse communication to hold the user
99 c supplied shifts.
100 c
101 c IPNTR Integer array of length 3. (OUTPUT)
102 c Pointer to mark the starting locations in the WORKD for
103 c vectors used by the Lanczos iteration.
104 c -------------------------------------------------------------
105 c IPNTR(1): pointer to the current operand vector X.
106 c IPNTR(2): pointer to the current result vector Y.
107 c IPNTR(3): pointer to the vector B * X when used in one of
108 c the spectral transformation modes. X is the current
109 c operand.
110 c -------------------------------------------------------------
111 c
112 c WORKD Double precision work array of length 3*N. (REVERSE COMMUNICATION)
113 c Distributed array to be used in the basic Lanczos iteration
114 c for reverse communication. The user should not use WORKD
115 c as temporary workspace during the iteration !!!!!!!!!!
116 c See Data Distribution Note in pdsaupd.
117 c
118 c INFO Integer. (INPUT/OUTPUT)
119 c If INFO .EQ. 0, a randomly initial residual vector is used.
120 c If INFO .NE. 0, RESID contains the initial residual vector,
121 c possibly from a previous run.
122 c Error flag on output.
123 c = 0: Normal return.
124 c = 1: All possible eigenvalues of OP has been found.
125 c NP returns the size of the invariant subspace
126 c spanning the operator OP.
127 c = 2: No shifts could be applied.
128 c = -8: Error return from trid. eigenvalue calculation;
129 c This should never happen.
130 c = -9: Starting vector is zero.
131 c = -9999: Could not build an Lanczos factorization.
132 c Size that was built in returned in NP.
133 c
134 c VERBOSE Flag to print out the status of computation per iteration (or
135 c equivalently restart).
136 c
137 c USRCHK Integer. (INPUT) A nonzero value implies that reverse
138 c communication is done so that the user can check convergence.
139 c
140 c UNCONV Integer. (INPUT) Only used if USRCHK is not equal to zero.
141 c
142 c
143 c\EndDoc
144 c
145 c-----------------------------------------------------------------------
146 c
147 c\BeginLib
148 c
149 c\References:
150 c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
151 c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
152 c pp 357-385.
153 c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
154 c Restarted Arnoldi Iteration", Rice University Technical Report
155 c TR95-13, Department of Computational and Applied Mathematics.
156 c 3. B.N. Parlett, "The Symmetric Eigenvalue Problem". Prentice-Hall,
157 c 1980.
158 c 4. B.N. Parlett, B. Nour-Omid, "Towards a Black Box Lanczos Program",
159 c Computer Physics Communications, 53 (1989), pp 169-179.
160 c 5. B. Nour-Omid, B.N. Parlett, T. Ericson, P.S. Jensen, "How to
161 c Implement the Spectral Transformation", Math. Comp., 48 (1987),
162 c pp 663-673.
163 c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos
164 c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems",
165 c SIAM J. Matr. Anal. Apps., January (1993).
166 c 7. L. Reichel, W.B. Gragg, "Algorithm 686: FORTRAN Subroutines
167 c for Updating the QR decomposition", ACM TOMS, December 1990,
168 c Volume 16 Number 4, pp 369-377.
169 c
170 c\Routines called:
171 c pdgetv0 Parallel ARPACK initial vector generation routine.
172 c pdsaitr Parallel ARPACK Lanczos factorization routine.
173 c pdsapps Parallel ARPACK application of implicit shifts routine.
174 c dsconv ARPACK convergence of Ritz values routine.
175 c pdseigt Parallel ARPACK compute Ritz values and error bounds routine.
176 c pdsgets Parallel ARPACK reorder Ritz values and error bounds routine.
177 c dsortr ARPACK sorting routine.
178 c sstrqb ARPACK routine that computes all eigenvalues and the
179 c last component of the eigenvectors of a symmetric
180 c tridiagonal matrix using the implicit QL or QR method.
181 c pivout Parallel ARPACK utility routine that prints integers.
182 c second ARPACK utility routine for timing.
183 c pdvout Parallel ARPACK utility routine that prints vectors.
184 c pdlamch ScaLAPACK routine that determines machine constants.
185 c dcopy Level 1 BLAS that copies one vector to another.
186 c ddot Level 1 BLAS that computes the scalar product of two vectors.
187 c pdnorm2 Parallel version of Level 1 BLAS that computes the norm of a vector.
188 c dscal Level 1 BLAS that scales a vector.
189 c dswap Level 1 BLAS that swaps two vectors.
190 c
191 c\Author
192 c Danny Sorensen Phuong Vu
193 c Richard Lehoucq CRPC / Rice University
194 c Dept. of Computational & Houston, Texas
195 c Applied Mathematics
196 c Rice University
197 c Houston, Texas
198 c
199 c\Parallel Modifications
200 c Kristi Maschhoff
201 c
202 c\Revision history:
203 c Starting Point: Serial Code FILE: saup2.F SID: 2.4
204 c
205 c\SCCS Information:
206 c FILE: saup2.F SID: 1.5 DATE OF SID: 05/20/98
207 c
208 c\EndLib
209 c
210 c-----------------------------------------------------------------------
211 c
212  subroutine mypdsaup2
213  & ( comm, ido, bmat, n, which, nev, np, tol, resid, mode, iupd,
214  & ishift, mxiter, v, ldv, h, ldh, ritz, bounds,
215  & q, ldq, workl, ipntr, workd, info, verbose, usrchk, unconv )
216 c
217  include 'mpif.h'
218 c
219 c %---------------%
220 c | MPI Variables |
221 c %---------------%
222 c
223  integer comm
224 c
225 c
226 c %----------------------------------------------------%
227 c | Include files for debugging and timing information |
228 c %----------------------------------------------------%
229 c
230  include 'debug.h'
231  include 'stat.h'
232 c
233 c %------------------%
234 c | Scalar Arguments |
235 c %------------------%
236 c
237  character bmat*1, which*2
238  integer ido, info, ishift, iupd, ldh, ldq, ldv, mxiter,
239  & n, mode, nev, np
240  integer verbose, usrchk, unconv
241  double precision
242  & tol
243 c
244 c %-----------------%
245 c | Array Arguments |
246 c %-----------------%
247 c
248  integer ipntr(3)
249  double precision
250  & bounds(nev+np), h(ldh,2), q(ldq,nev+np), resid(n),
251  & ritz(nev+np), v(ldv,nev+np), workd(3*n),
252  & workl(3*(nev+np))
253 c
254 c %------------%
255 c | Parameters |
256 c %------------%
257 c
258  double precision
259  & one, zero
260  parameter(one = 1.0, zero = 0.0)
261 c
262 c %---------------%
263 c | Local Scalars |
264 c %---------------%
265 c
266  character wprime*2
267  logical cnorm, getv0, initv, update, ushift
268  logical cnvchk
269  integer ierr, iter, j, kplusp, msglvl, nconv, nevbef, nev0,
270  & np0, nptemp, nevd2, nevm2, kp(3)
271  double precision
272  & rnorm, temp, eps23
273  save cnorm, getv0, initv, update, ushift,
274  & iter, kplusp, msglvl, nconv, nev0, np0,
275  & rnorm, eps23, cnvchk
276 c
277  double precision
278  & rnorm_buf
279 c
280 c
281 c %----------------------%
282 c | External Subroutines |
283 c %----------------------%
284 c
285  external dcopy, pdgetv0, pdsaitr, dscal, dsconv,
286  & pdseigt, pdsgets, pdsapps,
287  & dsortr, pdvout, pivout, second
288 c
289 c %--------------------%
290 c | External Functions |
291 c %--------------------%
292 c
293  double precision
294  & ddot, pdnorm2, pdlamch
295  external ddot, pdnorm2, pdlamch
296 c
297 c %---------------------%
298 c | Intrinsic Functions |
299 c %---------------------%
300 c
301  intrinsic min, abs, sqrt
302 c
303 c %-----------------------%
304 c | Executable Statements |
305 c %-----------------------%
306 c
307  if (ido .eq. 0) then
308 c
309 c %-------------------------------%
310 c | Initialize timing statistics |
311 c | & message level for debugging |
312 c %-------------------------------%
313 c
314  call second(t0)
315  msglvl = msaup2
316 c
317 c %---------------------------------%
318 c | Set machine dependent constant. |
319 c %---------------------------------%
320 c
321  eps23 = pdlamch(comm, 'Epsilon-Machine')
322  eps23 = eps23**(2.0/3.0)
323 c
324 c %-------------------------------------%
325 c | nev0 and np0 are integer variables |
326 c | hold the initial values of NEV & NP |
327 c %-------------------------------------%
328 c
329  nev0 = nev
330  np0 = np
331 c
332 c %-------------------------------------%
333 c | kplusp is the bound on the largest |
334 c | Lanczos factorization built. |
335 c | nconv is the current number of |
336 c | "converged" eigenvlues. |
337 c | iter is the counter on the current |
338 c | iteration step. |
339 c %-------------------------------------%
340 c
341  kplusp = nev0 + np0
342  nconv = 0
343  iter = 0
344 c
345 c %---------------------------------------------%
346 c | Set flags for computing the first NEV steps |
347 c | of the Lanczos factorization. |
348 c %---------------------------------------------%
349 c
350  getv0 = .true.
351  update = .false.
352  ushift = .false.
353  cnorm = .false.
354  cnvchk = .false.
355 c
356  if (info .ne. 0) then
357 c
358 c %--------------------------------------------%
359 c | User provides the initial residual vector. |
360 c %--------------------------------------------%
361 c
362  initv = .true.
363  info = 0
364  else
365  initv = .false.
366  end if
367  end if
368 c
369 c %---------------------------------------------%
370 c | Get a possibly random starting vector and |
371 c | force it into the range of the operator OP. |
372 c %---------------------------------------------%
373 c
374  10 continue
375 c
376  if (getv0) then
377  call pdgetv0( comm, ido, bmat, 1, initv, n, 1, v, ldv,
378  & resid, rnorm, ipntr, workd, workl, info)
379 c
380  if (ido .ne. 99) go to 9000
381 c
382  if (rnorm .eq. zero) then
383 c
384 c %-----------------------------------------%
385 c | The initial vector is zero. Error exit. |
386 c %-----------------------------------------%
387 c
388  info = -9
389  go to 1200
390  end if
391  getv0 = .false.
392  ido = 0
393  end if
394 c
395 c %------------------------------------------------------------%
396 c | Back from reverse communication: continue with update step |
397 c %------------------------------------------------------------%
398 c
399  if (update) go to 20
400 c
401 c %-------------------------------------------%
402 c | Back from computing user specified shifts |
403 c %-------------------------------------------%
404 c
405  if (ushift) go to 50
406 c
407 c %-------------------------------------%
408 c | Back from computing residual norm |
409 c | at the end of the current iteration |
410 c %-------------------------------------%
411 c
412  if (cnorm) go to 100
413 c
414 c %---------------------------------%
415 c | Back from checking convergence. |
416 c %---------------------------------%
417 c
418  if (cnvchk) go to 110
419 c
420 c %----------------------------------------------------------%
421 c | Compute the first NEV steps of the Lanczos factorization |
422 c %----------------------------------------------------------%
423 c
424  call pdsaitr(comm, ido, bmat, n, 0, nev0, mode,
425  & resid, rnorm, v, ldv, h, ldh, ipntr,
426  & workd, workl, info)
427 c
428 c %---------------------------------------------------%
429 c | ido .ne. 99 implies use of reverse communication |
430 c | to compute operations involving OP and possibly B |
431 c %---------------------------------------------------%
432 c
433  if (ido .ne. 99) go to 9000
434 c
435  if (info .gt. 0) then
436 c
437 c %-----------------------------------------------------%
438 c | pdsaitr was unable to build an Lanczos factorization|
439 c | of length NEV0. INFO is returned with the size of |
440 c | the factorization built. Exit main loop. |
441 c %-----------------------------------------------------%
442 c
443  np = info
444  mxiter = iter
445  info = -9999
446  go to 1200
447  end if
448 c
449 c %--------------------------------------------------------------%
450 c | |
451 c | M A I N LANCZOS I T E R A T I O N L O O P |
452 c | Each iteration implicitly restarts the Lanczos |
453 c | factorization in place. |
454 c | |
455 c %--------------------------------------------------------------%
456 c
457  1000 continue
458 c
459  iter = iter + 1
460 c
461  if (verbose .gt. 0) then
462  write(*,*)' Iteration ',iter,
463  & ' - Number of converged eigenvalues ',nconv
464  end if
465 c
466  if (msglvl .gt. 0) then
467  call pivout(comm, logfil, 1, iter, ndigit,
468  & '_saup2: **** Start of major iteration number ****')
469  end if
470  if (msglvl .gt. 1) then
471  call pivout(comm, logfil, 1, nev, ndigit,
472  & '_saup2: The length of the current Lanczos factorization')
473  call pivout(comm, logfil, 1, np, ndigit,
474  & '_saup2: Extend the Lanczos factorization by')
475  end if
476 c
477 c %------------------------------------------------------------%
478 c | Compute NP additional steps of the Lanczos factorization. |
479 c %------------------------------------------------------------%
480 c
481  ido = 0
482  20 continue
483  update = .true.
484 c
485  call pdsaitr(comm, ido, bmat, n, nev, np, mode,
486  & resid, rnorm, v, ldv, h, ldh, ipntr,
487  & workd, workl, info)
488 c
489 c %---------------------------------------------------%
490 c | ido .ne. 99 implies use of reverse communication |
491 c | to compute operations involving OP and possibly B |
492 c %---------------------------------------------------%
493 c
494  if (ido .ne. 99) go to 9000
495 c
496  if (info .gt. 0) then
497 c
498 c %-----------------------------------------------------%
499 c | pdsaitr was unable to build an Lanczos factorization|
500 c | of length NEV0+NP0. INFO is returned with the size |
501 c | of the factorization built. Exit main loop. |
502 c %-----------------------------------------------------%
503 c
504  np = info
505  mxiter = iter
506  info = -9999
507  go to 1200
508  end if
509  update = .false.
510 c
511  if (msglvl .gt. 1) then
512  call pdvout(comm, logfil, 1, rnorm, ndigit,
513  & '_saup2: Current B-norm of residual for factorization')
514  end if
515 c
516 c %--------------------------------------------------------%
517 c | Compute the eigenvalues and corresponding error bounds |
518 c | of the current symmetric tridiagonal matrix. |
519 c %--------------------------------------------------------%
520 c
521  call pdseigt( comm, rnorm, kplusp, h, ldh, ritz, bounds,
522  & workl, ierr)
523 c
524  if (ierr .ne. 0) then
525  info = -8
526  go to 1200
527  end if
528 c
529 c %----------------------------------------------------%
530 c | Make a copy of eigenvalues and corresponding error |
531 c | bounds obtained from _seigt. |
532 c %----------------------------------------------------%
533 c
534  call dcopy(kplusp, ritz, 1, workl(kplusp+1), 1)
535  call dcopy(kplusp, bounds, 1, workl(2*kplusp+1), 1)
536 c
537 c %---------------------------------------------------%
538 c | Select the wanted Ritz values and their bounds |
539 c | to be used in the convergence test. |
540 c | The selection is based on the requested number of |
541 c | eigenvalues instead of the current NEV and NP to |
542 c | prevent possible misconvergence. |
543 c | * Wanted Ritz values := RITZ(NP+1:NEV+NP) |
544 c | * Shifts := RITZ(1:NP) := WORKL(1:NP) |
545 c %---------------------------------------------------%
546 c
547  nev = nev0
548  np = np0
549  call pdsgets( comm, ishift, which, nev, np, ritz,
550  & bounds, workl)
551 c
552 c %-------------------%
553 c | Convergence test. |
554 c %-------------------%
555 c
556  call dcopy(nev, bounds(np+1), 1, workl(np+1), 1)
557  if (usrchk .eq. zero) then
558  call dsconv(nev, ritz(np+1), workl(np+1), tol, nconv)
559  else
560 c
561 c %--------------------------------------------------%
562 c | reverse communication so that the user can check |
563 c | convergence. |
564 c %--------------------------------------------------%
565 c
566  cnvchk = .true.
567  ido = 4
568  go to 9000
569  end if
570 c
571  110 continue
572 c
573 c %------------------------------------%
574 c | Back from reverse communication; |
575 c %------------------------------------%
576 c
577  if (usrchk .ne. zero) then
578  nconv = unconv
579  cnvchk = .false.
580  end if
581 c
582  if (msglvl .gt. 2) then
583  kp(1) = nev
584  kp(2) = np
585  kp(3) = nconv
586  call pivout(comm, logfil, 3, kp, ndigit,
587  & '_saup2: NEV, NP, NCONV are')
588  call pdvout(comm, logfil, kplusp, ritz, ndigit,
589  & '_saup2: The eigenvalues of H')
590  call pdvout(comm, logfil, kplusp, bounds, ndigit,
591  & '_saup2: Ritz estimates of the current NCV Ritz values')
592  end if
593 c
594 c %---------------------------------------------------------%
595 c | Count the number of unwanted Ritz values that have zero |
596 c | Ritz estimates. If any Ritz estimates are equal to zero |
597 c | then a leading block of H of order equal to at least |
598 c | the number of Ritz values with zero Ritz estimates has |
599 c | split off. None of these Ritz values may be removed by |
600 c | shifting. Decrease NP the number of shifts to apply. If |
601 c | no shifts may be applied, then prepare to exit |
602 c %---------------------------------------------------------%
603 c
604  nptemp = np
605  do 30 j=1, nptemp
606  if (bounds(j) .eq. zero) then
607  np = np - 1
608  nev = nev + 1
609  end if
610  30 continue
611 c
612  if ( (nconv .ge. nev0) .or.
613  & (iter .gt. mxiter) .or.
614  & (np .eq. 0) ) then
615 c
616 c %------------------------------------------------%
617 c | Prepare to exit. Put the converged Ritz values |
618 c | and corresponding bounds in RITZ(1:NCONV) and |
619 c | BOUNDS(1:NCONV) respectively. Then sort. Be |
620 c | careful when NCONV > NP since we don't want to |
621 c | swap overlapping locations. |
622 c %------------------------------------------------%
623 c
624  if (which .eq. 'BE') then
625 c
626 c %-----------------------------------------------------%
627 c | Both ends of the spectrum are requested. |
628 c | Sort the eigenvalues into algebraically decreasing |
629 c | order first then swap low end of the spectrum next |
630 c | to high end in appropriate locations. |
631 c | NOTE: when np < floor(nev/2) be careful not to swap |
632 c | overlapping locations. |
633 c %-----------------------------------------------------%
634 c
635  wprime = 'SA'
636  call dsortr(wprime, .true., kplusp, ritz, bounds)
637  nevd2 = nev0 / 2
638  nevm2 = nev0 - nevd2
639  if ( nev .gt. 1 ) then
640  call dswap( min(nevd2,np), ritz(nevm2+1), 1,
641  & ritz( max(kplusp-nevd2+1,kplusp-np+1) ), 1)
642  call dswap( min(nevd2,np), bounds(nevm2+1), 1,
643  & bounds( max(kplusp-nevd2+1,kplusp-np+1)), 1)
644  end if
645 c
646  else
647 c
648 c %--------------------------------------------------%
649 c | LM, SM, LA, SA case. |
650 c | Sort the eigenvalues of H into the an order that |
651 c | is opposite to WHICH, and apply the resulting |
652 c | order to BOUNDS. The eigenvalues are sorted so |
653 c | that the wanted part are always within the first |
654 c | NEV locations. |
655 c %--------------------------------------------------%
656 c
657  if (which .eq. 'LM') wprime = 'SM'
658  if (which .eq. 'SM') wprime = 'LM'
659  if (which .eq. 'LA') wprime = 'SA'
660  if (which .eq. 'SA') wprime = 'LA'
661 c
662  call dsortr(wprime, .true., kplusp, ritz, bounds)
663 c
664  end if
665 c
666 c %--------------------------------------------------%
667 c | Scale the Ritz estimate of each Ritz value |
668 c | by 1 / max(eps23,magnitude of the Ritz value). |
669 c %--------------------------------------------------%
670 c
671  do 35 j = 1, nev0
672  temp = max( eps23, abs(ritz(j)) )
673  bounds(j) = bounds(j)/temp
674  35 continue
675 c
676 c %----------------------------------------------------%
677 c | Sort the Ritz values according to the scaled Ritz |
678 c | esitmates. This will push all the converged ones |
679 c | towards the front of ritzr, ritzi, bounds |
680 c | (in the case when NCONV < NEV.) |
681 c %----------------------------------------------------%
682 c
683  wprime = 'LA'
684  call dsortr(wprime, .true., nev0, bounds, ritz)
685 c
686 c %----------------------------------------------%
687 c | Scale the Ritz estimate back to its original |
688 c | value. |
689 c %----------------------------------------------%
690 c
691  do 40 j = 1, nev0
692  temp = max( eps23, abs(ritz(j)) )
693  bounds(j) = bounds(j)*temp
694  40 continue
695 c
696 c %--------------------------------------------------%
697 c | Sort the "converged" Ritz values again so that |
698 c | the "threshold" values and their associated Ritz |
699 c | estimates appear at the appropriate position in |
700 c | ritz and bound. |
701 c %--------------------------------------------------%
702 c
703  if (which .eq. 'BE') then
704 c
705 c %------------------------------------------------%
706 c | Sort the "converged" Ritz values in increasing |
707 c | order. The "threshold" values are in the |
708 c | middle. |
709 c %------------------------------------------------%
710 c
711  wprime = 'LA'
712  call dsortr(wprime, .true., nconv, ritz, bounds)
713 c
714  else
715 c
716 c %----------------------------------------------%
717 c | In LM, SM, LA, SA case, sort the "converged" |
718 c | Ritz values according to WHICH so that the |
719 c | "threshold" value appears at the front of |
720 c | ritz. |
721 c %----------------------------------------------%
722 
723  call dsortr(which, .true., nconv, ritz, bounds)
724 c
725  end if
726 c
727 c %------------------------------------------%
728 c | Use h( 1,1 ) as storage to communicate |
729 c | rnorm to _seupd if needed |
730 c %------------------------------------------%
731 c
732  h(1,1) = rnorm
733 c
734  if (msglvl .gt. 1) then
735  call pdvout(comm, logfil, kplusp, ritz, ndigit,
736  & '_saup2: Sorted Ritz values.')
737  call pdvout(comm, logfil, kplusp, bounds, ndigit,
738  & '_saup2: Sorted ritz estimates.')
739  end if
740 c
741 c %------------------------------------%
742 c | Max iterations have been exceeded. |
743 c %------------------------------------%
744 c
745  if (iter .gt. mxiter .and. nconv .lt. nev) info = 1
746 c
747 c %---------------------%
748 c | No shifts to apply. |
749 c %---------------------%
750 c
751  if (np .eq. 0 .and. nconv .lt. nev0) info = 2
752 c
753  np = nconv
754  go to 1100
755 c
756  else if (nconv .lt. nev .and. ishift .eq. 1) then
757 c
758 c %---------------------------------------------------%
759 c | Do not have all the requested eigenvalues yet. |
760 c | To prevent possible stagnation, adjust the number |
761 c | of Ritz values and the shifts. |
762 c %---------------------------------------------------%
763 c
764  nevbef = nev
765  nev = nev + min(nconv, np/2)
766  if (nev .eq. 1 .and. kplusp .ge. 6) then
767  nev = kplusp / 2
768  else if (nev .eq. 1 .and. kplusp .gt. 2) then
769  nev = 2
770  end if
771  np = kplusp - nev
772 c
773 c %---------------------------------------%
774 c | If the size of NEV was just increased |
775 c | resort the eigenvalues. |
776 c %---------------------------------------%
777 c
778  if (nevbef .lt. nev)
779  & call pdsgets( comm, ishift, which, nev, np,
780  & ritz, bounds, workl)
781 c
782  end if
783 c
784  if (msglvl .gt. 0) then
785  call pivout(comm, logfil, 1, nconv, ndigit,
786  & '_saup2: no. of "converged" Ritz values at this iter.')
787  if (msglvl .gt. 1) then
788  kp(1) = nev
789  kp(2) = np
790  call pivout(comm, logfil, 2, kp, ndigit,
791  & '_saup2: NEV and NP .')
792  call pdvout(comm, logfil, nev, ritz(np+1), ndigit,
793  & '_saup2: "wanted" Ritz values.')
794  call pdvout(comm, logfil, nev, bounds(np+1), ndigit,
795  & '_saup2: Ritz estimates of the "wanted" values ')
796  end if
797  end if
798 c
799  if (ishift .eq. 0) then
800 c
801 c %-----------------------------------------------------%
802 c | User specified shifts: reverse communication to |
803 c | compute the shifts. They are returned in the first |
804 c | NP locations of WORKL. |
805 c %-----------------------------------------------------%
806 c
807  ushift = .true.
808  ido = 3
809  go to 9000
810  end if
811 c
812  50 continue
813 c
814 c %------------------------------------%
815 c | Back from reverse communication; |
816 c | User specified shifts are returned |
817 c | in WORKL(1:NP) |
818 c %------------------------------------%
819 c
820  ushift = .false.
821 c
822 c
823 c %---------------------------------------------------------%
824 c | Move the NP shifts to the first NP locations of RITZ to |
825 c | free up WORKL. This is for the non-exact shift case; |
826 c | in the exact shift case, pdsgets already handles this. |
827 c %---------------------------------------------------------%
828 c
829  if (ishift .eq. 0) call dcopy(np, workl, 1, ritz, 1)
830 c
831  if (msglvl .gt. 2) then
832  call pivout(comm, logfil, 1, np, ndigit,
833  & '_saup2: The number of shifts to apply ')
834  call pdvout(comm, logfil, np, workl, ndigit,
835  & '_saup2: shifts selected')
836  if (ishift .eq. 1) then
837  call pdvout(comm, logfil, np, bounds, ndigit,
838  & '_saup2: corresponding Ritz estimates')
839  end if
840  end if
841 c
842 c %---------------------------------------------------------%
843 c | Apply the NP0 implicit shifts by QR bulge chasing. |
844 c | Each shift is applied to the entire tridiagonal matrix. |
845 c | The first 2*N locations of WORKD are used as workspace. |
846 c | After pdsapps is done, we have a Lanczos |
847 c | factorization of length NEV. |
848 c %---------------------------------------------------------%
849 c
850  call pdsapps( comm, n, nev, np, ritz, v, ldv, h, ldh, resid,
851  & q, ldq, workd)
852 c
853 c %---------------------------------------------%
854 c | Compute the B-norm of the updated residual. |
855 c | Keep B*RESID in WORKD(1:N) to be used in |
856 c | the first step of the next call to pdsaitr. |
857 c %---------------------------------------------%
858 c
859  cnorm = .true.
860  call second(t2)
861  if (bmat .eq. 'G') then
862  nbx = nbx + 1
863  call dcopy(n, resid, 1, workd(n+1), 1)
864  ipntr(1) = n + 1
865  ipntr(2) = 1
866  ido = 2
867 c
868 c %----------------------------------%
869 c | Exit in order to compute B*RESID |
870 c %----------------------------------%
871 c
872  go to 9000
873  else if (bmat .eq. 'I') then
874  call dcopy(n, resid, 1, workd, 1)
875  end if
876 c
877  100 continue
878 c
879 c %----------------------------------%
880 c | Back from reverse communication; |
881 c | WORKD(1:N) := B*RESID |
882 c %----------------------------------%
883 c
884  if (bmat .eq. 'G') then
885  call second(t3)
886  tmvbx = tmvbx + (t3 - t2)
887  end if
888 c
889  if (bmat .eq. 'G') then
890  rnorm_buf = ddot(n, resid, 1, workd, 1)
891  call mpi_allreduce( rnorm_buf, rnorm, 1,
892  & mpi_double_precision, mpi_sum, comm, ierr )
893  rnorm = sqrt(abs(rnorm))
894  else if (bmat .eq. 'I') then
895  rnorm = pdnorm2( comm, n, resid, 1 )
896  end if
897  cnorm = .false.
898  130 continue
899 c
900  if (msglvl .gt. 2) then
901  call pdvout(comm, logfil, 1, rnorm, ndigit,
902  & '_saup2: B-norm of residual for NEV factorization')
903  call pdvout(comm, logfil, nev, h(1,2), ndigit,
904  & '_saup2: main diagonal of compressed H matrix')
905  call pdvout(comm, logfil, nev-1, h(2,1), ndigit,
906  & '_saup2: subdiagonal of compressed H matrix')
907  end if
908 c
909  go to 1000
910 c
911 c %---------------------------------------------------------------%
912 c | |
913 c | E N D O F M A I N I T E R A T I O N L O O P |
914 c | |
915 c %---------------------------------------------------------------%
916 c
917  1100 continue
918 c
919  mxiter = iter
920  nev = nconv
921 c
922  1200 continue
923  ido = 99
924 c
925 c %------------%
926 c | Error exit |
927 c %------------%
928 c
929  call second(t1)
930  tsaup2 = t1 - t0
931 c
932  9000 continue
933  return
934 c
935 c %----------------%
936 c | End of pdsaup2 |
937 c %----------------%
938 c
939  end