Actual source code: invit.c

slepc-3.18.2 2023-01-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: struct HRtr
 15: {
 16:   PetscScalar *data;
 17:   PetscInt    m;
 18:   PetscInt    idx[2];
 19:   PetscInt    n[2];
 20:   PetscScalar tau[2];
 21:   PetscReal   alpha;
 22:   PetscReal   cs;
 23:   PetscReal   sn;
 24:   PetscInt    type;
 25: };

 27: /*
 28:   Generates a hyperbolic rotation
 29:     if x1*x1 - x2*x2 != 0
 30:       r = sqrt(|x1*x1 - x2*x2|)
 31:       c = x1/r  s = x2/r

 33:       | c -s||x1|   |d*r|
 34:       |-s  c||x2| = | 0 |
 35:       where d = 1 for type==1 and -1 for type==2
 36:   Returns the condition number of the reduction
 37: */
 38: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
 39: {
 40:   PetscReal t,n2,xa,xb;
 41:   PetscInt  type_;

 43:   if (x2==0.0) {
 44:     *r = PetscAbsReal(x1); *c = (x1>=0.0)?1.0:-1.0; *s = 0.0;
 45:     if (type) *type = 1;
 46:     return 0;
 47:   }
 48:   if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
 49:     /* hyperbolic rotation doesn't exist */
 50:     *c = *s = *r = 0.0;
 51:     if (type) *type = 0;
 52:     *cond = PETSC_MAX_REAL;
 53:     return 0;
 54:   }

 56:   if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
 57:     xa = x1; xb = x2; type_ = 1;
 58:   } else {
 59:     xa = x2; xb = x1; type_ = 2;
 60:   }
 61:   t = xb/xa;
 62:   n2 = PetscAbsReal(1 - t*t);
 63:   *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
 64:   *c = x1/(*r);
 65:   *s = x2/(*r);
 66:   if (type_ == 2) *r *= -1;
 67:   if (type) *type = type_;
 68:   if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
 69:   return 0;
 70: }

 72: /*
 73:                                 |c  s|
 74:   Applies an hyperbolic rotator |s  c|
 75:            |c  s|
 76:     [x1 x2]|s  c|
 77: */
 78: static PetscErrorCode HRApply(PetscInt n,PetscScalar *x1,PetscInt inc1,PetscScalar *x2,PetscInt inc2,PetscReal c,PetscReal s)
 79: {
 80:   PetscInt    i;
 81:   PetscReal   t;
 82:   PetscScalar tmp;

 84:   if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
 85:     t = s/c;
 86:     for (i=0;i<n;i++) {
 87:       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
 88:       x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
 89:     }
 90:   } else { /* Type II */
 91:     t = c/s;
 92:     for (i=0;i<n;i++) {
 93:       tmp = x1[i*inc1];
 94:       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
 95:       x2[i*inc2] = t*x1[i*inc1] + tmp/s;
 96:     }
 97:   }
 98:   return 0;
 99: }

101: /*
102:   Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).

104:   Input:
105:     A symmetric (only lower triangular part is referred)
106:     s vector +1 and -1 (signature matrix)
107:   Output:
108:     d,e
109:     s
110:     Q s-orthogonal matrix with Q^T*A*Q = T (symmetric tridiagonal matrix)
111: */
112: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *work,PetscReal *rwork,PetscBLASInt *iwork)
113: {
114:   PetscInt       i,j,k,*ii,*jj,i0=0,ik=0,tmp,type;
115:   PetscInt       nwu=0;
116:   PetscReal      *ss,cond=1.0,cs,sn,r;
117:   PetscScalar    tau,t,*AA;
118:   PetscBLASInt   n0,n1,ni,inc=1,m,n_,lda_,ldq_,*perm;
119:   PetscBool      breakdown = PETSC_TRUE;

121:   if (n<3) {
122:     if (n==1) Q[0]=1;
123:     if (n==2) {
124:       Q[0] = Q[1+ldq] = 1;
125:       Q[1] = Q[ldq] = 0;
126:     }
127:     return 0;
128:   }
129:   PetscBLASIntCast(lda,&lda_);
130:   PetscBLASIntCast(n,&n_);
131:   PetscBLASIntCast(ldq,&ldq_);
132:   ss = rwork;
133:   perm = iwork;
134:   AA = work;
135:   for (i=0;i<n;i++) PetscArraycpy(AA+i*n,A+i*lda,n);
136:   nwu += n*n;
137:   k=0;
138:   while (breakdown && k<n) {
139:     breakdown = PETSC_FALSE;
140:     /* Classify (and flip) A and s according to sign */
141:     if (flip) {
142:       for (i=0;i<n;i++) {
143:         perm[i] = n-1-perm_[i];
144:         if (perm[i]==0) i0 = i;
145:         if (perm[i]==k) ik = i;
146:       }
147:     } else {
148:       for (i=0;i<n;i++) {
149:         perm[i] = perm_[i];
150:         if (perm[i]==0) i0 = i;
151:         if (perm[i]==k) ik = i;
152:       }
153:     }
154:     perm[ik] = 0;
155:     perm[i0] = k;
156:     i=1;
157:     while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
158:       if (s[perm[i]]!=s[perm[0]]) {
159:         j=i+1;
160:         while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
161:         tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
162:       }
163:       i++;
164:     }
165:     for (i=0;i<n;i++) {
166:       ss[i] = s[perm[i]];
167:     }
168:     if (flip) {
169:       ii = &j;
170:       jj = &i;
171:     } else {
172:       ii = &i;
173:       jj = &j;
174:     }
175:     for (i=0;i<n;i++)
176:       for (j=0;j<n;j++)
177:         A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
178:     /* Initialize Q */
179:     for (i=0;i<n;i++) {
180:       PetscArrayzero(Q+i*ldq,n);
181:       Q[perm[i]+i*ldq] = 1.0;
182:     }
183:     for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
184:     n0 = ni-1;
185:     n1 = n_-ni;
186:     for (j=0;j<n-2;j++) {
187:       PetscBLASIntCast(n-j-1,&m);
188:       /* Forming and applying reflectors */
189:       if (n0 > 1) {
190:         PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,A+ni-n0+j*lda,A+ni-n0+j*lda+1,&inc,&tau));
191:         /* Apply reflector */
192:         if (PetscAbsScalar(tau) != 0.0) {
193:           t=*(A+ni-n0+j*lda);  *(A+ni-n0+j*lda)=1.0;
194:           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
195:           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
196:           /* Update Q */
197:           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu));
198:           *(A+ni-n0+j*lda) = t;
199:           for (i=1;i<n0;i++) {
200:             *(A+ni-n0+j*lda+i) = 0.0;  *(A+j+(ni-n0+i)*lda) = 0.0;
201:           }
202:           *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
203:         }
204:       }
205:       if (n1 > 1) {
206:         PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1,A+n-n1+j*lda,A+n-n1+j*lda+1,&inc,&tau));
207:         /* Apply reflector */
208:         if (PetscAbsScalar(tau) != 0.0) {
209:           t=*(A+n-n1+j*lda);  *(A+n-n1+j*lda)=1.0;
210:           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu));
211:           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu));
212:           /* Update Q */
213:           PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu));
214:           *(A+n-n1+j*lda) = t;
215:           for (i=1;i<n1;i++) {
216:             *(A+n-n1+i+j*lda) = 0.0;  *(A+j+(n-n1+i)*lda) = 0.0;
217:           }
218:           *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
219:         }
220:       }
221:       /* Hyperbolic rotation */
222:       if (n0 > 0 && n1 > 0) {
223:         HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond);
224:         /* Check condition number */
225:         if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
226:           breakdown = PETSC_TRUE;
227:           k++;
229:           break;
230:         }
231:         A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
232:         A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
233:         /* Apply to A */
234:         HRApply(m,A+j+1+(ni-n0)*lda,1,A+j+1+(n-n1)*lda,1,cs,-sn);
235:         HRApply(m,A+ni-n0+(j+1)*lda,lda,A+n-n1+(j+1)*lda,lda,cs,-sn);

237:         /* Update Q */
238:         HRApply(n,Q+(ni-n0)*ldq,1,Q+(n-n1)*ldq,1,cs,-sn);
239:         if (type==2) {
240:           ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
241:           n0++;ni++;n1--;
242:         }
243:       }
244:       if (n0>0) n0--;
245:       else n1--;
246:     }
247:   }

249:   /* flip matrices */
250:   if (flip) {
251:     for (i=0;i<n-1;i++) {
252:       d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
253:       e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
254:       s[i] = ss[n-i-1];
255:     }
256:     s[n-1] = ss[0];
257:     d[n-1] = PetscRealPart(A[0]);
258:     for (i=0;i<n;i++) PetscArraycpy(work+i*n,Q+i*ldq,n);
259:     for (i=0;i<n;i++)
260:       for (j=0;j<n;j++)
261:         Q[i+j*ldq] = work[i+(n-j-1)*n];
262:   } else {
263:     for (i=0;i<n-1;i++) {
264:       d[i] = PetscRealPart(A[i+i*lda]);
265:       e[i] = PetscRealPart(A[i+1+i*lda]);
266:       s[i] = ss[i];
267:     }
268:     s[n-1] = ss[n-1];
269:     d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
270:   }
271:   return 0;
272: }

274: static PetscErrorCode MadeHRtr(PetscInt sz,PetscInt n,PetscInt idx0,PetscInt n0,PetscInt idx1,PetscInt n1,struct HRtr *tr1,struct HRtr *tr2,PetscReal *ncond,PetscScalar *work)
275: {
276:   PetscScalar    *x,*y;
277:   PetscReal      ncond2=1.0;
278:   PetscBLASInt   n0_,n1_,inc=1;

280:   /* Hyperbolic transformation to make zeros in x */
281:   x = tr1->data;
282:   tr1->n[0] = n0;
283:   tr1->n[1] = n1;
284:   tr1->idx[0] = idx0;
285:   tr1->idx[1] = idx1;
286:   PetscBLASIntCast(tr1->n[0],&n0_);
287:   PetscBLASIntCast(tr1->n[1],&n1_);
288:   if (tr1->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,x+tr1->idx[0],x+tr1->idx[0]+1,&inc,tr1->tau));
289:   if (tr1->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,x+tr1->idx[1],x+tr1->idx[1]+1,&inc,tr1->tau+1));
290:   if (tr1->idx[0]<tr1->idx[1]) HRGen(PetscRealPart(x[tr1->idx[0]]),PetscRealPart(x[tr1->idx[1]]),&(tr1->type),&(tr1->cs),&(tr1->sn),&(tr1->alpha),ncond);
291:   else {
292:     tr1->alpha = PetscRealPart(x[tr1->idx[0]]);
293:     *ncond = 1.0;
294:   }
295:   if (sz==2) {
296:     y = tr2->data;
297:     /* Apply first transformation to second column */
298:     if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
299:       x[tr1->idx[0]] = 1.0;
300:       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&inc,x+tr1->idx[0],&inc,tr1->tau,y+tr1->idx[0],&n0_,work));
301:     }
302:     if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
303:       x[tr1->idx[1]] = 1.0;
304:       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&inc,x+tr1->idx[1],&inc,tr1->tau+1,y+tr1->idx[1],&n1_,work));
305:     }
306:     if (tr1->idx[0]<tr1->idx[1]) HRApply(1,y+tr1->idx[0],1,y+tr1->idx[1],1,tr1->cs,-tr1->sn);
307:     tr2->n[0] = tr1->n[0];
308:     tr2->n[1] = tr1->n[1];
309:     tr2->idx[0] = tr1->idx[0];
310:     tr2->idx[1] = tr1->idx[1];
311:     if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
312:       tr2->idx[1]++; tr2->n[1]--; tr2->n[0]++;
313:     }
314:     if (tr2->n[0]>0) {
315:       tr2->n[0]--; tr2->idx[0]++;
316:       if (tr2->n[1]==0) tr2->idx[1] = tr2->idx[0];
317:     } else {
318:       tr2->n[1]--; tr2->idx[1]++; tr2->idx[0] = tr2->idx[1];
319:     }
320:     /* Hyperbolic transformation to make zeros in y */
321:     PetscBLASIntCast(tr2->n[0],&n0_);
322:     PetscBLASIntCast(tr2->n[1],&n1_);
323:     if (tr2->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,y+tr2->idx[0],y+tr2->idx[0]+1,&inc,tr2->tau));
324:     if (tr2->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,y+tr2->idx[1],y+tr2->idx[1]+1,&inc,tr2->tau+1));
325:     if (tr2->idx[0]<tr2->idx[1]) HRGen(PetscRealPart(y[tr2->idx[0]]),PetscRealPart(y[tr2->idx[1]]),&(tr2->type),&(tr2->cs),&(tr2->sn),&(tr2->alpha),&ncond2);
326:     else {
327:       tr2->alpha = PetscRealPart(y[tr2->idx[0]]);
328:       ncond2 = 1.0;
329:     }
330:     if (ncond2>*ncond) *ncond = ncond2;
331:   }
332:   return 0;
333: }

335: /*
336:   Auxiliary function to try perform one iteration of hr routine,
337:   checking condition number. If it is < tolD, apply the
338:   transformation to H and R, if not, ok=false and it do nothing
339:   tolE, tolerance to exchange complex pairs to improve conditioning
340: */
341: static PetscErrorCode TryHRIt(PetscInt n,PetscInt j,PetscInt sz,PetscScalar *H,PetscInt ldh,PetscScalar *R,PetscInt ldr,PetscReal *s,PetscBool *exg,PetscBool *ok,PetscInt *n0,PetscInt *n1,PetscInt *idx0,PetscInt *idx1,PetscReal *cond,PetscScalar *work)
342: {
343:   struct HRtr    *tr1,*tr2,tr1_t,tr2_t,tr1_te,tr2_te;
344:   PetscScalar    *x,*y;
345:   PetscReal      ncond,ncond_e;
346:   PetscInt       nwu=0,i,d=1;
347:   PetscBLASInt   n0_,n1_,inc=1,mh,mr,n_,ldr_,ldh_;
348:   PetscReal      tolD = 1e+5;

350:   if (cond) *cond = 1.0;
351:   PetscBLASIntCast(n,&n_);
352:   PetscBLASIntCast(ldr,&ldr_);
353:   PetscBLASIntCast(ldh,&ldh_);
354:   x = work+nwu;
355:   nwu += n;
356:   PetscArraycpy(x,R+j*ldr,n);
357:   *exg = PETSC_FALSE;
358:   *ok = PETSC_TRUE;
359:   tr1_t.data = x;
360:   if (sz==1) {
361:     /* Hyperbolic transformation to make zeros in x */
362:     MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,NULL,&ncond,work+nwu);
363:     /* Check condition number to single column*/
364:     if (ncond>tolD) *ok = PETSC_FALSE;
365:     tr1 = &tr1_t;
366:     tr2 = &tr2_t;
367:   } else {
368:     y = work+nwu;
369:     nwu += n;
370:     PetscArraycpy(y,R+(j+1)*ldr,n);
371:     tr2_t.data = y;
372:     MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,&tr2_t,&ncond,work+nwu);
373:     /* Computing hyperbolic transformations also for exchanged vectors */
374:     tr1_te.data = work+nwu;
375:     nwu += n;
376:     PetscArraycpy(tr1_te.data,R+(j+1)*ldr,n);
377:     tr2_te.data = work+nwu;
378:     nwu += n;
379:     PetscArraycpy(tr2_te.data,R+j*ldr,n);
380:     MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_te,&tr2_te,&ncond_e,work+nwu);
381:     if (ncond > d*ncond_e) {
382:       *exg = PETSC_TRUE;
383:       tr1 = &tr1_te;
384:       tr2 = &tr2_te;
385:       ncond = ncond_e;
386:     } else {
387:       tr1 = &tr1_t;
388:       tr2 = &tr2_t;
389:     }
390:     if (ncond>tolD) *ok = PETSC_FALSE;
391:   }
392:   if (*ok) {
393:     /* Everything is OK, apply transformations to R and H */
394:     /* First column */
395:     if (cond && *cond<ncond) *cond = ncond;
396:     x = tr1->data;
397:     PetscBLASIntCast(tr1->n[0],&n0_);
398:     PetscBLASIntCast(tr1->n[1],&n1_);
399:     PetscBLASIntCast(n-j-sz,&mr);
400:     if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
401:       x[tr1->idx[0]] = 1.0;
402:       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,x+tr1->idx[0],&inc,tr1->tau,R+(j+sz)*ldr+tr1->idx[0],&ldr_,work+nwu));
403:       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,x+tr1->idx[0],&inc,tr1->tau,H+(tr1->idx[0])*ldh,&ldh_,work+nwu));
404:     }
405:     if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
406:       x[tr1->idx[1]] = 1.0;
407:       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,x+tr1->idx[1],&inc,tr1->tau+1,R+(j+sz)*ldr+tr1->idx[1],&ldr_,work+nwu));
408:       PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,x+tr1->idx[1],&inc,tr1->tau+1,H+(tr1->idx[1])*ldh,&ldh_,work+nwu));
409:     }
410:     if (tr1->idx[0]<tr1->idx[1]) {
411:       HRApply(mr,R+(j+sz)*ldr+tr1->idx[0],ldr,R+(j+sz)*ldr+tr1->idx[1],ldr,tr1->cs,-tr1->sn);
412:       if (tr1->type==1) HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,tr1->cs,tr1->sn);
413:       else {
414:         HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,-tr1->cs,-tr1->sn);
415:         s[tr1->idx[0]] = -s[tr1->idx[0]];
416:         s[tr1->idx[1]] = -s[tr1->idx[1]];
417:       }
418:     }
419:     for (i=0;i<tr1->idx[0];i++) *(R+j*ldr+i) = x[i];
420:     for (i=tr1->idx[0]+1;i<n;i++) *(R+j*ldr+i) = 0.0;
421:     *(R+j*ldr+tr1->idx[0]) = tr1->alpha;
422:     if (sz==2) {
423:       y = tr2->data;
424:       /* Second column */
425:       PetscBLASIntCast(tr2->n[0],&n0_);
426:       PetscBLASIntCast(tr2->n[1],&n1_);
427:       PetscBLASIntCast(n-j-sz,&mr);
428:       PetscBLASIntCast(n-tr2->idx[0],&mh);
429:       if (tr2->n[0] > 1 && PetscAbsScalar(tr2->tau[0])!=0.0) {
430:         y[tr2->idx[0]] = 1.0;
431:         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,y+tr2->idx[0],&inc,tr2->tau,R+(j+2)*ldr+tr2->idx[0],&ldr_,work+nwu));
432:         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,y+tr2->idx[0],&inc,tr2->tau,H+(tr2->idx[0])*ldh,&ldh_,work+nwu));
433:       }
434:       if (tr2->n[1] > 1 && PetscAbsScalar(tr2->tau[1])!=0.0) {
435:         y[tr2->idx[1]] = 1.0;
436:         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,y+tr2->idx[1],&inc,tr2->tau+1,R+(j+2)*ldr+tr2->idx[1],&ldr_,work+nwu));
437:         PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,y+tr2->idx[1],&inc,tr2->tau+1,H+(tr2->idx[1])*ldh,&ldh_,work+nwu));
438:       }
439:       if (tr2->idx[0]<tr2->idx[1]) {
440:         HRApply(mr,R+(j+2)*ldr+tr2->idx[0],ldr,R+(j+2)*ldr+tr2->idx[1],ldr,tr2->cs,-tr2->sn);
441:         if (tr2->type==1) HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,tr2->cs,tr2->sn);
442:         else {
443:           HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,-tr2->cs,-tr2->sn);
444:           s[tr2->idx[0]] = -s[tr2->idx[0]];
445:           s[tr2->idx[1]] = -s[tr2->idx[1]];
446:         }
447:       }
448:       for (i=0;i<tr2->idx[0]-1;i++) *(R+(j+1)*ldr+i) = y[i];
449:       *(R+(j+1)*ldr+tr2->idx[0]-1) = y[tr2->idx[0]-1];
450:       for (i=tr2->idx[0]+1;i<n;i++) *(R+(j+1)*ldr+i) = 0.0;
451:       *(R+(j+1)*ldr+tr2->idx[0]) = tr2->alpha;
452:       *n0 = tr2->n[0];
453:       *n1 = tr2->n[1];
454:       *idx0 = tr2->idx[0];
455:       *idx1 = tr2->idx[1];
456:       if (tr2->idx[0]<tr2->idx[1] && tr2->type==2) {
457:         (*idx1)++; (*n1)--; (*n0)++;
458:       }
459:     } else {
460:       *n0 = tr1->n[0];
461:       *n1 = tr1->n[1];
462:       *idx0 = tr1->idx[0];
463:       *idx1 = tr1->idx[1];
464:       if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
465:         (*idx1)++; (*n1)--; (*n0)++;
466:       }
467:     }
468:     if (*n0>0) {
469:       (*n0)--; (*idx0)++;
470:       if (*n1==0) *idx1 = *idx0;
471:     } else {
472:       (*n1)--; (*idx1)++; *idx0 = *idx1;
473:     }
474:   }
475:   return 0;
476: }

478: /*
479:   compute V = HR whit H s-orthogonal and R upper triangular
480: */
481: static PetscErrorCode PseudoOrthog_HR(PetscInt *nv,PetscScalar *V,PetscInt ldv,PetscReal *s,PetscScalar *R,PetscInt ldr,PetscBLASInt *perm,PetscBLASInt *cmplxEig,PetscBool *breakdown,PetscScalar *work)
482: {
483:   PetscInt       i,j,n,n0,n1,np,idx0,idx1,sz=1,k=0,t1,t2,nwu=0;
484:   PetscScalar    *col1,*col2;
485:   PetscBool      exg=PETSC_FALSE,ok=PETSC_FALSE;

487:   n = *nv;
488:   col1 = work+nwu;
489:   nwu += n;
490:   col2 = work+nwu;
491:   nwu += n;
492:   /* Sort R and s according to sing(s) */
493:   np = 0;
494:   for (i=0;i<n;i++) if (s[i]>0) np++;
495:   if (s[0]>0) n1 = np;
496:   else n1 = n-np;
497:   n0 = 0;
498:   for (i=0;i<n;i++) {
499:     if (s[i]==s[0]) {
500:       s[n0] = s[0];
501:       perm[n0++] = i;
502:     } else perm[n1++] = i;
503:   }
504:   for (i=n0;i<n;i++) s[i] = -s[0];
505:   n1 -= n0;
506:   idx0 = 0;
507:   idx1 = n0;
508:   if (idx1==n) idx1=idx0;
509:   for (i=0;i<n;i++) {
510:     for (j=0;j<n;j++) R[j*ldr+i] = V[j*ldv+perm[i]];
511:   }
512:   /* Initialize H */
513:   for (i=0;i<n;i++) {
514:     PetscArrayzero(V+i*ldv,n);
515:     V[perm[i]+i*ldv] = 1.0;
516:   }
517:   for (i=0;i<n;i++) perm[i] = i;
518:   j = 0;
519:   while (j<n-k) {
520:     if (cmplxEig[j]==0) sz=1;
521:     else sz=2;
522:     TryHRIt(n,j,sz,V,ldv,R,ldr,s,&exg,&ok,&n0,&n1,&idx0,&idx1,NULL,work+nwu);
523:     if (ok) {
524:       if (exg) cmplxEig[j] = -cmplxEig[j];
525:       j = j+sz;
526:     } else { /* to be discarded */
527:       k = k+1;
528:       if (cmplxEig[j]==0) {
529:         if (j<n) {
530:           t1 = perm[j];
531:           for (i=j;i<n-1;i++) perm[i] = perm[i+1];
532:           perm[n-1] = t1;
533:           t1 = cmplxEig[j];
534:           for (i=j;i<n-1;i++) cmplxEig[i] = cmplxEig[i+1];
535:           cmplxEig[n-1] = t1;
536:           PetscArraycpy(col1,R+j*ldr,n);
537:           for (i=j;i<n-1;i++) PetscArraycpy(R+i*ldr,R+(i+1)*ldr,n);
538:           PetscArraycpy(R+(n-1)*ldr,col1,n);
539:         }
540:       } else {
541:         k = k+1;
542:         if (j<n-1) {
543:           t1 = perm[j]; t2 = perm[j+1];
544:           for (i=j;i<n-2;i++) perm[i] = perm[i+2];
545:           perm[n-2] = t1; perm[n-1] = t2;
546:           t1 = cmplxEig[j]; t2 = cmplxEig[j+1];
547:           for (i=j;i<n-2;i++) cmplxEig[i] = cmplxEig[i+2];
548:           cmplxEig[n-2] = t1; cmplxEig[n-1] = t2;
549:           PetscArraycpy(col1,R+j*ldr,n);
550:           PetscArraycpy(col2,R+(j+1)*ldr,n);
551:           for (i=j;i<n-2;i++) PetscArraycpy(R+i*ldr,R+(i+2)*ldr,n);
552:           PetscArraycpy(R+(n-2)*ldr,col1,n);
553:           PetscArraycpy(R+(n-1)*ldr,col2,n);
554:         }
555:       }
556:     }
557:   }
558:   if (k!=0) {
559:     if (breakdown) *breakdown = PETSC_TRUE;
560:     *nv = n-k;
561:   }
562:   return 0;
563: }

565: PetscErrorCode DSGHIEPOrthogEigenv(DS ds,DSMatType mat,PetscScalar *wr,PetscScalar *wi,PetscBool accum)
566: {
567:   PetscInt          lws,nwus=0,nwui=0,lwi,off,n,nv,ld,i,ldr,l;
568:   const PetscScalar *B,*W;
569:   PetscScalar       *Q,*X,*R,*ts,szero=0.0,sone=1.0;
570:   PetscReal         *s,vi,vr,tr,*d,*e;
571:   PetscBLASInt      ld_,n_,nv_,*perm,*cmplxEig;

573:   l = ds->l;
574:   n = ds->n-l;
575:   PetscBLASIntCast(n,&n_);
576:   ld = ds->ld;
577:   PetscBLASIntCast(ld,&ld_);
578:   off = l*ld+l;
579:   DSGetArrayReal(ds,DS_MAT_D,&s);
580:   if (!ds->compact) {
581:     MatDenseGetArrayRead(ds->omat[DS_MAT_B],&B);
582:     for (i=l;i<ds->n;i++) s[i] = PetscRealPart(B[i*ld+i]);
583:     MatDenseRestoreArrayRead(ds->omat[DS_MAT_B],&B);
584:   }
585:   lws = n*n+7*n;
586:   lwi = 2*n;
587:   DSAllocateWork_Private(ds,lws,0,lwi);
588:   R = ds->work+nwus;
589:   nwus += n*n;
590:   ldr = n;
591:   perm = ds->iwork + nwui;
592:   nwui += n;
593:   cmplxEig = ds->iwork+nwui;
594:   MatDenseGetArray(ds->omat[mat],&X);
595:   for (i=0;i<n;i++) {
596: #if defined(PETSC_USE_COMPLEX)
597:     vi = PetscImaginaryPart(wr[l+i]);
598: #else
599:     vi = PetscRealPart(wi[l+i]);
600: #endif
601:     if (vi!=0) {
602:       cmplxEig[i] = 1;
603:       cmplxEig[i+1] = 2;
604:       i++;
605:     } else cmplxEig[i] = 0;
606:   }
607:   nv = n;

609:   /* Perform HR decomposition */
610:   /* Hyperbolic rotators */
611:   PseudoOrthog_HR(&nv,X+off,ld,s+l,R,ldr,perm,cmplxEig,NULL,ds->work+nwus);
612:   /* Sort wr,wi perm */
613:   ts = ds->work+nwus;
614:   PetscArraycpy(ts,wr+l,n);
615:   for (i=0;i<n;i++) wr[i+l] = ts[perm[i]];
616: #if !defined(PETSC_USE_COMPLEX)
617:   PetscArraycpy(ts,wi+l,n);
618:   for (i=0;i<n;i++) wi[i+l] = ts[perm[i]];
619: #endif
620:   /* Projected Matrix */
621:   DSGetArrayReal(ds,DS_MAT_T,&d);
622:   PetscArrayzero(d+2*ld,ld);
623:   e = d+ld;
624:   d[l+nv-1] = PetscRealPart(wr[l+nv-1]*s[l+nv-1]);
625:   for (i=0;i<nv-1;i++) {
626:     if (cmplxEig[i]==0) { /* Real */
627:       d[l+i] = PetscRealPart(wr[l+i]*s[l+i]);
628:       e[l+i] = 0.0;
629:     } else {
630:       vr = PetscRealPart(wr[l+i]);
631: #if defined(PETSC_USE_COMPLEX)
632:       vi = PetscImaginaryPart(wr[l+i]);
633: #else
634:       vi = PetscRealPart(wi[l+i]);
635: #endif
636:       if (cmplxEig[i]==-1) vi = -vi;
637:       tr = PetscRealPart((R[i+(i+1)*ldr]/R[i+i*ldr]))*vi;
638:       d[l+i] = (vr-tr)*s[l+i];
639:       d[l+i+1] = (vr+tr)*s[l+i+1];
640:       e[l+i] = PetscRealPart(s[l+i]*(R[(i+1)+(i+1)*ldr]/R[i+i*ldr])*vi);
641:       if (i<nv-2) e[l+i+1] = 0.0;
642:       i++;
643:     }
644:   }
645:   DSRestoreArrayReal(ds,DS_MAT_T,&d);
646:   DSRestoreArrayReal(ds,DS_MAT_D,&s);
647:   /* accumulate previous Q */
648:   MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
649:   if (accum) {
650:     PetscBLASIntCast(nv,&nv_);
651:     DSAllocateMat_Private(ds,DS_MAT_W);
652:     MatCopy(ds->omat[DS_MAT_Q],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN);
653:     MatDenseGetArrayRead(ds->omat[DS_MAT_W],&W);
654:     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,W+off,&ld_,X+off,&ld_,&szero,Q+off,&ld_));
655:     MatDenseRestoreArrayRead(ds->omat[DS_MAT_W],&W);
656:   } else {
657:     PetscArrayzero(Q,ld*ld);
658:     for (i=0;i<ds->l;i++) Q[i+i*ld] = 1.0;
659:     for (i=0;i<n;i++) PetscArraycpy(Q+off+i*ld,X+off+i*ld,n);
660:   }
661:   MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
662:   ds->t = nv+l;
663:   MatDenseRestoreArray(ds->omat[mat],&X);
664:   if (!ds->compact) DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
665:   return 0;
666: }

668: /*
669:    Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
670: */
671: PetscErrorCode DSIntermediate_GHIEP(DS ds)
672: {
673:   PetscInt       i,ld,off;
674:   PetscInt       nwall,nwallr,nwalli;
675:   PetscScalar    *A,*B,*Q;
676:   PetscReal      *d,*e,*s;

678:   ld = ds->ld;
679:   MatDenseGetArray(ds->omat[DS_MAT_A],&A);
680:   MatDenseGetArray(ds->omat[DS_MAT_B],&B);
681:   MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
682:   DSGetArrayReal(ds,DS_MAT_T,&d);
683:   DSGetArrayReal(ds,DS_MAT_D,&s);
684:   e = d+ld;
685:   off = ds->l+ds->l*ld;
686:   PetscArrayzero(Q,ld*ld);
687:   nwall = ld*ld+ld;
688:   nwallr = ld;
689:   nwalli = ld;
690:   DSAllocateWork_Private(ds,nwall,nwallr,nwalli);
691:   for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
692:   for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
693:   if (ds->compact) {
694:     if (ds->state < DS_STATE_INTERMEDIATE) {
695:       DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
696:       TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork);
697:       ds->k = ds->l;
698:       PetscArrayzero(d+2*ld+ds->l,ds->n-ds->l);
699:     }
700:   } else {
701:     if (ds->state < DS_STATE_INTERMEDIATE) {
702:       for (i=0;i<ds->n;i++) s[i] = PetscRealPart(B[i+i*ld]);
703:       TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork);
704:       PetscArrayzero(d+2*ld,ds->n);
705:       ds->k = ds->l;
706:       DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
707:     } else DSSwitchFormat_GHIEP(ds,PETSC_TRUE);
708:   }
709:   MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
710:   MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
711:   MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
712:   DSRestoreArrayReal(ds,DS_MAT_T,&d);
713:   DSRestoreArrayReal(ds,DS_MAT_D,&s);
714:   return 0;
715: }