xref: /petsc/src/snes/impls/ngmres/snesngmres.c (revision 087dfb9eb11bbfa2ea39ca815eeb03a8ebb88322)
1a312c225SMatthew G Knepley /* Defines the basic SNES object */
2a312c225SMatthew G Knepley #include <private/snesimpl.h>
3a312c225SMatthew G Knepley 
4*087dfb9eSxuemin static PetscErrorCode    BuildNGmresSoln(PetscScalar*,Vec,SNES,PetscInt,PetscInt);
5*087dfb9eSxuemin 
6a312c225SMatthew G Knepley /* Private structure for the Anderson mixing method aka nonlinear Krylov */
7a312c225SMatthew G Knepley typedef struct {
8*087dfb9eSxuemin   PetscScalar *hh_origin;
9*087dfb9eSxuemin   Vec       *v, *w, *q;
10a312c225SMatthew G Knepley   PetscReal *f2;    /* 2-norms of function (residual) at each stage */
11a312c225SMatthew G Knepley   PetscInt   msize; /* maximum size of space */
12a312c225SMatthew G Knepley   PetscInt   csize; /* current size of space */
13*087dfb9eSxuemin   PetscScalar beta; /* relaxation parameter */
14*087dfb9eSxuemin   PetscScalar *nrs;            /* temp that holds the coefficients of the Krylov vectors that form the minimum residual solution */
15*087dfb9eSxuemin 
16a312c225SMatthew G Knepley } SNES_NGMRES;
17a312c225SMatthew G Knepley 
18*087dfb9eSxuemin #define HH(a,b)  (ngmres->hh_origin + (a)*(ngmres->msize)+(b))
19*087dfb9eSxuemin 
20a312c225SMatthew G Knepley #undef __FUNCT__
21a312c225SMatthew G Knepley #define __FUNCT__ "SNESReset_NGMRES"
22a312c225SMatthew G Knepley PetscErrorCode SNESReset_NGMRES(SNES snes)
23a312c225SMatthew G Knepley {
24a312c225SMatthew G Knepley   SNES_NGMRES   *ngmres = (SNES_NGMRES*) snes->data;
25a312c225SMatthew G Knepley   PetscErrorCode ierr;
26a312c225SMatthew G Knepley 
27a312c225SMatthew G Knepley   PetscFunctionBegin;
28a312c225SMatthew G Knepley   ierr = VecDestroyVecs(ngmres->msize, &ngmres->v);CHKERRQ(ierr);
29a312c225SMatthew G Knepley   ierr = VecDestroyVecs(ngmres->msize, &ngmres->w);CHKERRQ(ierr);
30a312c225SMatthew G Knepley   PetscFunctionReturn(0);
31a312c225SMatthew G Knepley }
32a312c225SMatthew G Knepley 
33a312c225SMatthew G Knepley #undef __FUNCT__
34a312c225SMatthew G Knepley #define __FUNCT__ "SNESDestroy_NGMRES"
35a312c225SMatthew G Knepley PetscErrorCode SNESDestroy_NGMRES(SNES snes)
36a312c225SMatthew G Knepley {
37a312c225SMatthew G Knepley   PetscErrorCode ierr;
38a312c225SMatthew G Knepley 
39a312c225SMatthew G Knepley   PetscFunctionBegin;
40a312c225SMatthew G Knepley   ierr = SNESReset_NGMRES(snes);CHKERRQ(ierr);
41a312c225SMatthew G Knepley   if (snes->work) {ierr = VecDestroyVecs(snes->nwork, &snes->work);CHKERRQ(ierr);}
42a312c225SMatthew G Knepley   PetscFunctionReturn(0);
43a312c225SMatthew G Knepley }
44a312c225SMatthew G Knepley 
45a312c225SMatthew G Knepley #undef __FUNCT__
46a312c225SMatthew G Knepley #define __FUNCT__ "SNESSetUp_NGMRES"
47a312c225SMatthew G Knepley PetscErrorCode SNESSetUp_NGMRES(SNES snes)
48a312c225SMatthew G Knepley {
49a312c225SMatthew G Knepley   SNES_NGMRES   *ngmres = (SNES_NGMRES *) snes->data;
50*087dfb9eSxuemin   PetscInt msize,hh;
51a312c225SMatthew G Knepley   PetscErrorCode ierr;
52a312c225SMatthew G Knepley 
53a312c225SMatthew G Knepley   PetscFunctionBegin;
54a312c225SMatthew G Knepley #if 0
55a312c225SMatthew G Knepley   if (snes->pc_side != PC_LEFT) {SETERRQ(((PetscObject) snes)->comm, PETSC_ERR_SUP, "Only left preconditioning allowed for SNESNGMRES");}
56a312c225SMatthew G Knepley #endif
57*087dfb9eSxuemin   ngmres->beta  = 1.0;
58*087dfb9eSxuemin   msize         = ngmres->msize;  /* restart size */
59*087dfb9eSxuemin   hh            = msize * msize;
60*087dfb9eSxuemin   ierr = PetscMalloc2(hh,PetscScalar,&ngmres->hh_origin,msize,PetscScalar,&ngmres->nrs);CHKERRQ(ierr);
61*087dfb9eSxuemin 
62*087dfb9eSxuemin   ierr = PetscMemzero(ngmres->hh_origin,hh*sizeof(PetscScalar));CHKERRQ(ierr);
63*087dfb9eSxuemin   ierr = PetscMemzero(ngmres->nrs,msize*sizeof(PetscScalar));CHKERRQ(ierr);
64*087dfb9eSxuemin 
65*087dfb9eSxuemin   //  ierr = PetscLogObjectMemory(ksp,(hh+msize)*sizeof(PetscScalar));CHKERRQ(ierr);
66*087dfb9eSxuemin 
67a312c225SMatthew G Knepley   ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->v);CHKERRQ(ierr);
68a312c225SMatthew G Knepley   ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->w);CHKERRQ(ierr);
69*087dfb9eSxuemin   ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->q);CHKERRQ(ierr);
70*087dfb9eSxuemin   ierr = SNESDefaultGetWork(snes, 2);CHKERRQ(ierr);
71a312c225SMatthew G Knepley   PetscFunctionReturn(0);
72a312c225SMatthew G Knepley }
73a312c225SMatthew G Knepley 
74a312c225SMatthew G Knepley #undef __FUNCT__
75a312c225SMatthew G Knepley #define __FUNCT__ "SNESSetFromOptions_NGMRES"
76a312c225SMatthew G Knepley PetscErrorCode SNESSetFromOptions_NGMRES(SNES snes)
77a312c225SMatthew G Knepley {
78a312c225SMatthew G Knepley   SNES_NGMRES   *ngmres = (SNES_NGMRES *) snes->data;
79a312c225SMatthew G Knepley   PetscErrorCode ierr;
80a312c225SMatthew G Knepley 
81a312c225SMatthew G Knepley   PetscFunctionBegin;
82a312c225SMatthew G Knepley   ierr = PetscOptionsHead("SNES NGMRES options");CHKERRQ(ierr);
83a312c225SMatthew G Knepley     ierr = PetscOptionsInt("-snes_gmres_restart", "Number of directions", "SNES", ngmres->msize, &ngmres->msize, PETSC_NULL);CHKERRQ(ierr);
84a312c225SMatthew G Knepley   ierr = PetscOptionsTail();CHKERRQ(ierr);
85a312c225SMatthew G Knepley   PetscFunctionReturn(0);
86a312c225SMatthew G Knepley }
87a312c225SMatthew G Knepley 
88a312c225SMatthew G Knepley #undef __FUNCT__
89a312c225SMatthew G Knepley #define __FUNCT__ "SNESView_NGMRES"
90a312c225SMatthew G Knepley PetscErrorCode SNESView_NGMRES(SNES snes, PetscViewer viewer)
91a312c225SMatthew G Knepley {
92a312c225SMatthew G Knepley   SNES_NGMRES   *ngmres = (SNES_NGMRES *) snes->data;
93a312c225SMatthew G Knepley   PetscBool      iascii;
94a312c225SMatthew G Knepley   PetscErrorCode ierr;
95a312c225SMatthew G Knepley 
96a312c225SMatthew G Knepley   PetscFunctionBegin;
97a312c225SMatthew G Knepley   ierr = PetscTypeCompare((PetscObject) viewer, PETSCVIEWERASCII, &iascii);CHKERRQ(ierr);
98a312c225SMatthew G Knepley   if (iascii) {
99a312c225SMatthew G Knepley     ierr = PetscViewerASCIIPrintf(viewer, "  Size of space %d\n", ngmres->msize);CHKERRQ(ierr);
100a312c225SMatthew G Knepley   }
101a312c225SMatthew G Knepley   PetscFunctionReturn(0);
102a312c225SMatthew G Knepley }
103a312c225SMatthew G Knepley 
104a312c225SMatthew G Knepley #undef __FUNCT__
105a312c225SMatthew G Knepley #define __FUNCT__ "SNESSolve_NGMRES"
106a312c225SMatthew G Knepley PetscErrorCode SNESSolve_NGMRES(SNES snes)
107a312c225SMatthew G Knepley {
1084a0c5b0cSMatthew G Knepley   SNES           pc;
109*087dfb9eSxuemin   SNES_NGMRES   *ngmres = (SNES_NGMRES *) snes->data;
110*087dfb9eSxuemin   Vec            X,F, Fold, Xold,temp,*dX = ngmres->w,*dF = ngmres->v;
111*087dfb9eSxuemin   PetscScalar    *nrs=ngmres->nrs;
112*087dfb9eSxuemin   PetscReal      gnorm;
113*087dfb9eSxuemin   PetscInt       i, j, k, l, flag, it;
114a312c225SMatthew G Knepley   PetscErrorCode ierr;
115a312c225SMatthew G Knepley 
116a312c225SMatthew G Knepley   PetscFunctionBegin;
117a312c225SMatthew G Knepley   snes->reason  = SNES_CONVERGED_ITERATING;
118a312c225SMatthew G Knepley   X             = snes->vec_sol;
119a312c225SMatthew G Knepley   F             = snes->vec_func;
120*087dfb9eSxuemin   Fold          = snes->work[0];
121*087dfb9eSxuemin   Xold             = snes->work[1];
122*087dfb9eSxuemin   ierr = VecDuplicate(X,&Xold);CHKERRQ(ierr);
123*087dfb9eSxuemin   ierr = VecDuplicate(X,&temp);CHKERRQ(ierr);
124a312c225SMatthew G Knepley 
1254a0c5b0cSMatthew G Knepley   ierr = SNESGetPC(snes, &pc);CHKERRQ(ierr);
126a312c225SMatthew G Knepley   ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr);
127a312c225SMatthew G Knepley   snes->iter = 0;
128a312c225SMatthew G Knepley   snes->norm = 0.;
129a312c225SMatthew G Knepley   ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr);
130*087dfb9eSxuemin   ierr = SNESComputeFunction(snes, X, temp);CHKERRQ(ierr);               /* r = F(x) */
131*087dfb9eSxuemin #if 0
132*087dfb9eSxuemin   ierr = SNESSolve(snes->pc, temp, F);CHKERRQ(ierr);                  /* p = P(r) */
133*087dfb9eSxuemin #else
134*087dfb9eSxuemin   ierr = VecCopy(temp, F);CHKERRQ(ierr);                              /* p = r    */
135*087dfb9eSxuemin #endif
136*087dfb9eSxuemin 
137a312c225SMatthew G Knepley   if (snes->domainerror) {
138a312c225SMatthew G Knepley     snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN;
139a312c225SMatthew G Knepley     PetscFunctionReturn(0);
140a312c225SMatthew G Knepley   }
141*087dfb9eSxuemin   ierr = VecNorm(F, NORM_2, &gnorm);CHKERRQ(ierr);                    /* fnorm = ||r||  */
142*087dfb9eSxuemin   if (PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_FP, "Infinite or not-a-number generated in norm");
143*087dfb9eSxuemin 
144a312c225SMatthew G Knepley   ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr);
145*087dfb9eSxuemin   snes->norm = gnorm;
146a312c225SMatthew G Knepley   ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr);
147*087dfb9eSxuemin   SNESLogConvHistory(snes, gnorm, 0);
148*087dfb9eSxuemin   ierr = SNESMonitor(snes, 0, gnorm);CHKERRQ(ierr);
149a312c225SMatthew G Knepley 
150a312c225SMatthew G Knepley   /* set parameter for default relative tolerance convergence test */
151*087dfb9eSxuemin   snes->ttol = gnorm*snes->rtol;
152a312c225SMatthew G Knepley   /* test convergence */
153*087dfb9eSxuemin   ierr = (*snes->ops->converged)(snes,0,0.0,0.0,gnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr);
154a312c225SMatthew G Knepley   if (snes->reason) PetscFunctionReturn(0);
155a312c225SMatthew G Knepley 
156*087dfb9eSxuemin  /* for k=0 */
157*087dfb9eSxuemin 
158*087dfb9eSxuemin   k=0;
159*087dfb9eSxuemin   ierr= VecCopy(X,Xold); CHKERRQ(ierr); /* Xbar_0= X_0 */
160*087dfb9eSxuemin   ierr= VecCopy(F,Fold); CHKERRQ(ierr); /* Fbar_0 = f_0= B(b-Ax_0) */
161*087dfb9eSxuemin   ierr= VecWAXPY(X, ngmres->beta, Fold, Xold); CHKERRQ(ierr);       /*X_1 = X_bar + beta * Fbar */
162*087dfb9eSxuemin 
163*087dfb9eSxuemin   /* to calculate f_1 */
164*087dfb9eSxuemin   ierr = SNESComputeFunction(snes, X, temp);CHKERRQ(ierr);               /* r = F(x) */
165*087dfb9eSxuemin #if 0
166*087dfb9eSxuemin   ierr = SNESSolve(snes->pc, temp, F);CHKERRQ(ierr);                  /* p = P(r) */
1674a0c5b0cSMatthew G Knepley #else
168*087dfb9eSxuemin   ierr = VecCopy(temp, F);CHKERRQ(ierr);                              /* p = r    */
169a312c225SMatthew G Knepley #endif
170a312c225SMatthew G Knepley 
171a312c225SMatthew G Knepley 
172*087dfb9eSxuemin   /* calculate dX and dF for k=0 */
173*087dfb9eSxuemin   ierr= VecWAXPY(dX[k],-1.0, Xold, X); CHKERRQ(ierr); /* dX= X_1 - X_0 */
174*087dfb9eSxuemin   ierr= VecWAXPY(dF[k],-1.0, Fold, F); CHKERRQ(ierr); /* dF= f_1 - f_0 */
175*087dfb9eSxuemin 
176*087dfb9eSxuemin 
177*087dfb9eSxuemin   ierr= VecCopy(X,Xold); CHKERRQ(ierr); /* Xbar_0= X_0 */
178*087dfb9eSxuemin   ierr= VecCopy(F,Fold); CHKERRQ(ierr); /* Fbar_0 = f_0= B(b-Ax_0) */
179*087dfb9eSxuemin 
180*087dfb9eSxuemin   flag=0;
181*087dfb9eSxuemin 
182*087dfb9eSxuemin 
183*087dfb9eSxuemin 
184*087dfb9eSxuemin   for (k=1; k<snes->max_its; k += 1) {  /* begin the iteration */
185*087dfb9eSxuemin     l=ngmres->msize;
186*087dfb9eSxuemin     if(k<l) l=k;
187*087dfb9eSxuemin     it=l-1;
188*087dfb9eSxuemin 
189*087dfb9eSxuemin     ierr = BuildNGmresSoln(nrs,Fold,snes,it,flag);CHKERRQ(ierr);
190*087dfb9eSxuemin 
191*087dfb9eSxuemin 
192*087dfb9eSxuemin 
193*087dfb9eSxuemin     /* to obtain the solution at k+1 step */
194*087dfb9eSxuemin     ierr= VecCopy(Xold,X); CHKERRQ(ierr); /* X=Xold+Fold-(dX + dF) *nrd */
195*087dfb9eSxuemin      ierr= VecAXPY(X,1.0,Fold); CHKERRQ(ierr); /* X= Xold+Fold */
196*087dfb9eSxuemin     for(i=0;i<l;i++){      /*X= Xold+Fold- (dX+dF*beta) *innerb */
197*087dfb9eSxuemin       ierr= VecAXPY(X,-nrs[i], dX[i]);CHKERRQ(ierr);
198*087dfb9eSxuemin       ierr= VecAXPY(X,-nrs[i]*ngmres->beta, dF[i]);CHKERRQ(ierr);
199a312c225SMatthew G Knepley     }
200*087dfb9eSxuemin 
201*087dfb9eSxuemin 
202*087dfb9eSxuemin     /* to test with GMRES */
203*087dfb9eSxuemin     //ierr= VecCopy(Xold,temp); CHKERRQ(ierr); /* X=Xold-(dX ) *nrd */
204*087dfb9eSxuemin     /*for(i=0;i<l;i++){
205*087dfb9eSxuemin       ierr= VecAXPY(temp,-nrs[i], dX[i]);CHKERRQ(ierr);
206*087dfb9eSxuemin     }
207*087dfb9eSxuemin     ierr = KSP_MatMult(ksp,Amat,temp,R);CHKERRQ(ierr);
208*087dfb9eSxuemin     ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr);
209*087dfb9eSxuemin     ierr = PCApply(ksp->pc,R,F);CHKERRQ(ierr);
210*087dfb9eSxuemin      ierr = VecNorm(R,NORM_2,&gnorm);CHKERRQ(ierr);
211*087dfb9eSxuemin      printf("gmres residual norm=%g\n",gnorm);
212*087dfb9eSxuemin     */
213*087dfb9eSxuemin 
214*087dfb9eSxuemin     /* to calculate f_k+1 */
215*087dfb9eSxuemin     ierr = SNESComputeFunction(snes, X, temp);CHKERRQ(ierr);               /* r = F(x) */
216*087dfb9eSxuemin #if 0
217*087dfb9eSxuemin     ierr = SNESSolve(snes->pc, temp, F);CHKERRQ(ierr);                  /* p = P(r) */
218*087dfb9eSxuemin #else
219*087dfb9eSxuemin     ierr = VecCopy(temp, F);CHKERRQ(ierr);                              /* p = r    */
220*087dfb9eSxuemin #endif
221*087dfb9eSxuemin 
222*087dfb9eSxuemin 
223*087dfb9eSxuemin 
224*087dfb9eSxuemin     /* check the convegence */
225*087dfb9eSxuemin       ierr = VecNorm(F, NORM_2, &gnorm);CHKERRQ(ierr);                    /* fnorm = ||r||  */
226*087dfb9eSxuemin       SNESLogConvHistory(snes, gnorm, k);
227*087dfb9eSxuemin       ierr = SNESMonitor(snes, k, gnorm);CHKERRQ(ierr);
228*087dfb9eSxuemin       snes->iter =k;
229*087dfb9eSxuemin       ierr = (*snes->ops->converged)(snes,0,0.0,0.0,gnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr);
230*087dfb9eSxuemin 
231*087dfb9eSxuemin       if (snes->reason) PetscFunctionReturn(0);
232*087dfb9eSxuemin 
233*087dfb9eSxuemin 
234*087dfb9eSxuemin 
235*087dfb9eSxuemin     /* calculate dX and dF for k=0 */
236*087dfb9eSxuemin       if( k>l) {/* we need to replace the old vectors */
237*087dfb9eSxuemin 	flag=1;
238*087dfb9eSxuemin 	for(i=0;i<l-1;i++){
239*087dfb9eSxuemin 	  ierr= VecCopy(dX[i+1],dX[i]); CHKERRQ(ierr); /* X=Xold+Fold-(dX + dF) *nrd */
240*087dfb9eSxuemin 	  ierr= VecCopy(dF[i+1],dF[i]); CHKERRQ(ierr); /* X=Xold+Fold-(dX + dF) *nrd */
241*087dfb9eSxuemin           for(j=0;j<l;j++)
242*087dfb9eSxuemin 	    *HH(j,i)=*HH(j,i+1);
243*087dfb9eSxuemin 	}
244*087dfb9eSxuemin       }
245*087dfb9eSxuemin       ierr= VecWAXPY(dX[l],-1.0, Xold, X); CHKERRQ(ierr); /* dX= X_1 - X_0 */
246*087dfb9eSxuemin       ierr= VecWAXPY(dF[l],-1.0, Fold, F); CHKERRQ(ierr); /* dF= f_1 - f_0 */
247*087dfb9eSxuemin       ierr= VecCopy(X,Xold); CHKERRQ(ierr);
248*087dfb9eSxuemin       ierr= VecCopy(F,Fold); CHKERRQ(ierr);
249*087dfb9eSxuemin 
250a312c225SMatthew G Knepley   }
251a312c225SMatthew G Knepley   snes->reason = SNES_DIVERGED_MAX_IT;
252a312c225SMatthew G Knepley   PetscFunctionReturn(0);
253a312c225SMatthew G Knepley }
254a312c225SMatthew G Knepley 
255a312c225SMatthew G Knepley /*MC
256a312c225SMatthew G Knepley   SNESNGMRES - The Nonlinear Generalized Minimum Residual (NGMRES) method of Oosterlee and Washio.
257a312c225SMatthew G Knepley 
258a312c225SMatthew G Knepley    Level: beginner
259a312c225SMatthew G Knepley 
260a312c225SMatthew G Knepley    Notes: Supports only left preconditioning
261a312c225SMatthew G Knepley 
262a312c225SMatthew G Knepley    "Krylov Subspace Acceleration of Nonlinear Multigrid with Application to Recirculating Flows", C. W. Oosterlee and T. Washio,
263a312c225SMatthew G Knepley    SIAM Journal on Scientific Computing, 21(5), 2000.
264a312c225SMatthew G Knepley 
265a312c225SMatthew G Knepley .seealso: SNESCreate(), SNES, SNESSetType(), SNESType (for list of available types)
266a312c225SMatthew G Knepley M*/
267a312c225SMatthew G Knepley EXTERN_C_BEGIN
268a312c225SMatthew G Knepley #undef __FUNCT__
269a312c225SMatthew G Knepley #define __FUNCT__ "SNESCreate_NGMRES"
270a312c225SMatthew G Knepley PetscErrorCode SNESCreate_NGMRES(SNES snes)
271a312c225SMatthew G Knepley {
272a312c225SMatthew G Knepley   SNES_NGMRES   *ngmres;
273a312c225SMatthew G Knepley   PetscErrorCode ierr;
274a312c225SMatthew G Knepley 
275a312c225SMatthew G Knepley   PetscFunctionBegin;
276a312c225SMatthew G Knepley   snes->ops->destroy        = SNESDestroy_NGMRES;
277a312c225SMatthew G Knepley   snes->ops->setup          = SNESSetUp_NGMRES;
278a312c225SMatthew G Knepley   snes->ops->setfromoptions = SNESSetFromOptions_NGMRES;
279a312c225SMatthew G Knepley   snes->ops->view           = SNESView_NGMRES;
280a312c225SMatthew G Knepley   snes->ops->solve          = SNESSolve_NGMRES;
281a312c225SMatthew G Knepley   snes->ops->reset          = SNESReset_NGMRES;
282a312c225SMatthew G Knepley 
283a312c225SMatthew G Knepley   ierr = PetscNewLog(snes, SNES_NGMRES, &ngmres);CHKERRQ(ierr);
284a312c225SMatthew G Knepley   snes->data = (void*) ngmres;
285a312c225SMatthew G Knepley   ngmres->msize = 30;
286a312c225SMatthew G Knepley   ngmres->csize = 0;
2874a0c5b0cSMatthew G Knepley 
2884a0c5b0cSMatthew G Knepley   ierr = SNESGetPC(snes, &snes->pc);CHKERRQ(ierr);
289a312c225SMatthew G Knepley #if 0
290a312c225SMatthew G Knepley   if (ksp->pc_side != PC_LEFT) {ierr = PetscInfo(ksp,"WARNING! Setting PC_SIDE for NGMRES to left!\n");CHKERRQ(ierr);}
291a312c225SMatthew G Knepley   snes->pc_side = PC_LEFT;
292a312c225SMatthew G Knepley #endif
293a312c225SMatthew G Knepley   PetscFunctionReturn(0);
294a312c225SMatthew G Knepley }
295a312c225SMatthew G Knepley EXTERN_C_END
296*087dfb9eSxuemin 
297*087dfb9eSxuemin 
298*087dfb9eSxuemin 
299*087dfb9eSxuemin /*
300*087dfb9eSxuemin     BuildNGmresSoln - create the solution of the least square problem to determine the coefficients
301*087dfb9eSxuemin 
302*087dfb9eSxuemin     Input parameters:
303*087dfb9eSxuemin         nrs - the solution
304*087dfb9eSxuemin 	Fold  - the RHS vector
305*087dfb9eSxuemin         flag - =1, we need to replace the old vector by new one, =0, we still add new vector
306*087dfb9eSxuemin      This is an internal routine that knows about the NGMRES internals.
307*087dfb9eSxuemin  */
308*087dfb9eSxuemin #undef __FUNCT__
309*087dfb9eSxuemin #define __FUNCT__ "BuildNGmresSoln"
310*087dfb9eSxuemin static PetscErrorCode BuildNGmresSoln(PetscScalar* nrs, Vec Fold, SNES snes,PetscInt it, PetscInt flag)
311*087dfb9eSxuemin {
312*087dfb9eSxuemin   PetscScalar    tt,temps;
313*087dfb9eSxuemin   PetscErrorCode ierr;
314*087dfb9eSxuemin   PetscInt       i,ii,j,l;
315*087dfb9eSxuemin   SNES_NGMRES      *ngmres = (SNES_NGMRES *)(snes->data);
316*087dfb9eSxuemin   Vec *dF=ngmres->v, *Q=ngmres->q,temp;
317*087dfb9eSxuemin   PetscReal      a,b,gam,c,s;
318*087dfb9eSxuemin 
319*087dfb9eSxuemin   PetscFunctionBegin;
320*087dfb9eSxuemin   ierr = VecDuplicate(Fold,&temp);CHKERRQ(ierr);
321*087dfb9eSxuemin   l=it+1;
322*087dfb9eSxuemin 
323*087dfb9eSxuemin   /* Solve for solution vector that minimizes the residual */
324*087dfb9eSxuemin 
325*087dfb9eSxuemin   if(flag==1) { // we need to replace the old vector and need to modify the QR factors, use Givens rotation
326*087dfb9eSxuemin       for(i=0;i<it;i++){
327*087dfb9eSxuemin 	/* calculate the Givens rotation */
328*087dfb9eSxuemin 	a=*HH(i,i);
329*087dfb9eSxuemin 	b=*HH(i+1,i);
330*087dfb9eSxuemin         gam=1.0/PetscSqrtScalar(a*a+b*b);
331*087dfb9eSxuemin         c= a*gam;
332*087dfb9eSxuemin         s= b*gam;
333*087dfb9eSxuemin 	/* update the Q factor */
334*087dfb9eSxuemin         ierr= VecCopy(Q[i],temp); CHKERRQ(ierr);
335*087dfb9eSxuemin 	ierr = VecAXPBY(temp,s,c,Q[i+1]);CHKERRQ(ierr); /*temp= c*Q[i]+s*Q[i+1] */
336*087dfb9eSxuemin         ierr = VecAXPBY(Q[i+1],-s,c,Q[i]);CHKERRQ(ierr); /* Q[i+1]= -s*Q[i] + c*Q[i+1] */
337*087dfb9eSxuemin         ierr= VecCopy(temp,Q[i]); CHKERRQ(ierr);   /* Q[i]= c*Q[i] + s*Q[i+1] */
338*087dfb9eSxuemin         /* update the R factor */
339*087dfb9eSxuemin         for(j=0;j<l;j++){
340*087dfb9eSxuemin           a= *HH(i,j);
341*087dfb9eSxuemin           b=*HH(i+1,j);
342*087dfb9eSxuemin 	  temps=c* a+s* b;
343*087dfb9eSxuemin           *HH(i+1,j)=-s*a+c*b;
344*087dfb9eSxuemin           *HH(i,j)=temps;
345*087dfb9eSxuemin         }
346*087dfb9eSxuemin       }
347*087dfb9eSxuemin     }
348*087dfb9eSxuemin 
349*087dfb9eSxuemin     // add a new vector, use modified Gram-Schmidt
350*087dfb9eSxuemin     ierr= VecCopy(dF[it],temp); CHKERRQ(ierr);
351*087dfb9eSxuemin     for(i=0;i<it;i++){
352*087dfb9eSxuemin       ierr=VecDot(temp,Q[i],HH(i,it));CHKERRQ(ierr); /* h(i,l-1)= dF[l-1]'*Q[i] */
353*087dfb9eSxuemin       ierr = VecAXPBY(temp,-*HH(i,it),1.0,Q[i]);CHKERRQ(ierr); /* temp= temp- h(i,l-1)*Q[i] */
354*087dfb9eSxuemin     }
355*087dfb9eSxuemin     ierr=VecCopy(temp,Q[it]);CHKERRQ(ierr);
356*087dfb9eSxuemin     ierr=VecNormalize(Q[it],&a);CHKERRQ(ierr);
357*087dfb9eSxuemin     *HH(it,it)=a;
358*087dfb9eSxuemin 
359*087dfb9eSxuemin 
360*087dfb9eSxuemin 
361*087dfb9eSxuemin     /* modify the RHS with Q'*Fold*/
362*087dfb9eSxuemin 
363*087dfb9eSxuemin   for(i=0;i<l;i++)
364*087dfb9eSxuemin           ierr=VecDot(Fold,Q[i],ngmres->nrs+i);CHKERRQ(ierr); /* nrs= Fold'*Q[i] */
365*087dfb9eSxuemin 
366*087dfb9eSxuemin     /* start backsubstitution to solve the least square problem */
367*087dfb9eSxuemin 
368*087dfb9eSxuemin      if (*HH(it,it) != 0.0) {
369*087dfb9eSxuemin       nrs[it] =  nrs[it]/ *HH(it,it);
370*087dfb9eSxuemin     } else {
371*087dfb9eSxuemin        snes->reason = SNES_DIVERGED_MAX_IT;
372*087dfb9eSxuemin        ierr = PetscInfo2(snes,"Likely your matrix or preconditioner is singular. HH(it,it) is identically zero; it = %D nrs(it) = %G",it,10);
373*087dfb9eSxuemin        PetscFunctionReturn(0);
374*087dfb9eSxuemin   }
375*087dfb9eSxuemin   for (ii=1; ii<=it; ii++) {
376*087dfb9eSxuemin     i   = it - ii;
377*087dfb9eSxuemin     tt  = nrs[i];
378*087dfb9eSxuemin     for (j=i+1; j<=it; j++) tt  = tt - *HH(i,j) * nrs[j];
379*087dfb9eSxuemin     if (*HH(i,i) == 0.0) {
380*087dfb9eSxuemin       snes->reason = SNES_DIVERGED_MAX_IT;
381*087dfb9eSxuemin       ierr = PetscInfo1(snes,"Likely your matrix or preconditioner is singular. HH(k,k) is identically zero; i = %D",i);
382*087dfb9eSxuemin       PetscFunctionReturn(0);
383*087dfb9eSxuemin     }
384*087dfb9eSxuemin     nrs[i]   = tt / *HH(i,i);
385*087dfb9eSxuemin   }
386*087dfb9eSxuemin 
387*087dfb9eSxuemin 
388*087dfb9eSxuemin   PetscFunctionReturn(0);
389*087dfb9eSxuemin }
390