xref: /petsc/src/ksp/pc/impls/is/nn/nn.c (revision 5c9740d624bef1aeaff6f84d00aa946d4ed0f468)
1 /*$Id: nn.c,v 1.13 2001/08/07 03:03:41 balay Exp $*/
2 
3 #include "src/ksp/pc/impls/is/nn/nn.h"
4 
5 /* -------------------------------------------------------------------------- */
6 /*
7    PCSetUp_NN - Prepares for the use of the NN preconditioner
8                     by setting data structures and options.
9 
10    Input Parameter:
11 .  pc - the preconditioner context
12 
13    Application Interface Routine: PCSetUp()
14 
15    Notes:
16    The interface routine PCSetUp() is not usually called directly by
17    the user, but instead is called by PCApply() if necessary.
18 */
19 #undef __FUNCT__
20 #define __FUNCT__ "PCSetUp_NN"
21 static int PCSetUp_NN(PC pc)
22 {
23   int ierr;
24 
25   PetscFunctionBegin;
26   if (!pc->setupcalled) {
27     /* Set up all the "iterative substructuring" common block */
28     ierr = PCISSetUp(pc);CHKERRQ(ierr);
29     /* Create the coarse matrix. */
30     ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr);
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 /* -------------------------------------------------------------------------- */
36 /*
37    PCApply_NN - Applies the NN preconditioner to a vector.
38 
39    Input Parameters:
40 .  pc - the preconditioner context
41 .  r - input vector (global)
42 
43    Output Parameter:
44 .  z - output vector (global)
45 
46    Application Interface Routine: PCApply()
47  */
48 #undef __FUNCT__
49 #define __FUNCT__ "PCApply_NN"
50 static int PCApply_NN(PC pc,Vec r,Vec z)
51 {
52   PC_IS       *pcis = (PC_IS*)(pc->data);
53   int         ierr;
54   PetscScalar m_one = -1.0;
55   Vec         w = pcis->vec1_global;
56 
57   PetscFunctionBegin;
58 
59   /*
60     Dirichlet solvers.
61     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
62     Storing the local results at vec2_D
63   */
64   ierr = VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
65   ierr = VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
66   ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr);
67   ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr);
68   ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr);
69 
70   /*
71     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
72     Storing the result in the interface portion of the global vector w.
73   */
74   ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
75   ierr = VecScale(&m_one,pcis->vec1_B);CHKERRQ(ierr);
76   ierr = VecCopy(r,w);CHKERRQ(ierr);
77   ierr = VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
78   ierr = VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
79 
80   /*
81     Apply the interface preconditioner
82   */
83   ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
84                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
85 
86   /*
87     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
88     The result is stored in vec1_D.
89   */
90   ierr = VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
91   ierr = VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
92   ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr);
93 
94   /*
95     Dirichlet solvers.
96     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
97     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
98   */
99   ierr = VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
100   ierr = VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
101   ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr);
102   ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr);
103   ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr);
104   ierr = VecScale(&m_one,pcis->vec2_D);CHKERRQ(ierr);
105   ierr = VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
106   ierr = VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
107 
108   PetscFunctionReturn(0);
109 }
110 
111 /* -------------------------------------------------------------------------- */
112 /*
113    PCDestroy_NN - Destroys the private context for the NN preconditioner
114    that was created with PCCreate_NN().
115 
116    Input Parameter:
117 .  pc - the preconditioner context
118 
119    Application Interface Routine: PCDestroy()
120 */
121 #undef __FUNCT__
122 #define __FUNCT__ "PCDestroy_NN"
123 static int PCDestroy_NN(PC pc)
124 {
125   PC_NN *pcnn = (PC_NN*)pc->data;
126   int   ierr;
127 
128   PetscFunctionBegin;
129 
130   ierr = PCISDestroy(pc);CHKERRQ(ierr);
131 
132   if (pcnn->coarse_mat)  {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);}
133   if (pcnn->coarse_x)    {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);}
134   if (pcnn->coarse_b)    {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);}
135   if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);}
136   if (pcnn->DZ_IN) {
137     if (pcnn->DZ_IN[0]) {ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);}
138     ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr);
139   }
140 
141   /*
142       Free the private data structure that was hanging off the PC
143   */
144   ierr = PetscFree(pcnn);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 /* -------------------------------------------------------------------------- */
149 /*MC
150    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
151 
152    Options Database Keys:
153 +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
154                                        (this skips the first coarse grid solve in the preconditioner)
155 .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
156                                        (this skips the second coarse grid solve in the preconditioner)
157 .    -pc_is_damp_fixed <fact> -
158 .    -pc_is_remove_nullspace_fixed -
159 .    -pc_is_set_damping_factor_floating <fact> -
160 .    -pc_is_not_damp_floating -
161 +    -pc_is_not_remove_nullspace_floating -
162 
163    Level: intermediates
164 
165    Notes: The matrix used with this preconditioner must be of type MATIS
166 
167           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
168           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
169           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
170 
171    Contributed by Paulo Goldfeld
172 
173 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MatIS
174 M*/
175 EXTERN_C_BEGIN
176 #undef __FUNCT__
177 #define __FUNCT__ "PCCreate_NN"
178 int PCCreate_NN(PC pc)
179 {
180   int   ierr;
181   PC_NN *pcnn;
182 
183   PetscFunctionBegin;
184 
185   /*
186      Creates the private data structure for this preconditioner and
187      attach it to the PC object.
188   */
189   ierr      = PetscNew(PC_NN,&pcnn);CHKERRQ(ierr);
190   pc->data  = (void*)pcnn;
191 
192   /*
193      Logs the memory usage; this is not needed but allows PETSc to
194      monitor how much memory is being used for various purposes.
195   */
196   PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */
197 
198   ierr = PCISCreate(pc);CHKERRQ(ierr);
199   pcnn->coarse_mat  = 0;
200   pcnn->coarse_x    = 0;
201   pcnn->coarse_b    = 0;
202   pcnn->ksp_coarse = 0;
203   pcnn->DZ_IN       = 0;
204 
205   /*
206       Set the pointers for the functions that are provided above.
207       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
208       are called, they will automatically call these functions.  Note we
209       choose not to provide a couple of these functions since they are
210       not needed.
211   */
212   pc->ops->apply               = PCApply_NN;
213   pc->ops->applytranspose      = 0;
214   pc->ops->setup               = PCSetUp_NN;
215   pc->ops->destroy             = PCDestroy_NN;
216   pc->ops->view                = 0;
217   pc->ops->applyrichardson     = 0;
218   pc->ops->applysymmetricleft  = 0;
219   pc->ops->applysymmetricright = 0;
220 
221   PetscFunctionReturn(0);
222 }
223 EXTERN_C_END
224 
225 
226 /* -------------------------------------------------------------------------- */
227 /*
228    PCNNCreateCoarseMatrix -
229 */
230 #undef __FUNCT__
231 #define __FUNCT__ "PCNNCreateCoarseMatrix"
232 int PCNNCreateCoarseMatrix (PC pc)
233 {
234   MPI_Request    *send_request, *recv_request;
235   int            i, j, k, ierr;
236 
237   PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
238   PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
239 
240   /* aliasing some names */
241   PC_IS*         pcis     = (PC_IS*)(pc->data);
242   PC_NN*         pcnn     = (PC_NN*)pc->data;
243   int            n_neigh  = pcis->n_neigh;
244   int*           neigh    = pcis->neigh;
245   int*           n_shared = pcis->n_shared;
246   int**          shared   = pcis->shared;
247   PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */
248 
249   PetscFunctionBegin;
250 
251   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
252   ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr);
253 
254   /* Allocate memory for DZ */
255   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
256   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
257   {
258     int size_of_Z = 0;
259     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr);
260     DZ_IN = pcnn->DZ_IN;
261     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr);
262     for (i=0; i<n_neigh; i++) {
263       size_of_Z += n_shared[i];
264     }
265     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr);
266     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr);
267   }
268   for (i=1; i<n_neigh; i++) {
269     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
270     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
271   }
272 
273   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
274   /* First, set the auxiliary array pcis->work_N. */
275   ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
276   for (i=1; i<n_neigh; i++){
277     for (j=0; j<n_shared[i]; j++) {
278       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
279     }
280   }
281 
282   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
283   /* Notice that send_request[] and recv_request[] could have one less element. */
284   /* We make them longer to have request[i] corresponding to neigh[i].          */
285   {
286     int tag;
287     ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
288     ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr);
289     recv_request = send_request + (n_neigh);
290     for (i=1; i<n_neigh; i++) {
291       ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr);
292       ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr);
293     }
294   }
295 
296   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
297   for(j=0; j<n_shared[0]; j++) {
298     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
299   }
300 
301   /* Start computing with local D*Z while communication goes on.    */
302   /* Apply Schur complement. The result is "stored" in vec (more    */
303   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
304   /* and also scattered to pcnn->work_N.                            */
305   ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
306                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
307 
308   /* Compute the first column, while completing the receiving. */
309   for (i=0; i<n_neigh; i++) {
310     MPI_Status stat;
311     int ind=0;
312     if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;}
313     mat[ind*n_neigh+0] = 0.0;
314     for (k=0; k<n_shared[ind]; k++) {
315       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
316     }
317   }
318 
319   /* Compute the remaining of the columns */
320   for (j=1; j<n_neigh; j++) {
321     ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
322                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
323     for (i=0; i<n_neigh; i++) {
324       mat[i*n_neigh+j] = 0.0;
325       for (k=0; k<n_shared[i]; k++) {
326 	mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
327       }
328     }
329   }
330 
331   /* Complete the sending. */
332   if (n_neigh>1) {
333     MPI_Status *stat;
334     ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr);
335     ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);
336     ierr = PetscFree(stat);CHKERRQ(ierr);
337   }
338 
339   /* Free the memory for the MPI requests */
340   ierr = PetscFree(send_request);CHKERRQ(ierr);
341 
342   /* Free the memory for DZ_OUT */
343   if (DZ_OUT) {
344     if (DZ_OUT[0]) { ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); }
345     ierr = PetscFree(DZ_OUT);CHKERRQ(ierr);
346   }
347 
348   {
349     int size,n_neigh_m1;
350     ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
351     n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0;
352     /* Create the global coarse vectors (rhs and solution). */
353     ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr);
354     ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr);
355     /* Create and set the global coarse matrix. */
356     ierr = MatCreateMPIAIJ(pc->comm,1,1,size,size,1,PETSC_NULL,n_neigh_m1,PETSC_NULL,&(pcnn->coarse_mat));CHKERRQ(ierr);
357     ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr);
358     ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359     ierr = MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
360   }
361 
362   {
363     int         rank;
364     PetscScalar one = 1.0;
365     IS          is;
366     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
367     /* "Zero out" rows of not-purely-Neumann subdomains */
368     if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
369       ierr = ISCreateStride(pc->comm,0,0,0,&is);CHKERRQ(ierr);
370     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
371       ierr = ISCreateStride(pc->comm,1,rank,0,&is);CHKERRQ(ierr);
372     }
373     ierr = MatZeroRows(pcnn->coarse_mat,is,&one);CHKERRQ(ierr);
374     ierr = ISDestroy(is);CHKERRQ(ierr);
375   }
376 
377   /* Create the coarse linear solver context */
378   {
379     PC  pc_ctx, inner_pc;
380     ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr);
381     ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
382     ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr);
383     ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr);
384     ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr);
385     ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr);
386     ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr);
387     ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr);
388     ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr);
389     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
390     ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr);
391     ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr);
392     ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr);
393   }
394 
395   /* Free the memory for mat */
396   ierr = PetscFree(mat);CHKERRQ(ierr);
397 
398   /* for DEBUGGING, save the coarse matrix to a file. */
399   {
400     PetscTruth flg;
401     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);CHKERRQ(ierr);
402     if (flg) {
403       PetscViewer viewer;
404       ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr);
405       ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
406       ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr);
407       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
408     }
409   }
410 
411   /*  Set the variable pcnn->factor_coarse_rhs. */
412   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
413 
414   /* See historical note 02, at the bottom of this file. */
415 
416   PetscFunctionReturn(0);
417 }
418 
419 /* -------------------------------------------------------------------------- */
420 /*
421    PCNNApplySchurToChunk -
422 
423    Input parameters:
424 .  pcnn
425 .  n - size of chunk
426 .  idx - indices of chunk
427 .  chunk - values
428 
429    Output parameters:
430 .  array_N - result of Schur complement applied to chunk, scattered to big array
431 .  vec1_B  - result of Schur complement applied to chunk
432 .  vec2_B  - garbage (used as work space)
433 .  vec1_D  - garbage (used as work space)
434 .  vec2_D  - garbage (used as work space)
435 
436 */
437 #undef __FUNCT__
438 #define __FUNCT__ "PCNNApplySchurToChunk"
439 int PCNNApplySchurToChunk(PC pc, int n, int* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
440 {
441   int   i, ierr;
442   PC_IS *pcis = (PC_IS*)(pc->data);
443 
444   PetscFunctionBegin;
445 
446   ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr);
447   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
448   ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
449   ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
450   ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
451 
452   PetscFunctionReturn(0);
453 }
454 
455 /* -------------------------------------------------------------------------- */
456 /*
457    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
458                                       the preconditioner for the Schur complement.
459 
460    Input parameter:
461 .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
462 
463    Output parameters:
464 .  z - global vector of interior and interface nodes. The values on the interface are the result of
465        the application of the interface preconditioner to the interface part of r. The values on the
466        interior nodes are garbage.
467 .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
468 .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
469 .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
470 .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
471 .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
472 .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
473 .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
474 .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
475 
476 */
477 #undef __FUNCT__
478 #define __FUNCT__ "PCNNApplyInterfacePreconditioner"
479 int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
480                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
481 {
482   int    ierr;
483   PC_IS* pcis = (PC_IS*)(pc->data);
484 
485   PetscFunctionBegin;
486 
487   /*
488     First balancing step.
489   */
490   {
491     PetscTruth flg;
492     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);CHKERRQ(ierr);
493     if (!flg) {
494       ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
495     } else {
496       ierr = VecCopy(r,z);CHKERRQ(ierr);
497     }
498   }
499 
500   /*
501     Extract the local interface part of z and scale it by D
502   */
503   ierr = VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
504   ierr = VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
505   ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);
506 
507   /* Neumann Solver */
508   ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr);
509 
510   /*
511     Second balancing step.
512   */
513   {
514     PetscTruth flg;
515     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);CHKERRQ(ierr);
516     if (!flg) {
517       ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
518     } else {
519       PetscScalar zero = 0.0;
520       ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);
521       ierr = VecSet(&zero,z);CHKERRQ(ierr);
522       ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
523       ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
524     }
525   }
526 
527   PetscFunctionReturn(0);
528 }
529 
530 /* -------------------------------------------------------------------------- */
531 /*
532    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
533                    input argument u is provided), or s, as given in equations
534                    (12) and (13), if the input argument u is a null vector.
535                    Notice that the input argument u plays the role of u_i in
536                    equation (14). The equation numbers refer to [Man93].
537 
538    Input Parameters:
539 .  pcnn - NN preconditioner context.
540 .  r - MPI vector of all nodes (interior and interface). It's preserved.
541 .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
542 
543    Output Parameters:
544 .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
545 .  vec1_B - Sequential vector of local interface nodes. Workspace.
546 .  vec2_B - Sequential vector of local interface nodes. Workspace.
547 .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
548 .  vec1_D - Sequential vector of local interior nodes. Workspace.
549 .  vec2_D - Sequential vector of local interior nodes. Workspace.
550 .  work_N - Array of all local nodes (interior and interface). Workspace.
551 
552 */
553 #undef __FUNCT__
554 #define __FUNCT__ "PCNNBalancing"
555 int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
556                    Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
557 {
558   int            k, ierr;
559   PetscScalar    zero     =  0.0;
560   PetscScalar    m_one    = -1.0;
561   PetscScalar    value;
562   PetscScalar*   lambda;
563   PC_NN*         pcnn     = (PC_NN*)(pc->data);
564   PC_IS*         pcis     = (PC_IS*)(pc->data);
565 
566   PetscFunctionBegin;
567   ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
568 
569   if (u) {
570     if (!vec3_B) { vec3_B = u; }
571     ierr = VecPointwiseMult(pcis->D,u,vec1_B);CHKERRQ(ierr);
572     ierr = VecSet(&zero,z);CHKERRQ(ierr);
573     ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
574     ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
575     ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
576     ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
577     ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
578     ierr = VecScale(&m_one,vec3_B);CHKERRQ(ierr);
579     ierr = VecCopy(r,z);CHKERRQ(ierr);
580     ierr = VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
581     ierr = VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
582   } else {
583     ierr = VecCopy(r,z);CHKERRQ(ierr);
584   }
585   ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
586   ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
587   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
588   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
589   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
590   {
591     int rank;
592     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
593     ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr);
594     /*
595        Since we are only inserting local values (one value actually) we don't need to do the
596        reduction that tells us there is no data that needs to be moved. Hence we comment out these
597        ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr);
598        ierr = VecAssemblyEnd  (pcnn->coarse_b);CHKERRQ(ierr);
599     */
600   }
601   ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr);
602   ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr);
603   ierr = KSPSolve(pcnn->ksp_coarse);CHKERRQ(ierr);
604   if (!u) { ierr = VecScale(&m_one,pcnn->coarse_x);CHKERRQ(ierr); }
605   ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
606   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
607   ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
608   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
609   ierr = VecSet(&zero,z);CHKERRQ(ierr);
610   ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
611   ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
612   if (!u) {
613     ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
614     ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
615     ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
616     ierr = VecCopy(r,z);CHKERRQ(ierr);
617   }
618   ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
619   ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
620   ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
621 
622   PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 
627 
628 
629 /*  -------   E N D   O F   T H E   C O D E   -------  */
630 /*                                                     */
631 /*  From now on, "footnotes" (or "historical notes").  */
632 /*                                                     */
633 /*  -------------------------------------------------  */
634 
635 
636 #ifdef __HISTORICAL_NOTES___do_not_compile__
637 
638 /* --------------------------------------------------------------------------
639    Historical note 01
640    -------------------------------------------------------------------------- */
641 /*
642    We considered the possibility of an alternative D_i that would still
643    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
644    The basic principle was still the pseudo-inverse of the counting
645    function; the difference was that we would not count subdomains
646    that do not contribute to the coarse space (i.e., not pure-Neumann
647    subdomains).
648 
649    This turned out to be a bad idea:  we would solve trivial Neumann
650    problems in the not pure-Neumann subdomains, since we would be scaling
651    the balanced residual by zero.
652 */
653 
654     {
655       PetscTruth flg;
656       ierr = PetscOptionsHasName(PETSC_NULL,"-pcnn_new_scaling",&flg);CHKERRQ(ierr);
657       if (flg) {
658         Vec    counter;
659         PetscScalar one=1.0, zero=0.0;
660         ierr = VecDuplicate(pc->vec,&counter);CHKERRQ(ierr);
661         ierr = VecSet(&zero,counter);CHKERRQ(ierr);
662         if (pcnn->pure_neumann) {
663           ierr = VecSet(&one,pcnn->D);CHKERRQ(ierr);
664         } else {
665           ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr);
666         }
667         ierr = VecScatterBegin(pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
668         ierr = VecScatterEnd  (pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
669         ierr = VecScatterBegin(counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
670         ierr = VecScatterEnd  (counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
671         ierr = VecDestroy(counter);CHKERRQ(ierr);
672         if (pcnn->pure_neumann) {
673           ierr = VecReciprocal(pcnn->D);CHKERRQ(ierr);
674         } else {
675           ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr);
676         }
677       }
678     }
679 
680 
681 
682 /* --------------------------------------------------------------------------
683    Historical note 02
684    -------------------------------------------------------------------------- */
685 /*
686    We tried an alternative coarse problem, that would eliminate exactly a
687    constant error. Turned out not to improve the overall convergence.
688 */
689 
690   /*  Set the variable pcnn->factor_coarse_rhs. */
691   {
692     PetscTruth flg;
693     ierr = PetscOptionsHasName(PETSC_NULL,"-enforce_preserving_constants",&flg);CHKERRQ(ierr);
694     if (!flg) { pcnn->factor_coarse_rhs = (pcnn->pure_neumann) ? 1.0 : 0.0; }
695     else {
696       PetscScalar zero = 0.0, one = 1.0;
697       ierr = VecSet(&one,pcnn->vec1_B);
698       ierr = ApplySchurComplement(pcnn,pcnn->vec1_B,pcnn->vec2_B,(Vec)0,pcnn->vec1_D,pcnn->vec2_D);CHKERRQ(ierr);
699       ierr = VecSet(&zero,pcnn->vec1_global);CHKERRQ(ierr);
700       ierr = VecScatterBegin(pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
701       ierr = VecScatterEnd  (pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
702       ierr = VecScatterBegin(pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
703       ierr = VecScatterEnd  (pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
704       if (pcnn->pure_neumann) { pcnn->factor_coarse_rhs = 1.0; }
705       else {
706         ierr = ScatterArrayNToVecB(pcnn->work_N,pcnn->vec1_B,INSERT_VALUES,SCATTER_REVERSE,pcnn);CHKERRQ(ierr);
707         for (k=0, pcnn->factor_coarse_rhs=0.0; k<pcnn->n_shared[0]; k++) {
708           pcnn->factor_coarse_rhs += pcnn->work_N[pcnn->shared[0][k]] * pcnn->DZ_IN[0][k];
709         }
710         if (pcnn->factor_coarse_rhs) { pcnn->factor_coarse_rhs = 1.0 / pcnn->factor_coarse_rhs; }
711         else { SETERRQ(1,"Constants cannot be preserved. Remove \"-enforce_preserving_constants\" option."); }
712       }
713     }
714   }
715 
716 #endif /* __HISTORICAL_NOTES___do_not_compile */
717