xref: /petsc/src/ksp/pc/impls/is/nn/nn.c (revision f251bdbd1d9e8529587f82c093daf4686cd196a5)
1 #define PETSCKSP_DLL
2 
3 #include "src/ksp/pc/impls/is/nn/nn.h"
4 
5 /* -------------------------------------------------------------------------- */
6 /*
7    PCSetUp_NN - Prepares for the use of the NN preconditioner
8                     by setting data structures and options.
9 
10    Input Parameter:
11 .  pc - the preconditioner context
12 
13    Application Interface Routine: PCSetUp()
14 
15    Notes:
16    The interface routine PCSetUp() is not usually called directly by
17    the user, but instead is called by PCApply() if necessary.
18 */
19 #undef __FUNCT__
20 #define __FUNCT__ "PCSetUp_NN"
21 static PetscErrorCode PCSetUp_NN(PC pc)
22 {
23   PetscErrorCode ierr;
24 
25   PetscFunctionBegin;
26   if (!pc->setupcalled) {
27     /* Set up all the "iterative substructuring" common block */
28     ierr = PCISSetUp(pc);CHKERRQ(ierr);
29     /* Create the coarse matrix. */
30     ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr);
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 /* -------------------------------------------------------------------------- */
36 /*
37    PCApply_NN - Applies the NN preconditioner to a vector.
38 
39    Input Parameters:
40 .  pc - the preconditioner context
41 .  r - input vector (global)
42 
43    Output Parameter:
44 .  z - output vector (global)
45 
46    Application Interface Routine: PCApply()
47  */
#undef __FUNCT__
#define __FUNCT__ "PCApply_NN"
static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
{
  PC_IS          *pcis = (PC_IS*)(pc->data);
  PetscErrorCode ierr;
  PetscScalar    m_one = -1.0;
  Vec            w = pcis->vec1_global;  /* global work vector: holds the "statically condensed" residual */

  PetscFunctionBegin;
  /*
    Dirichlet solvers.
    Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
    Storing the local results at vec2_D
  */
  ierr = VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
  ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);

  /*
    Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
    Storing the result in the interface portion of the global vector w.
    (The interior portion of w is simply a copy of the interior of r.)
  */
  ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
  ierr = VecScale(pcis->vec1_B,m_one);CHKERRQ(ierr);
  ierr = VecCopy(r,w);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);

  /*
    Apply the interface preconditioner: z receives the preconditioned
    interface values; its interior values are garbage at this point and
    are reconstructed below.
  */
  ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
                                          pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);

  /*
    Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
    The result is stored in vec1_D.
  */
  ierr = VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr);

  /*
    Dirichlet solvers.
    Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
    $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
    Note vec2_D still holds $ B_I^{(i)}r_I^{(i)} $ from the first solve above;
    it is scattered into z first, then overwritten by the second solve.
  */
  ierr = VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
  ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
104 
105 /* -------------------------------------------------------------------------- */
106 /*
107    PCDestroy_NN - Destroys the private context for the NN preconditioner
108    that was created with PCCreate_NN().
109 
110    Input Parameter:
111 .  pc - the preconditioner context
112 
113    Application Interface Routine: PCDestroy()
114 */
#undef __FUNCT__
#define __FUNCT__ "PCDestroy_NN"
static PetscErrorCode PCDestroy_NN(PC pc)
{
  PC_NN          *pcnn = (PC_NN*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Release the "iterative substructuring" common part of the context first. */
  ierr = PCISDestroy(pc);CHKERRQ(ierr);

  /* The coarse-level objects are created lazily in PCSetUp_NN(); they may
     still be 0 if the PC was never set up, hence the guards. */
  if (pcnn->coarse_mat)  {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);}
  if (pcnn->coarse_x)    {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);}
  if (pcnn->coarse_b)    {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);}
  if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);}
  /* DZ_IN[0] points to one contiguous buffer holding the data of ALL rows
     (see PCNNCreateCoarseMatrix()), so only that first entry is freed. */
  if (pcnn->DZ_IN) {
    if (pcnn->DZ_IN[0]) {ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);}
    ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr);
  }

  /*
      Free the private data structure that was hanging off the PC
  */
  ierr = PetscFree(pcnn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
140 
141 /* -------------------------------------------------------------------------- */
142 /*MC
143    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
144 
145    Options Database Keys:
146 +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
147                                        (this skips the first coarse grid solve in the preconditioner)
.    -pc_nn_turn_off_second_balancing - do not balance the solution after solving the local Neumann problems
                                       (this skips the second coarse grid solve in the preconditioner)
150 .    -pc_is_damp_fixed <fact> -
151 .    -pc_is_remove_nullspace_fixed -
152 .    -pc_is_set_damping_factor_floating <fact> -
153 .    -pc_is_not_damp_floating -
154 +    -pc_is_not_remove_nullspace_floating -
155 
156    Level: intermediate
157 
158    Notes: The matrix used with this preconditioner must be of type MATIS
159 
160           Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
161           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
162           on the subdomains; though in our experience using approximate solvers is slower.).
163 
164           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
165           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
166           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
167 
168    Contributed by Paulo Goldfeld
169 
170 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MatIS
171 M*/
172 EXTERN_C_BEGIN
173 #undef __FUNCT__
174 #define __FUNCT__ "PCCreate_NN"
175 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_NN(PC pc)
176 {
177   PetscErrorCode ierr;
178   PC_NN          *pcnn;
179 
180   PetscFunctionBegin;
181   /*
182      Creates the private data structure for this preconditioner and
183      attach it to the PC object.
184   */
185   ierr      = PetscNew(PC_NN,&pcnn);CHKERRQ(ierr);
186   pc->data  = (void*)pcnn;
187 
188   /*
189      Logs the memory usage; this is not needed but allows PETSc to
190      monitor how much memory is being used for various purposes.
191   */
192   ierr = PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS));CHKERRQ(ierr); /* Is this the right thing to do? */
193 
194   ierr = PCISCreate(pc);CHKERRQ(ierr);
195   pcnn->coarse_mat  = 0;
196   pcnn->coarse_x    = 0;
197   pcnn->coarse_b    = 0;
198   pcnn->ksp_coarse = 0;
199   pcnn->DZ_IN       = 0;
200 
201   /*
202       Set the pointers for the functions that are provided above.
203       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
204       are called, they will automatically call these functions.  Note we
205       choose not to provide a couple of these functions since they are
206       not needed.
207   */
208   pc->ops->apply               = PCApply_NN;
209   pc->ops->applytranspose      = 0;
210   pc->ops->setup               = PCSetUp_NN;
211   pc->ops->destroy             = PCDestroy_NN;
212   pc->ops->view                = 0;
213   pc->ops->applyrichardson     = 0;
214   pc->ops->applysymmetricleft  = 0;
215   pc->ops->applysymmetricright = 0;
216   PetscFunctionReturn(0);
217 }
218 EXTERN_C_END
219 
220 
221 /* -------------------------------------------------------------------------- */
/*
   PCNNCreateCoarseMatrix - Builds the coarse-level objects used by the
   balancing steps: the (one-dof-per-subdomain) coarse matrix, the coarse
   rhs/solution vectors, and the coarse linear solver. Also stores in
   pcnn->DZ_IN the common-interface chunks of the scaled local nullspace
   vectors received from each neighbouring subdomain.

   Input Parameter:
.  pc - the preconditioner context
*/
#undef __FUNCT__
#define __FUNCT__ "PCNNCreateCoarseMatrix"
PetscErrorCode PCNNCreateCoarseMatrix (PC pc)
{
  MPI_Request    *send_request, *recv_request;
  PetscErrorCode ierr;
  PetscInt       i, j, k;
  PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
  PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

  /* aliasing some names */
  PC_IS*         pcis     = (PC_IS*)(pc->data);
  PC_NN*         pcnn     = (PC_NN*)pc->data;
  PetscInt       n_neigh  = pcis->n_neigh;
  PetscInt*      neigh    = pcis->neigh;
  PetscInt*      n_shared = pcis->n_shared;
  PetscInt**     shared   = pcis->shared;
  PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */

  PetscFunctionBegin;
  /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
  ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr);

  /* Allocate memory for DZ */
  /* Notice that DZ_OUT[0] is allocated some space that is never used. */
  /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
  /* The row data lives in ONE contiguous buffer anchored at [0]; the row     */
  /* pointers set below are offsets into it. PCDestroy_NN() relies on this    */
  /* layout and frees only DZ_IN[0] and DZ_IN itself.                         */
  {
    PetscInt size_of_Z = 0;
    ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr);
    DZ_IN = pcnn->DZ_IN;
    ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr);
    for (i=0; i<n_neigh; i++) {
      size_of_Z += n_shared[i];
    }
    ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr);
    ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr);
  }
  for (i=1; i<n_neigh; i++) {
    DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
    DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
  }

  /* Set the values of DZ_OUT, in order to send this info to the neighbours */
  /* First, set the auxiliary array pcis->work_N. */
  ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
  for (i=1; i<n_neigh; i++){
    for (j=0; j<n_shared[i]; j++) {
      DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
    }
  }

  /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
  /* Notice that send_request[] and recv_request[] could have one less element. */
  /* We make them longer to have request[i] corresponding to neigh[i].          */
  {
    PetscMPIInt tag;
    ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
    ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr);
    recv_request = send_request + (n_neigh);
    for (i=1; i<n_neigh; i++) {
      ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr);
      ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr);
    }
  }

  /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
  for(j=0; j<n_shared[0]; j++) {
    DZ_IN[0][j] = pcis->work_N[shared[0][j]];
  }

  /* Start computing with local D*Z while communication goes on.    */
  /* Apply Schur complement. The result is placed in pcis->vec1_B   */
  /* and also scattered to pcis->work_N.                            */
  ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
                               pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);

  /* Compute the first column, while completing the receiving. */
  /* MPI_Waitany returns the 0-based index into recv_request+1; the ind++    */
  /* converts it back to the neighbour numbering (requests start at 1).      */
  for (i=0; i<n_neigh; i++) {
    MPI_Status  stat;
    PetscMPIInt ind=0;
    if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;}
    mat[ind*n_neigh+0] = 0.0;
    for (k=0; k<n_shared[ind]; k++) {
      mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
    }
  }

  /* Compute the remaining of the columns */
  for (j=1; j<n_neigh; j++) {
    ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
                                 pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
    for (i=0; i<n_neigh; i++) {
      mat[i*n_neigh+j] = 0.0;
      for (k=0; k<n_shared[i]; k++) {
        mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
      }
    }
  }

  /* Complete the sending. */
  if (n_neigh>1) {
    MPI_Status *stat;
    ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr);
    if (n_neigh-1) {ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);}
    ierr = PetscFree(stat);CHKERRQ(ierr);
  }

  /* Free the memory for the MPI requests */
  ierr = PetscFree(send_request);CHKERRQ(ierr);

  /* Free the memory for DZ_OUT */
  if (DZ_OUT) {
    if (DZ_OUT[0]) { ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); }
    ierr = PetscFree(DZ_OUT);CHKERRQ(ierr);
  }

  {
    PetscMPIInt size;
    ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
    /* Create the global coarse vectors (rhs and solution): one entry per process. */
    ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr);
    ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr);
    /* Create and set the global coarse AIJ matrix (size x size, one row per process). */
    ierr = MatCreate(pc->comm,&(pcnn->coarse_mat));CHKERRQ(ierr);
    ierr = MatSetSizes(pcnn->coarse_mat,1,1,size,size);CHKERRQ(ierr);
    ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }

  {
    PetscMPIInt rank;
    PetscScalar one = 1.0;
    ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
    /* "Zero out" rows of not-purely-Neumann subdomains */
    if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
      ierr = MatZeroRows(pcnn->coarse_mat,0,PETSC_NULL,one);CHKERRQ(ierr);
    } else { /* here it DOES zero the row, since it's not a floating subdomain. */
      ierr = MatZeroRows(pcnn->coarse_mat,1,&rank,one);CHKERRQ(ierr);
    }
  }

  /* Create the coarse linear solver context: direct solve (LU) on a
     redundant copy of the coarse matrix, one solve per process. */
  {
    PC  pc_ctx, inner_pc;
    ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr);
    ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr);
    ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr);
    ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr);
    ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr);
    ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr);
    ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr);
    /* NOTE(review): an older comment here mentioned dummy vector arguments,
       but KSPSetUp() takes none; the sizes come from the operator set above. */
    ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr);
  }

  /* Free the memory for mat */
  ierr = PetscFree(mat);CHKERRQ(ierr);

  /* for DEBUGGING, save the coarse matrix to a file. */
  {
    PetscTruth flg;
    ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);CHKERRQ(ierr);
    if (flg) {
      PetscViewer viewer;
      ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr);
      ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
      ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
    }
  }

  /*  Set the variable pcnn->factor_coarse_rhs. */
  pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

  /* See historical note 02, at the bottom of this file. */
  PetscFunctionReturn(0);
}
409 
410 /* -------------------------------------------------------------------------- */
411 /*
412    PCNNApplySchurToChunk -
413 
414    Input parameters:
415 .  pcnn
416 .  n - size of chunk
417 .  idx - indices of chunk
418 .  chunk - values
419 
420    Output parameters:
421 .  array_N - result of Schur complement applied to chunk, scattered to big array
422 .  vec1_B  - result of Schur complement applied to chunk
423 .  vec2_B  - garbage (used as work space)
424 .  vec1_D  - garbage (used as work space)
425 .  vec2_D  - garbage (used as work space)
426 
427 */
428 #undef __FUNCT__
429 #define __FUNCT__ "PCNNApplySchurToChunk"
430 PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
431 {
432   PetscErrorCode ierr;
433   PetscInt       i;
434   PC_IS          *pcis = (PC_IS*)(pc->data);
435 
436   PetscFunctionBegin;
437   ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr);
438   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
439   ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
440   ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
441   ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
442   PetscFunctionReturn(0);
443 }
444 
445 /* -------------------------------------------------------------------------- */
446 /*
447    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
448                                       the preconditioner for the Schur complement.
449 
450    Input parameter:
451 .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
452 
453    Output parameters:
454 .  z - global vector of interior and interface nodes. The values on the interface are the result of
455        the application of the interface preconditioner to the interface part of r. The values on the
456        interior nodes are garbage.
457 .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
458 .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
459 .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
460 .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
461 .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
462 .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
463 .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
464 .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
465 
466 */
467 #undef __FUNCT__
468 #define __FUNCT__ "PCNNApplyInterfacePreconditioner"
469 PetscErrorCode PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
470                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
471 {
472   PetscErrorCode ierr;
473   PC_IS*         pcis = (PC_IS*)(pc->data);
474 
475   PetscFunctionBegin;
476   /*
477     First balancing step.
478   */
479   {
480     PetscTruth flg;
481     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);CHKERRQ(ierr);
482     if (!flg) {
483       ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
484     } else {
485       ierr = VecCopy(r,z);CHKERRQ(ierr);
486     }
487   }
488 
489   /*
490     Extract the local interface part of z and scale it by D
491   */
492   ierr = VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
493   ierr = VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
494   ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr);
495 
496   /* Neumann Solver */
497   ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr);
498 
499   /*
500     Second balancing step.
501   */
502   {
503     PetscTruth flg;
504     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);CHKERRQ(ierr);
505     if (!flg) {
506       ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
507     } else {
508       PetscScalar zero = 0.0;
509       ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr);
510       ierr = VecSet(z,zero);CHKERRQ(ierr);
511       ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
512       ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
513     }
514   }
515   PetscFunctionReturn(0);
516 }
517 
518 /* -------------------------------------------------------------------------- */
519 /*
520    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
521                    input argument u is provided), or s, as given in equations
522                    (12) and (13), if the input argument u is a null vector.
523                    Notice that the input argument u plays the role of u_i in
524                    equation (14). The equation numbers refer to [Man93].
525 
526    Input Parameters:
527 .  pcnn - NN preconditioner context.
528 .  r - MPI vector of all nodes (interior and interface). It's preserved.
529 .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
530 
531    Output Parameters:
532 .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
533 .  vec1_B - Sequential vector of local interface nodes. Workspace.
534 .  vec2_B - Sequential vector of local interface nodes. Workspace.
535 .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
536 .  vec1_D - Sequential vector of local interior nodes. Workspace.
537 .  vec2_D - Sequential vector of local interior nodes. Workspace.
538 .  work_N - Array of all local nodes (interior and interface). Workspace.
539 
540 */
#undef __FUNCT__
#define __FUNCT__ "PCNNBalancing"
PetscErrorCode PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
                   Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
{
  PetscErrorCode ierr;
  PetscInt       k;
  PetscScalar    zero     =  0.0;
  PetscScalar    m_one    = -1.0;
  PetscScalar    value;   /* this subdomain's entry of the coarse right-hand side */
  PetscScalar*   lambda;  /* local entry of the coarse solution */
  PC_NN*         pcnn     = (PC_NN*)(pc->data);
  PC_IS*         pcis     = (PC_IS*)(pc->data);

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
  if (u) {
    /* Second-balancing case (equations (15)-(16) of [Man93]):
       z = r - Sum_i R_i^T D_i S_i D_i u_i, assembled via the scatters below. */
    if (!vec3_B) { vec3_B = u; }  /* no spare workspace: overwrite u (documented above) */
    ierr = VecPointwiseMult(vec1_B,pcis->D,u);CHKERRQ(ierr);
    ierr = VecSet(z,zero);CHKERRQ(ierr);
    ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
    ierr = VecScale(vec3_B,m_one);CHKERRQ(ierr);
    ierr = VecCopy(r,z);CHKERRQ(ierr);
    ierr = VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  } else {
    /* First-balancing case (equations (12)-(13) of [Man93]): start from r itself. */
    ierr = VecCopy(r,z);CHKERRQ(ierr);
  }
  /* Build this subdomain's entry of the coarse rhs: <D*Z, z_B> (times the
     factor set in PCNNCreateCoarseMatrix(); zero for non-floating subdomains). */
  ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
  ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
  for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
  value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
  {
    PetscMPIInt rank;
    ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
    ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr);
    /*
       Since we are only inserting local values (one value actually) we don't need to do the
       reduction that tells us there is no data that needs to be moved. Hence we comment out these
       ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr);
       ierr = VecAssemblyEnd  (pcnn->coarse_b);CHKERRQ(ierr);
    */
  }
  /* Solve the coarse problem and spread lambda * D*Z back to the interface. */
  ierr = KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);CHKERRQ(ierr);
  if (!u) { ierr = VecScale(pcnn->coarse_x,m_one);CHKERRQ(ierr); }
  ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
  for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
  ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
  ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
  ierr = VecSet(z,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  if (!u) {
    /* First-balancing case only: apply the Schur complement to the
       correction and add it back to the original residual r. */
    ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
    ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
    ierr = VecCopy(r,z);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
609 
610 #undef __FUNCT__
611 
612 
613 
614 /*  -------   E N D   O F   T H E   C O D E   -------  */
615 /*                                                     */
616 /*  From now on, "footnotes" (or "historical notes").  */
617 /*                                                     */
618 /*  -------------------------------------------------  */
619 
620 
621 
622 /* --------------------------------------------------------------------------
623    Historical note 01
624    -------------------------------------------------------------------------- */
625 /*
626    We considered the possibility of an alternative D_i that would still
627    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
628    The basic principle was still the pseudo-inverse of the counting
629    function; the difference was that we would not count subdomains
630    that do not contribute to the coarse space (i.e., not pure-Neumann
631    subdomains).
632 
633    This turned out to be a bad idea:  we would solve trivial Neumann
634    problems in the not pure-Neumann subdomains, since we would be scaling
635    the balanced residual by zero.
636 */
637 
638 
639 
640 
641 /* --------------------------------------------------------------------------
642    Historical note 02
643    -------------------------------------------------------------------------- */
644 /*
645    We tried an alternative coarse problem, that would eliminate exactly a
646    constant error. Turned out not to improve the overall convergence.
647 */
648 
649 
650