xref: /petsc/src/ksp/pc/impls/is/nn/nn.c (revision df48e8d96c19df54efdde2c76f874a16fcafd8fe)
1 #define PETSCKSP_DLL
2 
3 #include "src/ksp/pc/impls/is/nn/nn.h"
4 
5 /* -------------------------------------------------------------------------- */
6 /*
7    PCSetUp_NN - Prepares for the use of the NN preconditioner
8                     by setting data structures and options.
9 
10    Input Parameter:
11 .  pc - the preconditioner context
12 
13    Application Interface Routine: PCSetUp()
14 
15    Notes:
16    The interface routine PCSetUp() is not usually called directly by
17    the user, but instead is called by PCApply() if necessary.
18 */
19 #undef __FUNCT__
20 #define __FUNCT__ "PCSetUp_NN"
21 static PetscErrorCode PCSetUp_NN(PC pc)
22 {
23   PetscErrorCode ierr;
24 
25   PetscFunctionBegin;
26   if (!pc->setupcalled) {
27     /* Set up all the "iterative substructuring" common block */
28     ierr = PCISSetUp(pc);CHKERRQ(ierr);
29     /* Create the coarse matrix. */
30     ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr);
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 /* -------------------------------------------------------------------------- */
36 /*
37    PCApply_NN - Applies the NN preconditioner to a vector.
38 
39    Input Parameters:
40 .  pc - the preconditioner context
41 .  r - input vector (global)
42 
43    Output Parameter:
44 .  z - output vector (global)
45 
46    Application Interface Routine: PCApply()
47  */
48 #undef __FUNCT__
49 #define __FUNCT__ "PCApply_NN"
50 static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
51 {
52   PC_IS          *pcis = (PC_IS*)(pc->data);
53   PetscErrorCode ierr;
54   PetscScalar    m_one = -1.0;
55   Vec            w = pcis->vec1_global;
56 
57   PetscFunctionBegin;
58   /*
59     Dirichlet solvers.
60     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
61     Storing the local results at vec2_D
62   */
63   ierr = VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
64   ierr = VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
65   ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
66 
67   /*
68     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
69     Storing the result in the interface portion of the global vector w.
70   */
71   ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
72   ierr = VecScale(pcis->vec1_B,m_one);CHKERRQ(ierr);
73   ierr = VecCopy(r,w);CHKERRQ(ierr);
74   ierr = VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
75   ierr = VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
76 
77   /*
78     Apply the interface preconditioner
79   */
80   ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
81                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
82 
83   /*
84     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
85     The result is stored in vec1_D.
86   */
87   ierr = VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
88   ierr = VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
89   ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr);
90 
91   /*
92     Dirichlet solvers.
93     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
94     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
95   */
96   ierr = VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
97   ierr = VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
98   ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
99   ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr);
100   ierr = VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
101   ierr = VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 /* -------------------------------------------------------------------------- */
106 /*
107    PCDestroy_NN - Destroys the private context for the NN preconditioner
108    that was created with PCCreate_NN().
109 
110    Input Parameter:
111 .  pc - the preconditioner context
112 
113    Application Interface Routine: PCDestroy()
114 */
115 #undef __FUNCT__
116 #define __FUNCT__ "PCDestroy_NN"
117 static PetscErrorCode PCDestroy_NN(PC pc)
118 {
119   PC_NN          *pcnn = (PC_NN*)pc->data;
120   PetscErrorCode ierr;
121 
122   PetscFunctionBegin;
123   ierr = PCISDestroy(pc);CHKERRQ(ierr);
124 
125   if (pcnn->coarse_mat)  {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);}
126   if (pcnn->coarse_x)    {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);}
127   if (pcnn->coarse_b)    {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);}
128   if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);}
129   if (pcnn->DZ_IN) {
130     if (pcnn->DZ_IN[0]) {ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);}
131     ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr);
132   }
133 
134   /*
135       Free the private data structure that was hanging off the PC
136   */
137   ierr = PetscFree(pcnn);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 /* -------------------------------------------------------------------------- */
142 /*MC
143    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
144 
145    Options Database Keys:
146 +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
147                                        (this skips the first coarse grid solve in the preconditioner)
148 .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
149                                        (this skips the second coarse grid solve in the preconditioner)
150 .    -pc_is_damp_fixed <fact> -
151 .    -pc_is_remove_nullspace_fixed -
152 .    -pc_is_set_damping_factor_floating <fact> -
153 .    -pc_is_not_damp_floating -
154 +    -pc_is_not_remove_nullspace_floating -
155 
156    Level: intermediate
157 
158    Notes: The matrix used with this preconditioner must be of type MATIS
159 
160           Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
161           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
162           on the subdomains; though in our experience using approximate solvers is slower.).
163 
164           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
165           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
166           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
167 
168    Contributed by Paulo Goldfeld
169 
170 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MatIS
171 M*/
172 EXTERN_C_BEGIN
173 #undef __FUNCT__
174 #define __FUNCT__ "PCCreate_NN"
175 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_NN(PC pc)
176 {
177   PetscErrorCode ierr;
178   PC_NN          *pcnn;
179 
180   PetscFunctionBegin;
181   /*
182      Creates the private data structure for this preconditioner and
183      attach it to the PC object.
184   */
185   ierr      = PetscNew(PC_NN,&pcnn);CHKERRQ(ierr);
186   pc->data  = (void*)pcnn;
187 
188   /*
189      Logs the memory usage; this is not needed but allows PETSc to
190      monitor how much memory is being used for various purposes.
191   */
192   ierr = PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS));CHKERRQ(ierr); /* Is this the right thing to do? */
193 
194   ierr = PCISCreate(pc);CHKERRQ(ierr);
195   pcnn->coarse_mat  = 0;
196   pcnn->coarse_x    = 0;
197   pcnn->coarse_b    = 0;
198   pcnn->ksp_coarse = 0;
199   pcnn->DZ_IN       = 0;
200 
201   /*
202       Set the pointers for the functions that are provided above.
203       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
204       are called, they will automatically call these functions.  Note we
205       choose not to provide a couple of these functions since they are
206       not needed.
207   */
208   pc->ops->apply               = PCApply_NN;
209   pc->ops->applytranspose      = 0;
210   pc->ops->setup               = PCSetUp_NN;
211   pc->ops->destroy             = PCDestroy_NN;
212   pc->ops->view                = 0;
213   pc->ops->applyrichardson     = 0;
214   pc->ops->applysymmetricleft  = 0;
215   pc->ops->applysymmetricright = 0;
216   PetscFunctionReturn(0);
217 }
218 EXTERN_C_END
219 
220 
221 /* -------------------------------------------------------------------------- */
222 /*
223    PCNNCreateCoarseMatrix -
224 */
225 #undef __FUNCT__
226 #define __FUNCT__ "PCNNCreateCoarseMatrix"
227 PetscErrorCode PCNNCreateCoarseMatrix (PC pc)
228 {
229   MPI_Request    *send_request, *recv_request;
230   PetscErrorCode ierr;
231   PetscInt       i, j, k;
232   PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
233   PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
234 
235   /* aliasing some names */
236   PC_IS*         pcis     = (PC_IS*)(pc->data);
237   PC_NN*         pcnn     = (PC_NN*)pc->data;
238   PetscInt       n_neigh  = pcis->n_neigh;
239   PetscInt*      neigh    = pcis->neigh;
240   PetscInt*      n_shared = pcis->n_shared;
241   PetscInt**     shared   = pcis->shared;
242   PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */
243 
244   PetscFunctionBegin;
245   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
246   ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr);
247 
248   /* Allocate memory for DZ */
249   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
250   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
251   {
252     PetscInt size_of_Z = 0;
253     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr);
254     DZ_IN = pcnn->DZ_IN;
255     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr);
256     for (i=0; i<n_neigh; i++) {
257       size_of_Z += n_shared[i];
258     }
259     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr);
260     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr);
261   }
262   for (i=1; i<n_neigh; i++) {
263     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
264     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
265   }
266 
267   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
268   /* First, set the auxiliary array pcis->work_N. */
269   ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
270   for (i=1; i<n_neigh; i++){
271     for (j=0; j<n_shared[i]; j++) {
272       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
273     }
274   }
275 
276   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
277   /* Notice that send_request[] and recv_request[] could have one less element. */
278   /* We make them longer to have request[i] corresponding to neigh[i].          */
279   {
280     PetscMPIInt tag;
281     ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
282     ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr);
283     recv_request = send_request + (n_neigh);
284     for (i=1; i<n_neigh; i++) {
285       ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr);
286       ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr);
287     }
288   }
289 
290   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
291   for(j=0; j<n_shared[0]; j++) {
292     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
293   }
294 
295   /* Start computing with local D*Z while communication goes on.    */
296   /* Apply Schur complement. The result is "stored" in vec (more    */
297   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
298   /* and also scattered to pcnn->work_N.                            */
299   ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
300                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
301 
302   /* Compute the first column, while completing the receiving. */
303   for (i=0; i<n_neigh; i++) {
304     MPI_Status  stat;
305     PetscMPIInt ind=0;
306     if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;}
307     mat[ind*n_neigh+0] = 0.0;
308     for (k=0; k<n_shared[ind]; k++) {
309       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
310     }
311   }
312 
313   /* Compute the remaining of the columns */
314   for (j=1; j<n_neigh; j++) {
315     ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
316                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
317     for (i=0; i<n_neigh; i++) {
318       mat[i*n_neigh+j] = 0.0;
319       for (k=0; k<n_shared[i]; k++) {
320 	mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
321       }
322     }
323   }
324 
325   /* Complete the sending. */
326   if (n_neigh>1) {
327     MPI_Status *stat;
328     ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr);
329     if (n_neigh-1) {ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);}
330     ierr = PetscFree(stat);CHKERRQ(ierr);
331   }
332 
333   /* Free the memory for the MPI requests */
334   ierr = PetscFree(send_request);CHKERRQ(ierr);
335 
336   /* Free the memory for DZ_OUT */
337   if (DZ_OUT) {
338     if (DZ_OUT[0]) { ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); }
339     ierr = PetscFree(DZ_OUT);CHKERRQ(ierr);
340   }
341 
342   {
343     PetscMPIInt size;
344     ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
345     /* Create the global coarse vectors (rhs and solution). */
346     ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr);
347     ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr);
348     /* Create and set the global coarse AIJ matrix. */
349     ierr = MatCreate(pc->comm,&(pcnn->coarse_mat));CHKERRQ(ierr);
350     ierr = MatSetSizes(pcnn->coarse_mat,1,1,size,size);CHKERRQ(ierr);
351     ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr);
352     ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
354     ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr);
355     ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
356     ierr = MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
357   }
358 
359   {
360     PetscMPIInt rank;
361     PetscScalar one = 1.0;
362     IS          is;
363     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
364     /* "Zero out" rows of not-purely-Neumann subdomains */
365     if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
366       ierr = ISCreateStride(pc->comm,0,0,0,&is);CHKERRQ(ierr);
367     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
368       ierr = ISCreateStride(pc->comm,1,rank,0,&is);CHKERRQ(ierr);
369     }
370     ierr = MatZeroRows(pcnn->coarse_mat,is,&one);CHKERRQ(ierr);
371     ierr = ISDestroy(is);CHKERRQ(ierr);
372   }
373 
374   /* Create the coarse linear solver context */
375   {
376     PC  pc_ctx, inner_pc;
377     ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr);
378     ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
379     ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr);
380     ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr);
381     ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr);
382     ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr);
383     ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr);
384     ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr);
385     ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr);
386     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
387     ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr);
388   }
389 
390   /* Free the memory for mat */
391   ierr = PetscFree(mat);CHKERRQ(ierr);
392 
393   /* for DEBUGGING, save the coarse matrix to a file. */
394   {
395     PetscTruth flg;
396     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);CHKERRQ(ierr);
397     if (flg) {
398       PetscViewer viewer;
399       ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr);
400       ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
401       ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr);
402       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
403     }
404   }
405 
406   /*  Set the variable pcnn->factor_coarse_rhs. */
407   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
408 
409   /* See historical note 02, at the bottom of this file. */
410   PetscFunctionReturn(0);
411 }
412 
413 /* -------------------------------------------------------------------------- */
414 /*
415    PCNNApplySchurToChunk -
416 
417    Input parameters:
418 .  pcnn
419 .  n - size of chunk
420 .  idx - indices of chunk
421 .  chunk - values
422 
423    Output parameters:
424 .  array_N - result of Schur complement applied to chunk, scattered to big array
425 .  vec1_B  - result of Schur complement applied to chunk
426 .  vec2_B  - garbage (used as work space)
427 .  vec1_D  - garbage (used as work space)
428 .  vec2_D  - garbage (used as work space)
429 
430 */
431 #undef __FUNCT__
432 #define __FUNCT__ "PCNNApplySchurToChunk"
433 PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
434 {
435   PetscErrorCode ierr;
436   PetscInt       i;
437   PC_IS          *pcis = (PC_IS*)(pc->data);
438 
439   PetscFunctionBegin;
440   ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr);
441   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
442   ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
443   ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
444   ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 /* -------------------------------------------------------------------------- */
449 /*
450    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
451                                       the preconditioner for the Schur complement.
452 
453    Input parameter:
454 .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
455 
456    Output parameters:
457 .  z - global vector of interior and interface nodes. The values on the interface are the result of
458        the application of the interface preconditioner to the interface part of r. The values on the
459        interior nodes are garbage.
460 .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
461 .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
462 .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
463 .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
464 .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
465 .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
466 .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
467 .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
468 
469 */
470 #undef __FUNCT__
471 #define __FUNCT__ "PCNNApplyInterfacePreconditioner"
472 PetscErrorCode PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
473                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
474 {
475   PetscErrorCode ierr;
476   PC_IS*         pcis = (PC_IS*)(pc->data);
477 
478   PetscFunctionBegin;
479   /*
480     First balancing step.
481   */
482   {
483     PetscTruth flg;
484     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);CHKERRQ(ierr);
485     if (!flg) {
486       ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
487     } else {
488       ierr = VecCopy(r,z);CHKERRQ(ierr);
489     }
490   }
491 
492   /*
493     Extract the local interface part of z and scale it by D
494   */
495   ierr = VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
496   ierr = VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
497   ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr);
498 
499   /* Neumann Solver */
500   ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr);
501 
502   /*
503     Second balancing step.
504   */
505   {
506     PetscTruth flg;
507     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);CHKERRQ(ierr);
508     if (!flg) {
509       ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
510     } else {
511       PetscScalar zero = 0.0;
512       ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr);
513       ierr = VecSet(z,zero);CHKERRQ(ierr);
514       ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
515       ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
516     }
517   }
518   PetscFunctionReturn(0);
519 }
520 
521 /* -------------------------------------------------------------------------- */
522 /*
523    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
524                    input argument u is provided), or s, as given in equations
525                    (12) and (13), if the input argument u is a null vector.
526                    Notice that the input argument u plays the role of u_i in
527                    equation (14). The equation numbers refer to [Man93].
528 
529    Input Parameters:
530 .  pcnn - NN preconditioner context.
531 .  r - MPI vector of all nodes (interior and interface). It's preserved.
532 .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
533 
534    Output Parameters:
535 .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
536 .  vec1_B - Sequential vector of local interface nodes. Workspace.
537 .  vec2_B - Sequential vector of local interface nodes. Workspace.
538 .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
539 .  vec1_D - Sequential vector of local interior nodes. Workspace.
540 .  vec2_D - Sequential vector of local interior nodes. Workspace.
541 .  work_N - Array of all local nodes (interior and interface). Workspace.
542 
543 */
544 #undef __FUNCT__
545 #define __FUNCT__ "PCNNBalancing"
546 PetscErrorCode PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
547                    Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
548 {
549   PetscErrorCode ierr;
550   PetscInt       k;
551   PetscScalar    zero     =  0.0;
552   PetscScalar    m_one    = -1.0;
553   PetscScalar    value;
554   PetscScalar*   lambda;
555   PC_NN*         pcnn     = (PC_NN*)(pc->data);
556   PC_IS*         pcis     = (PC_IS*)(pc->data);
557 
558   PetscFunctionBegin;
559   ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
560   if (u) {
561     if (!vec3_B) { vec3_B = u; }
562     ierr = VecPointwiseMult(vec1_B,pcis->D,u);CHKERRQ(ierr);
563     ierr = VecSet(z,zero);CHKERRQ(ierr);
564     ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
565     ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
566     ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
567     ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
568     ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
569     ierr = VecScale(vec3_B,m_one);CHKERRQ(ierr);
570     ierr = VecCopy(r,z);CHKERRQ(ierr);
571     ierr = VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
572     ierr = VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
573   } else {
574     ierr = VecCopy(r,z);CHKERRQ(ierr);
575   }
576   ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
577   ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
578   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
579   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
580   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
581   {
582     PetscMPIInt rank;
583     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
584     ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr);
585     /*
586        Since we are only inserting local values (one value actually) we don't need to do the
587        reduction that tells us there is no data that needs to be moved. Hence we comment out these
588        ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr);
589        ierr = VecAssemblyEnd  (pcnn->coarse_b);CHKERRQ(ierr);
590     */
591   }
592   ierr = KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);CHKERRQ(ierr);
593   if (!u) { ierr = VecScale(pcnn->coarse_x,m_one);CHKERRQ(ierr); }
594   ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
595   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
596   ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
597   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
598   ierr = VecSet(z,zero);CHKERRQ(ierr);
599   ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
600   ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
601   if (!u) {
602     ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
603     ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
604     ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
605     ierr = VecCopy(r,z);CHKERRQ(ierr);
606   }
607   ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
608   ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
609   ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 
615 
616 
617 /*  -------   E N D   O F   T H E   C O D E   -------  */
618 /*                                                     */
619 /*  From now on, "footnotes" (or "historical notes").  */
620 /*                                                     */
621 /*  -------------------------------------------------  */
622 
623 
624 
625 /* --------------------------------------------------------------------------
626    Historical note 01
627    -------------------------------------------------------------------------- */
628 /*
629    We considered the possibility of an alternative D_i that would still
630    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
631    The basic principle was still the pseudo-inverse of the counting
632    function; the difference was that we would not count subdomains
633    that do not contribute to the coarse space (i.e., not pure-Neumann
634    subdomains).
635 
636    This turned out to be a bad idea:  we would solve trivial Neumann
637    problems in the not pure-Neumann subdomains, since we would be scaling
638    the balanced residual by zero.
639 */
640 
641 
642 
643 
644 /* --------------------------------------------------------------------------
645    Historical note 02
646    -------------------------------------------------------------------------- */
647 /*
648    We tried an alternative coarse problem, that would eliminate exactly a
649    constant error. Turned out not to improve the overall convergence.
650 */
651 
652 
653