xref: /petsc/src/ksp/pc/impls/is/nn/nn.c (revision 45bfc511327f17f9664eea9fee8d9f549538c00a)
1 #define PETSCKSP_DLL
2 
3 #include "src/ksp/pc/impls/is/nn/nn.h"
4 
5 /* -------------------------------------------------------------------------- */
6 /*
7    PCSetUp_NN - Prepares for the use of the NN preconditioner
8                     by setting data structures and options.
9 
10    Input Parameter:
11 .  pc - the preconditioner context
12 
13    Application Interface Routine: PCSetUp()
14 
15    Notes:
16    The interface routine PCSetUp() is not usually called directly by
17    the user, but instead is called by PCApply() if necessary.
18 */
19 #undef __FUNCT__
20 #define __FUNCT__ "PCSetUp_NN"
21 static PetscErrorCode PCSetUp_NN(PC pc)
22 {
23   PetscErrorCode ierr;
24 
25   PetscFunctionBegin;
26   if (!pc->setupcalled) {
27     /* Set up all the "iterative substructuring" common block */
28     ierr = PCISSetUp(pc);CHKERRQ(ierr);
29     /* Create the coarse matrix. */
30     ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr);
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 /* -------------------------------------------------------------------------- */
36 /*
37    PCApply_NN - Applies the NN preconditioner to a vector.
38 
39    Input Parameters:
40 .  pc - the preconditioner context
41 .  r - input vector (global)
42 
43    Output Parameter:
44 .  z - output vector (global)
45 
46    Application Interface Routine: PCApply()
47  */
48 #undef __FUNCT__
49 #define __FUNCT__ "PCApply_NN"
50 static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
51 {
52   PC_IS          *pcis = (PC_IS*)(pc->data);
53   PetscErrorCode ierr;
54   PetscScalar    m_one = -1.0;
55   Vec            w = pcis->vec1_global;
56 
57   PetscFunctionBegin;
58   /*
59     Dirichlet solvers.
60     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
61     Storing the local results at vec2_D
62   */
63   ierr = VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
64   ierr = VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
65   ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
66 
67   /*
68     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
69     Storing the result in the interface portion of the global vector w.
70   */
71   ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
72   ierr = VecScale(&m_one,pcis->vec1_B);CHKERRQ(ierr);
73   ierr = VecCopy(r,w);CHKERRQ(ierr);
74   ierr = VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
75   ierr = VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
76 
77   /*
78     Apply the interface preconditioner
79   */
80   ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
81                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
82 
83   /*
84     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
85     The result is stored in vec1_D.
86   */
87   ierr = VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
88   ierr = VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
89   ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr);
90 
91   /*
92     Dirichlet solvers.
93     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
94     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
95   */
96   ierr = VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
97   ierr = VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
98   ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
99   ierr = VecScale(&m_one,pcis->vec2_D);CHKERRQ(ierr);
100   ierr = VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
101   ierr = VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 /* -------------------------------------------------------------------------- */
106 /*
107    PCDestroy_NN - Destroys the private context for the NN preconditioner
108    that was created with PCCreate_NN().
109 
110    Input Parameter:
111 .  pc - the preconditioner context
112 
113    Application Interface Routine: PCDestroy()
114 */
115 #undef __FUNCT__
116 #define __FUNCT__ "PCDestroy_NN"
117 static PetscErrorCode PCDestroy_NN(PC pc)
118 {
119   PC_NN          *pcnn = (PC_NN*)pc->data;
120   PetscErrorCode ierr;
121 
122   PetscFunctionBegin;
123   ierr = PCISDestroy(pc);CHKERRQ(ierr);
124 
125   if (pcnn->coarse_mat)  {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);}
126   if (pcnn->coarse_x)    {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);}
127   if (pcnn->coarse_b)    {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);}
128   if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);}
129   if (pcnn->DZ_IN) {
130     if (pcnn->DZ_IN[0]) {ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);}
131     ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr);
132   }
133 
134   /*
135       Free the private data structure that was hanging off the PC
136   */
137   ierr = PetscFree(pcnn);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 /* -------------------------------------------------------------------------- */
142 /*MC
143    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
144 
145    Options Database Keys:
146 +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
147                                        (this skips the first coarse grid solve in the preconditioner)
148 .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
149                                        (this skips the second coarse grid solve in the preconditioner)
150 .    -pc_is_damp_fixed <fact> -
151 .    -pc_is_remove_nullspace_fixed -
152 .    -pc_is_set_damping_factor_floating <fact> -
153 .    -pc_is_not_damp_floating -
154 +    -pc_is_not_remove_nullspace_floating -
155 
156    Level: intermediate
157 
158    Notes: The matrix used with this preconditioner must be of type MATIS
159 
160           Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
161           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
162           on the subdomains; though in our experience using approximate solvers is slower.).
163 
164           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
165           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
166           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
167 
168    Contributed by Paulo Goldfeld
169 
170 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MatIS
171 M*/
172 EXTERN_C_BEGIN
173 #undef __FUNCT__
174 #define __FUNCT__ "PCCreate_NN"
175 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_NN(PC pc)
176 {
177   PetscErrorCode ierr;
178   PC_NN          *pcnn;
179 
180   PetscFunctionBegin;
181   /*
182      Creates the private data structure for this preconditioner and
183      attach it to the PC object.
184   */
185   ierr      = PetscNew(PC_NN,&pcnn);CHKERRQ(ierr);
186   pc->data  = (void*)pcnn;
187 
188   /*
189      Logs the memory usage; this is not needed but allows PETSc to
190      monitor how much memory is being used for various purposes.
191   */
192   ierr = PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS));CHKERRQ(ierr); /* Is this the right thing to do? */
193 
194   ierr = PCISCreate(pc);CHKERRQ(ierr);
195   pcnn->coarse_mat  = 0;
196   pcnn->coarse_x    = 0;
197   pcnn->coarse_b    = 0;
198   pcnn->ksp_coarse = 0;
199   pcnn->DZ_IN       = 0;
200 
201   /*
202       Set the pointers for the functions that are provided above.
203       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
204       are called, they will automatically call these functions.  Note we
205       choose not to provide a couple of these functions since they are
206       not needed.
207   */
208   pc->ops->apply               = PCApply_NN;
209   pc->ops->applytranspose      = 0;
210   pc->ops->setup               = PCSetUp_NN;
211   pc->ops->destroy             = PCDestroy_NN;
212   pc->ops->view                = 0;
213   pc->ops->applyrichardson     = 0;
214   pc->ops->applysymmetricleft  = 0;
215   pc->ops->applysymmetricright = 0;
216   PetscFunctionReturn(0);
217 }
218 EXTERN_C_END
219 
220 
221 /* -------------------------------------------------------------------------- */
222 /*
223    PCNNCreateCoarseMatrix -
224 */
225 #undef __FUNCT__
226 #define __FUNCT__ "PCNNCreateCoarseMatrix"
227 PetscErrorCode PCNNCreateCoarseMatrix (PC pc)
228 {
229   MPI_Request    *send_request, *recv_request;
230   PetscErrorCode ierr;
231   PetscInt       i, j, k;
232   PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
233   PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
234 
235   /* aliasing some names */
236   PC_IS*         pcis     = (PC_IS*)(pc->data);
237   PC_NN*         pcnn     = (PC_NN*)pc->data;
238   PetscInt       n_neigh  = pcis->n_neigh;
239   PetscInt*      neigh    = pcis->neigh;
240   PetscInt*      n_shared = pcis->n_shared;
241   PetscInt**     shared   = pcis->shared;
242   PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */
243 
244   PetscFunctionBegin;
245   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
246   ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr);
247 
248   /* Allocate memory for DZ */
249   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
250   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
251   {
252     PetscInt size_of_Z = 0;
253     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr);
254     DZ_IN = pcnn->DZ_IN;
255     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr);
256     for (i=0; i<n_neigh; i++) {
257       size_of_Z += n_shared[i];
258     }
259     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr);
260     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr);
261   }
262   for (i=1; i<n_neigh; i++) {
263     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
264     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
265   }
266 
267   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
268   /* First, set the auxiliary array pcis->work_N. */
269   ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
270   for (i=1; i<n_neigh; i++){
271     for (j=0; j<n_shared[i]; j++) {
272       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
273     }
274   }
275 
276   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
277   /* Notice that send_request[] and recv_request[] could have one less element. */
278   /* We make them longer to have request[i] corresponding to neigh[i].          */
279   {
280     PetscMPIInt tag;
281     ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
282     ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr);
283     recv_request = send_request + (n_neigh);
284     for (i=1; i<n_neigh; i++) {
285       ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr);
286       ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr);
287     }
288   }
289 
290   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
291   for(j=0; j<n_shared[0]; j++) {
292     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
293   }
294 
295   /* Start computing with local D*Z while communication goes on.    */
296   /* Apply Schur complement. The result is "stored" in vec (more    */
297   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
298   /* and also scattered to pcnn->work_N.                            */
299   ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
300                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
301 
302   /* Compute the first column, while completing the receiving. */
303   for (i=0; i<n_neigh; i++) {
304     MPI_Status  stat;
305     PetscMPIInt ind=0;
306     if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;}
307     mat[ind*n_neigh+0] = 0.0;
308     for (k=0; k<n_shared[ind]; k++) {
309       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
310     }
311   }
312 
313   /* Compute the remaining of the columns */
314   for (j=1; j<n_neigh; j++) {
315     ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
316                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
317     for (i=0; i<n_neigh; i++) {
318       mat[i*n_neigh+j] = 0.0;
319       for (k=0; k<n_shared[i]; k++) {
320 	mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
321       }
322     }
323   }
324 
325   /* Complete the sending. */
326   if (n_neigh>1) {
327     MPI_Status *stat;
328     ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr);
329     ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);
330     ierr = PetscFree(stat);CHKERRQ(ierr);
331   }
332 
333   /* Free the memory for the MPI requests */
334   ierr = PetscFree(send_request);CHKERRQ(ierr);
335 
336   /* Free the memory for DZ_OUT */
337   if (DZ_OUT) {
338     if (DZ_OUT[0]) { ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); }
339     ierr = PetscFree(DZ_OUT);CHKERRQ(ierr);
340   }
341 
342   {
343     PetscMPIInt size;
344     ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
345     /* Create the global coarse vectors (rhs and solution). */
346     ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr);
347     ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr);
348     /* Create and set the global coarse AIJ matrix. */
349     ierr = MatCreate(pc->comm,1,1,size,size,&(pcnn->coarse_mat));CHKERRQ(ierr);
350     ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr);
351     ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);CHKERRQ(ierr);
352     ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
353     ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr);
354     ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
355     ierr = MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
356   }
357 
358   {
359     PetscMPIInt rank;
360     PetscScalar one = 1.0;
361     IS          is;
362     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
363     /* "Zero out" rows of not-purely-Neumann subdomains */
364     if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
365       ierr = ISCreateStride(pc->comm,0,0,0,&is);CHKERRQ(ierr);
366     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
367       ierr = ISCreateStride(pc->comm,1,rank,0,&is);CHKERRQ(ierr);
368     }
369     ierr = MatZeroRows(pcnn->coarse_mat,is,&one);CHKERRQ(ierr);
370     ierr = ISDestroy(is);CHKERRQ(ierr);
371   }
372 
373   /* Create the coarse linear solver context */
374   {
375     PC  pc_ctx, inner_pc;
376     ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr);
377     ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
378     ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr);
379     ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr);
380     ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr);
381     ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr);
382     ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr);
383     ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr);
384     ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr);
385     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
386     ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr);
387   }
388 
389   /* Free the memory for mat */
390   ierr = PetscFree(mat);CHKERRQ(ierr);
391 
392   /* for DEBUGGING, save the coarse matrix to a file. */
393   {
394     PetscTruth flg;
395     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);CHKERRQ(ierr);
396     if (flg) {
397       PetscViewer viewer;
398       ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr);
399       ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
400       ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr);
401       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
402     }
403   }
404 
405   /*  Set the variable pcnn->factor_coarse_rhs. */
406   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
407 
408   /* See historical note 02, at the bottom of this file. */
409   PetscFunctionReturn(0);
410 }
411 
412 /* -------------------------------------------------------------------------- */
413 /*
414    PCNNApplySchurToChunk -
415 
416    Input parameters:
417 .  pcnn
418 .  n - size of chunk
419 .  idx - indices of chunk
420 .  chunk - values
421 
422    Output parameters:
423 .  array_N - result of Schur complement applied to chunk, scattered to big array
424 .  vec1_B  - result of Schur complement applied to chunk
425 .  vec2_B  - garbage (used as work space)
426 .  vec1_D  - garbage (used as work space)
427 .  vec2_D  - garbage (used as work space)
428 
429 */
430 #undef __FUNCT__
431 #define __FUNCT__ "PCNNApplySchurToChunk"
432 PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
433 {
434   PetscErrorCode ierr;
435   PetscInt       i;
436   PC_IS          *pcis = (PC_IS*)(pc->data);
437 
438   PetscFunctionBegin;
439   ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr);
440   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
441   ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
442   ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
443   ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
444   PetscFunctionReturn(0);
445 }
446 
447 /* -------------------------------------------------------------------------- */
448 /*
449    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
450                                       the preconditioner for the Schur complement.
451 
452    Input parameter:
453 .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
454 
455    Output parameters:
456 .  z - global vector of interior and interface nodes. The values on the interface are the result of
457        the application of the interface preconditioner to the interface part of r. The values on the
458        interior nodes are garbage.
459 .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
460 .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
461 .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
462 .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
463 .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
464 .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
465 .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
466 .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
467 
468 */
469 #undef __FUNCT__
470 #define __FUNCT__ "PCNNApplyInterfacePreconditioner"
471 PetscErrorCode PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
472                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
473 {
474   PetscErrorCode ierr;
475   PC_IS*         pcis = (PC_IS*)(pc->data);
476 
477   PetscFunctionBegin;
478   /*
479     First balancing step.
480   */
481   {
482     PetscTruth flg;
483     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);CHKERRQ(ierr);
484     if (!flg) {
485       ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
486     } else {
487       ierr = VecCopy(r,z);CHKERRQ(ierr);
488     }
489   }
490 
491   /*
492     Extract the local interface part of z and scale it by D
493   */
494   ierr = VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
495   ierr = VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
496   ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);
497 
498   /* Neumann Solver */
499   ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr);
500 
501   /*
502     Second balancing step.
503   */
504   {
505     PetscTruth flg;
506     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);CHKERRQ(ierr);
507     if (!flg) {
508       ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
509     } else {
510       PetscScalar zero = 0.0;
511       ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);
512       ierr = VecSet(&zero,z);CHKERRQ(ierr);
513       ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
514       ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
515     }
516   }
517   PetscFunctionReturn(0);
518 }
519 
520 /* -------------------------------------------------------------------------- */
521 /*
522    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
523                    input argument u is provided), or s, as given in equations
524                    (12) and (13), if the input argument u is a null vector.
525                    Notice that the input argument u plays the role of u_i in
526                    equation (14). The equation numbers refer to [Man93].
527 
528    Input Parameters:
529 .  pcnn - NN preconditioner context.
530 .  r - MPI vector of all nodes (interior and interface). It's preserved.
531 .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
532 
533    Output Parameters:
534 .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
535 .  vec1_B - Sequential vector of local interface nodes. Workspace.
536 .  vec2_B - Sequential vector of local interface nodes. Workspace.
537 .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
538 .  vec1_D - Sequential vector of local interior nodes. Workspace.
539 .  vec2_D - Sequential vector of local interior nodes. Workspace.
540 .  work_N - Array of all local nodes (interior and interface). Workspace.
541 
542 */
543 #undef __FUNCT__
544 #define __FUNCT__ "PCNNBalancing"
545 PetscErrorCode PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
546                    Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
547 {
548   PetscErrorCode ierr;
549   PetscInt       k;
550   PetscScalar    zero     =  0.0;
551   PetscScalar    m_one    = -1.0;
552   PetscScalar    value;
553   PetscScalar*   lambda;
554   PC_NN*         pcnn     = (PC_NN*)(pc->data);
555   PC_IS*         pcis     = (PC_IS*)(pc->data);
556 
557   PetscFunctionBegin;
558   ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
559   if (u) {
560     if (!vec3_B) { vec3_B = u; }
561     ierr = VecPointwiseMult(pcis->D,u,vec1_B);CHKERRQ(ierr);
562     ierr = VecSet(&zero,z);CHKERRQ(ierr);
563     ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
564     ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
565     ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
566     ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
567     ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
568     ierr = VecScale(&m_one,vec3_B);CHKERRQ(ierr);
569     ierr = VecCopy(r,z);CHKERRQ(ierr);
570     ierr = VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
571     ierr = VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
572   } else {
573     ierr = VecCopy(r,z);CHKERRQ(ierr);
574   }
575   ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
576   ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
577   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
578   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
579   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
580   {
581     PetscMPIInt rank;
582     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
583     ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr);
584     /*
585        Since we are only inserting local values (one value actually) we don't need to do the
586        reduction that tells us there is no data that needs to be moved. Hence we comment out these
587        ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr);
588        ierr = VecAssemblyEnd  (pcnn->coarse_b);CHKERRQ(ierr);
589     */
590   }
591   ierr = KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);CHKERRQ(ierr);
592   if (!u) { ierr = VecScale(&m_one,pcnn->coarse_x);CHKERRQ(ierr); }
593   ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
594   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
595   ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
596   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
597   ierr = VecSet(&zero,z);CHKERRQ(ierr);
598   ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
599   ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
600   if (!u) {
601     ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
602     ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
603     ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
604     ierr = VecCopy(r,z);CHKERRQ(ierr);
605   }
606   ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
607   ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
608   ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 
614 
615 
616 /*  -------   E N D   O F   T H E   C O D E   -------  */
617 /*                                                     */
618 /*  From now on, "footnotes" (or "historical notes").  */
619 /*                                                     */
620 /*  -------------------------------------------------  */
621 
622 
623 
624 /* --------------------------------------------------------------------------
625    Historical note 01
626    -------------------------------------------------------------------------- */
627 /*
628    We considered the possibility of an alternative D_i that would still
629    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
630    The basic principle was still the pseudo-inverse of the counting
631    function; the difference was that we would not count subdomains
632    that do not contribute to the coarse space (i.e., not pure-Neumann
633    subdomains).
634 
635    This turned out to be a bad idea:  we would solve trivial Neumann
636    problems in the not pure-Neumann subdomains, since we would be scaling
637    the balanced residual by zero.
638 */
639 
640 
641 
642 
643 /* --------------------------------------------------------------------------
644    Historical note 02
645    -------------------------------------------------------------------------- */
646 /*
647    We tried an alternative coarse problem, that would eliminate exactly a
648    constant error. Turned out not to improve the overall convergence.
649 */
650 
651 
652