xref: /petsc/src/ksp/pc/impls/is/nn/nn.c (revision de3625564d23a58eb9a13fca7a33afdebe168b34)
1 /*$Id: nn.c,v 1.13 2001/08/07 03:03:41 balay Exp $*/
2 
3 #include "src/ksp/pc/impls/is/nn/nn.h"
4 
5 /* -------------------------------------------------------------------------- */
6 /*
7    PCSetUp_NN - Prepares for the use of the NN preconditioner
8                     by setting data structures and options.
9 
10    Input Parameter:
11 .  pc - the preconditioner context
12 
13    Application Interface Routine: PCSetUp()
14 
15    Notes:
16    The interface routine PCSetUp() is not usually called directly by
17    the user, but instead is called by PCApply() if necessary.
18 */
19 #undef __FUNCT__
20 #define __FUNCT__ "PCSetUp_NN"
21 static int PCSetUp_NN(PC pc)
22 {
23   int ierr;
24 
25   PetscFunctionBegin;
26   if (!pc->setupcalled) {
27     /* Set up all the "iterative substructuring" common block */
28     ierr = PCISSetUp(pc);CHKERRQ(ierr);
29     /* Create the coarse matrix. */
30     ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr);
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 /* -------------------------------------------------------------------------- */
36 /*
37    PCApply_NN - Applies the NN preconditioner to a vector.
38 
39    Input Parameters:
40 .  pc - the preconditioner context
41 .  r - input vector (global)
42 
43    Output Parameter:
44 .  z - output vector (global)
45 
46    Application Interface Routine: PCApply()
47  */
48 #undef __FUNCT__
49 #define __FUNCT__ "PCApply_NN"
50 static int PCApply_NN(PC pc,Vec r,Vec z)
51 {
52   PC_IS       *pcis = (PC_IS*)(pc->data);
53   int         ierr;
54   PetscScalar m_one = -1.0;
55   Vec         w = pcis->vec1_global;
56 
57   PetscFunctionBegin;
58 
59   /*
60     Dirichlet solvers.
61     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
62     Storing the local results at vec2_D
63   */
64   ierr = VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
65   ierr = VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
66   ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr);
67   ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr);
68   ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr);
69 
70   /*
71     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
72     Storing the result in the interface portion of the global vector w.
73   */
74   ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
75   ierr = VecScale(&m_one,pcis->vec1_B);CHKERRQ(ierr);
76   ierr = VecCopy(r,w);CHKERRQ(ierr);
77   ierr = VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
78   ierr = VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
79 
80   /*
81     Apply the interface preconditioner
82   */
83   ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
84                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
85 
86   /*
87     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
88     The result is stored in vec1_D.
89   */
90   ierr = VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
91   ierr = VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
92   ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr);
93 
94   /*
95     Dirichlet solvers.
96     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
97     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
98   */
99   ierr = VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
100   ierr = VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
101   ierr = KSPSetRhs(pcis->ksp_D,pcis->vec1_D);CHKERRQ(ierr);
102   ierr = KSPSetSolution(pcis->ksp_D,pcis->vec2_D);CHKERRQ(ierr);
103   ierr = KSPSolve(pcis->ksp_D);CHKERRQ(ierr);
104   ierr = VecScale(&m_one,pcis->vec2_D);CHKERRQ(ierr);
105   ierr = VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
106   ierr = VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
107 
108   PetscFunctionReturn(0);
109 }
110 
111 /* -------------------------------------------------------------------------- */
112 /*
113    PCDestroy_NN - Destroys the private context for the NN preconditioner
114    that was created with PCCreate_NN().
115 
116    Input Parameter:
117 .  pc - the preconditioner context
118 
119    Application Interface Routine: PCDestroy()
120 */
121 #undef __FUNCT__
122 #define __FUNCT__ "PCDestroy_NN"
123 static int PCDestroy_NN(PC pc)
124 {
125   PC_NN *pcnn = (PC_NN*)pc->data;
126   int   ierr;
127 
128   PetscFunctionBegin;
129 
130   ierr = PCISDestroy(pc);CHKERRQ(ierr);
131 
132   if (pcnn->coarse_mat)  {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);}
133   if (pcnn->coarse_x)    {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);}
134   if (pcnn->coarse_b)    {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);}
135   if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);}
136   if (pcnn->DZ_IN) {
137     if (pcnn->DZ_IN[0]) {ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);}
138     ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr);
139   }
140 
141   /*
142       Free the private data structure that was hanging off the PC
143   */
144   ierr = PetscFree(pcnn);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 /* -------------------------------------------------------------------------- */
149 /*MC
150    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
151 
152    Options Database Keys:
153 +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
154                                        (this skips the first coarse grid solve in the preconditioner)
155 .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
156                                        (this skips the second coarse grid solve in the preconditioner)
157 .    -pc_is_damp_fixed <fact> -
158 .    -pc_is_remove_nullspace_fixed -
159 .    -pc_is_set_damping_factor_floating <fact> -
160 .    -pc_is_not_damp_floating -
161 +    -pc_is_not_remove_nullspace_floating -
162 
163    Level: intermediates
164 
165    Notes: The matrix used with this preconditioner must be of type MATIS
166 
167           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
168           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
169           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
170 
171    Contributed by Paulo Goldfeld
172 
173 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MatIS
174 M*/
175 EXTERN_C_BEGIN
176 #undef __FUNCT__
177 #define __FUNCT__ "PCCreate_NN"
178 int PCCreate_NN(PC pc)
179 {
180   int   ierr;
181   PC_NN *pcnn;
182 
183   PetscFunctionBegin;
184 
185   /*
186      Creates the private data structure for this preconditioner and
187      attach it to the PC object.
188   */
189   ierr      = PetscNew(PC_NN,&pcnn);CHKERRQ(ierr);
190   pc->data  = (void*)pcnn;
191 
192   /*
193      Logs the memory usage; this is not needed but allows PETSc to
194      monitor how much memory is being used for various purposes.
195   */
196   PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */
197 
198   ierr = PCISCreate(pc);CHKERRQ(ierr);
199   pcnn->coarse_mat  = 0;
200   pcnn->coarse_x    = 0;
201   pcnn->coarse_b    = 0;
202   pcnn->ksp_coarse = 0;
203   pcnn->DZ_IN       = 0;
204 
205   /*
206       Set the pointers for the functions that are provided above.
207       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
208       are called, they will automatically call these functions.  Note we
209       choose not to provide a couple of these functions since they are
210       not needed.
211   */
212   pc->ops->apply               = PCApply_NN;
213   pc->ops->applytranspose      = 0;
214   pc->ops->setup               = PCSetUp_NN;
215   pc->ops->destroy             = PCDestroy_NN;
216   pc->ops->view                = 0;
217   pc->ops->applyrichardson     = 0;
218   pc->ops->applysymmetricleft  = 0;
219   pc->ops->applysymmetricright = 0;
220 
221   PetscFunctionReturn(0);
222 }
223 EXTERN_C_END
224 
225 
226 /* -------------------------------------------------------------------------- */
227 /*
228    PCNNCreateCoarseMatrix -
229 */
230 #undef __FUNCT__
231 #define __FUNCT__ "PCNNCreateCoarseMatrix"
232 int PCNNCreateCoarseMatrix (PC pc)
233 {
234   MPI_Request    *send_request, *recv_request;
235   int            i, j, k, ierr;
236 
237   PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
238   PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
239 
240   /* aliasing some names */
241   PC_IS*         pcis     = (PC_IS*)(pc->data);
242   PC_NN*         pcnn     = (PC_NN*)pc->data;
243   int            n_neigh  = pcis->n_neigh;
244   int*           neigh    = pcis->neigh;
245   int*           n_shared = pcis->n_shared;
246   int**          shared   = pcis->shared;
247   PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */
248 
249   PetscFunctionBegin;
250 
251   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
252   ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr);
253 
254   /* Allocate memory for DZ */
255   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
256   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
257   {
258     int size_of_Z = 0;
259     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr);
260     DZ_IN = pcnn->DZ_IN;
261     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr);
262     for (i=0; i<n_neigh; i++) {
263       size_of_Z += n_shared[i];
264     }
265     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr);
266     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr);
267   }
268   for (i=1; i<n_neigh; i++) {
269     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
270     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
271   }
272 
273   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
274   /* First, set the auxiliary array pcis->work_N. */
275   ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
276   for (i=1; i<n_neigh; i++){
277     for (j=0; j<n_shared[i]; j++) {
278       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
279     }
280   }
281 
282   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
283   /* Notice that send_request[] and recv_request[] could have one less element. */
284   /* We make them longer to have request[i] corresponding to neigh[i].          */
285   {
286     int tag;
287     ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
288     ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr);
289     recv_request = send_request + (n_neigh);
290     for (i=1; i<n_neigh; i++) {
291       ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr);
292       ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr);
293     }
294   }
295 
296   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
297   for(j=0; j<n_shared[0]; j++) {
298     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
299   }
300 
301   /* Start computing with local D*Z while communication goes on.    */
302   /* Apply Schur complement. The result is "stored" in vec (more    */
303   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
304   /* and also scattered to pcnn->work_N.                            */
305   ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
306                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
307 
308   /* Compute the first column, while completing the receiving. */
309   for (i=0; i<n_neigh; i++) {
310     MPI_Status stat;
311     int ind=0;
312     if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;}
313     mat[ind*n_neigh+0] = 0.0;
314     for (k=0; k<n_shared[ind]; k++) {
315       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
316     }
317   }
318 
319   /* Compute the remaining of the columns */
320   for (j=1; j<n_neigh; j++) {
321     ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
322                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
323     for (i=0; i<n_neigh; i++) {
324       mat[i*n_neigh+j] = 0.0;
325       for (k=0; k<n_shared[i]; k++) {
326 	mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
327       }
328     }
329   }
330 
331   /* Complete the sending. */
332   if (n_neigh>1) {
333     MPI_Status *stat;
334     ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr);
335     ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);
336     ierr = PetscFree(stat);CHKERRQ(ierr);
337   }
338 
339   /* Free the memory for the MPI requests */
340   ierr = PetscFree(send_request);CHKERRQ(ierr);
341 
342   /* Free the memory for DZ_OUT */
343   if (DZ_OUT) {
344     if (DZ_OUT[0]) { ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); }
345     ierr = PetscFree(DZ_OUT);CHKERRQ(ierr);
346   }
347 
348   {
349     int size,n_neigh_m1;
350     ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
351     n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0;
352     /* Create the global coarse vectors (rhs and solution). */
353     ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr);
354     ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr);
355     /* Create and set the global coarse AIJ matrix. */
356     ierr = MatCreate(pc->comm,1,1,size,size,&(pcnn->coarse_mat));CHKERRQ(ierr);
357     ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr);
358     ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);CHKERRQ(ierr);
359     ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
360     ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr);
361     ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
362     ierr = MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
363   }
364 
365   {
366     int         rank;
367     PetscScalar one = 1.0;
368     IS          is;
369     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
370     /* "Zero out" rows of not-purely-Neumann subdomains */
371     if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
372       ierr = ISCreateStride(pc->comm,0,0,0,&is);CHKERRQ(ierr);
373     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
374       ierr = ISCreateStride(pc->comm,1,rank,0,&is);CHKERRQ(ierr);
375     }
376     ierr = MatZeroRows(pcnn->coarse_mat,is,&one);CHKERRQ(ierr);
377     ierr = ISDestroy(is);CHKERRQ(ierr);
378   }
379 
380   /* Create the coarse linear solver context */
381   {
382     PC  pc_ctx, inner_pc;
383     ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr);
384     ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
385     ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr);
386     ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr);
387     ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr);
388     ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr);
389     ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr);
390     ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr);
391     ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr);
392     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
393     ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr);
394     ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr);
395     ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr);
396   }
397 
398   /* Free the memory for mat */
399   ierr = PetscFree(mat);CHKERRQ(ierr);
400 
401   /* for DEBUGGING, save the coarse matrix to a file. */
402   {
403     PetscTruth flg;
404     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);CHKERRQ(ierr);
405     if (flg) {
406       PetscViewer viewer;
407       ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr);
408       ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
409       ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr);
410       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
411     }
412   }
413 
414   /*  Set the variable pcnn->factor_coarse_rhs. */
415   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
416 
417   /* See historical note 02, at the bottom of this file. */
418 
419   PetscFunctionReturn(0);
420 }
421 
422 /* -------------------------------------------------------------------------- */
423 /*
424    PCNNApplySchurToChunk -
425 
426    Input parameters:
427 .  pcnn
428 .  n - size of chunk
429 .  idx - indices of chunk
430 .  chunk - values
431 
432    Output parameters:
433 .  array_N - result of Schur complement applied to chunk, scattered to big array
434 .  vec1_B  - result of Schur complement applied to chunk
435 .  vec2_B  - garbage (used as work space)
436 .  vec1_D  - garbage (used as work space)
437 .  vec2_D  - garbage (used as work space)
438 
439 */
440 #undef __FUNCT__
441 #define __FUNCT__ "PCNNApplySchurToChunk"
442 int PCNNApplySchurToChunk(PC pc, int n, int* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
443 {
444   int   i, ierr;
445   PC_IS *pcis = (PC_IS*)(pc->data);
446 
447   PetscFunctionBegin;
448 
449   ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr);
450   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
451   ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
452   ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
453   ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
454 
455   PetscFunctionReturn(0);
456 }
457 
458 /* -------------------------------------------------------------------------- */
459 /*
460    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
461                                       the preconditioner for the Schur complement.
462 
463    Input parameter:
464 .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
465 
466    Output parameters:
467 .  z - global vector of interior and interface nodes. The values on the interface are the result of
468        the application of the interface preconditioner to the interface part of r. The values on the
469        interior nodes are garbage.
470 .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
471 .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
472 .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
473 .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
474 .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
475 .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
476 .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
477 .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
478 
479 */
480 #undef __FUNCT__
481 #define __FUNCT__ "PCNNApplyInterfacePreconditioner"
482 int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
483                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
484 {
485   int    ierr;
486   PC_IS* pcis = (PC_IS*)(pc->data);
487 
488   PetscFunctionBegin;
489 
490   /*
491     First balancing step.
492   */
493   {
494     PetscTruth flg;
495     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);CHKERRQ(ierr);
496     if (!flg) {
497       ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
498     } else {
499       ierr = VecCopy(r,z);CHKERRQ(ierr);
500     }
501   }
502 
503   /*
504     Extract the local interface part of z and scale it by D
505   */
506   ierr = VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
507   ierr = VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
508   ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);
509 
510   /* Neumann Solver */
511   ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr);
512 
513   /*
514     Second balancing step.
515   */
516   {
517     PetscTruth flg;
518     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);CHKERRQ(ierr);
519     if (!flg) {
520       ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
521     } else {
522       PetscScalar zero = 0.0;
523       ierr = VecPointwiseMult(pcis->D,vec1_B,vec2_B);CHKERRQ(ierr);
524       ierr = VecSet(&zero,z);CHKERRQ(ierr);
525       ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
526       ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
527     }
528   }
529 
530   PetscFunctionReturn(0);
531 }
532 
533 /* -------------------------------------------------------------------------- */
534 /*
535    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
536                    input argument u is provided), or s, as given in equations
537                    (12) and (13), if the input argument u is a null vector.
538                    Notice that the input argument u plays the role of u_i in
539                    equation (14). The equation numbers refer to [Man93].
540 
541    Input Parameters:
542 .  pcnn - NN preconditioner context.
543 .  r - MPI vector of all nodes (interior and interface). It's preserved.
544 .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
545 
546    Output Parameters:
547 .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
548 .  vec1_B - Sequential vector of local interface nodes. Workspace.
549 .  vec2_B - Sequential vector of local interface nodes. Workspace.
550 .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
551 .  vec1_D - Sequential vector of local interior nodes. Workspace.
552 .  vec2_D - Sequential vector of local interior nodes. Workspace.
553 .  work_N - Array of all local nodes (interior and interface). Workspace.
554 
555 */
556 #undef __FUNCT__
557 #define __FUNCT__ "PCNNBalancing"
558 int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
559                    Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
560 {
561   int            k, ierr;
562   PetscScalar    zero     =  0.0;
563   PetscScalar    m_one    = -1.0;
564   PetscScalar    value;
565   PetscScalar*   lambda;
566   PC_NN*         pcnn     = (PC_NN*)(pc->data);
567   PC_IS*         pcis     = (PC_IS*)(pc->data);
568 
569   PetscFunctionBegin;
570   ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
571 
572   if (u) {
573     if (!vec3_B) { vec3_B = u; }
574     ierr = VecPointwiseMult(pcis->D,u,vec1_B);CHKERRQ(ierr);
575     ierr = VecSet(&zero,z);CHKERRQ(ierr);
576     ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
577     ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
578     ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
579     ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
580     ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
581     ierr = VecScale(&m_one,vec3_B);CHKERRQ(ierr);
582     ierr = VecCopy(r,z);CHKERRQ(ierr);
583     ierr = VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
584     ierr = VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
585   } else {
586     ierr = VecCopy(r,z);CHKERRQ(ierr);
587   }
588   ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
589   ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
590   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
591   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
592   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
593   {
594     int rank;
595     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
596     ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr);
597     /*
598        Since we are only inserting local values (one value actually) we don't need to do the
599        reduction that tells us there is no data that needs to be moved. Hence we comment out these
600        ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr);
601        ierr = VecAssemblyEnd  (pcnn->coarse_b);CHKERRQ(ierr);
602     */
603   }
604   ierr = KSPSetRhs(pcnn->ksp_coarse,pcnn->coarse_b);CHKERRQ(ierr);
605   ierr = KSPSetSolution(pcnn->ksp_coarse,pcnn->coarse_x);CHKERRQ(ierr);
606   ierr = KSPSolve(pcnn->ksp_coarse);CHKERRQ(ierr);
607   if (!u) { ierr = VecScale(&m_one,pcnn->coarse_x);CHKERRQ(ierr); }
608   ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
609   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
610   ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
611   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
612   ierr = VecSet(&zero,z);CHKERRQ(ierr);
613   ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
614   ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
615   if (!u) {
616     ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
617     ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
618     ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
619     ierr = VecCopy(r,z);CHKERRQ(ierr);
620   }
621   ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
622   ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
623   ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
624 
625   PetscFunctionReturn(0);
626 }
627 
628 #undef __FUNCT__
629 
630 
631 
632 /*  -------   E N D   O F   T H E   C O D E   -------  */
633 /*                                                     */
634 /*  From now on, "footnotes" (or "historical notes").  */
635 /*                                                     */
636 /*  -------------------------------------------------  */
637 
638 
639 #ifdef __HISTORICAL_NOTES___do_not_compile__
640 
641 /* --------------------------------------------------------------------------
642    Historical note 01
643    -------------------------------------------------------------------------- */
644 /*
645    We considered the possibility of an alternative D_i that would still
646    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
647    The basic principle was still the pseudo-inverse of the counting
648    function; the difference was that we would not count subdomains
649    that do not contribute to the coarse space (i.e., not pure-Neumann
650    subdomains).
651 
652    This turned out to be a bad idea:  we would solve trivial Neumann
653    problems in the not pure-Neumann subdomains, since we would be scaling
654    the balanced residual by zero.
655 */
656 
657     {
658       PetscTruth flg;
659       ierr = PetscOptionsHasName(PETSC_NULL,"-pcnn_new_scaling",&flg);CHKERRQ(ierr);
660       if (flg) {
661         Vec    counter;
662         PetscScalar one=1.0, zero=0.0;
663         ierr = VecDuplicate(pc->vec,&counter);CHKERRQ(ierr);
664         ierr = VecSet(&zero,counter);CHKERRQ(ierr);
665         if (pcnn->pure_neumann) {
666           ierr = VecSet(&one,pcnn->D);CHKERRQ(ierr);
667         } else {
668           ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr);
669         }
670         ierr = VecScatterBegin(pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
671         ierr = VecScatterEnd  (pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
672         ierr = VecScatterBegin(counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
673         ierr = VecScatterEnd  (counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
674         ierr = VecDestroy(counter);CHKERRQ(ierr);
675         if (pcnn->pure_neumann) {
676           ierr = VecReciprocal(pcnn->D);CHKERRQ(ierr);
677         } else {
678           ierr = VecSet(&zero,pcnn->D);CHKERRQ(ierr);
679         }
680       }
681     }
682 
683 
684 
685 /* --------------------------------------------------------------------------
686    Historical note 02
687    -------------------------------------------------------------------------- */
688 /*
689    We tried an alternative coarse problem, that would eliminate exactly a
690    constant error. Turned out not to improve the overall convergence.
691 */
692 
693   /*  Set the variable pcnn->factor_coarse_rhs. */
694   {
695     PetscTruth flg;
696     ierr = PetscOptionsHasName(PETSC_NULL,"-enforce_preserving_constants",&flg);CHKERRQ(ierr);
697     if (!flg) { pcnn->factor_coarse_rhs = (pcnn->pure_neumann) ? 1.0 : 0.0; }
698     else {
699       PetscScalar zero = 0.0, one = 1.0;
700       ierr = VecSet(&one,pcnn->vec1_B);
701       ierr = ApplySchurComplement(pcnn,pcnn->vec1_B,pcnn->vec2_B,(Vec)0,pcnn->vec1_D,pcnn->vec2_D);CHKERRQ(ierr);
702       ierr = VecSet(&zero,pcnn->vec1_global);CHKERRQ(ierr);
703       ierr = VecScatterBegin(pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
704       ierr = VecScatterEnd  (pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);CHKERRQ(ierr);
705       ierr = VecScatterBegin(pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
706       ierr = VecScatterEnd  (pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);CHKERRQ(ierr);
707       if (pcnn->pure_neumann) { pcnn->factor_coarse_rhs = 1.0; }
708       else {
709         ierr = ScatterArrayNToVecB(pcnn->work_N,pcnn->vec1_B,INSERT_VALUES,SCATTER_REVERSE,pcnn);CHKERRQ(ierr);
710         for (k=0, pcnn->factor_coarse_rhs=0.0; k<pcnn->n_shared[0]; k++) {
711           pcnn->factor_coarse_rhs += pcnn->work_N[pcnn->shared[0][k]] * pcnn->DZ_IN[0][k];
712         }
713         if (pcnn->factor_coarse_rhs) { pcnn->factor_coarse_rhs = 1.0 / pcnn->factor_coarse_rhs; }
714         else { SETERRQ(1,"Constants cannot be preserved. Remove \"-enforce_preserving_constants\" option."); }
715       }
716     }
717   }
718 
719 #endif /* __HISTORICAL_NOTES___do_not_compile */
720