xref: /petsc/src/ksp/pc/impls/is/nn/nn.c (revision f4f49eeac7efa77fffa46b7ff95a3ed169f659ed)
1c6db04a5SJed Brown #include <../src/ksp/pc/impls/is/nn/nn.h>
24b9ad928SBarry Smith 
34b9ad928SBarry Smith /*
44b9ad928SBarry Smith    PCSetUp_NN - Prepares for the use of the NN preconditioner
54b9ad928SBarry Smith                     by setting data structures and options.
64b9ad928SBarry Smith 
74b9ad928SBarry Smith    Input Parameter:
84b9ad928SBarry Smith .  pc - the preconditioner context
94b9ad928SBarry Smith 
104b9ad928SBarry Smith    Application Interface Routine: PCSetUp()
114b9ad928SBarry Smith 
12f1580f4eSBarry Smith    Note:
134b9ad928SBarry Smith    The interface routine PCSetUp() is not usually called directly by
144b9ad928SBarry Smith    the user, but instead is called by PCApply() if necessary.
154b9ad928SBarry Smith */
16d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCSetUp_NN(PC pc)
17d71ae5a4SJacob Faibussowitsch {
184b9ad928SBarry Smith   PetscFunctionBegin;
194b9ad928SBarry Smith   if (!pc->setupcalled) {
204b9ad928SBarry Smith     /* Set up all the "iterative substructuring" common block */
219566063dSJacob Faibussowitsch     PetscCall(PCISSetUp(pc, PETSC_TRUE, PETSC_TRUE));
224b9ad928SBarry Smith     /* Create the coarse matrix. */
239566063dSJacob Faibussowitsch     PetscCall(PCNNCreateCoarseMatrix(pc));
244b9ad928SBarry Smith   }
253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
264b9ad928SBarry Smith }
274b9ad928SBarry Smith 
284b9ad928SBarry Smith /*
294b9ad928SBarry Smith    PCApply_NN - Applies the NN preconditioner to a vector.
304b9ad928SBarry Smith 
314b9ad928SBarry Smith    Input Parameters:
322fe279fdSBarry Smith +  pc - the preconditioner context
332fe279fdSBarry Smith -  r - input vector (global)
344b9ad928SBarry Smith 
354b9ad928SBarry Smith    Output Parameter:
364b9ad928SBarry Smith .  z - output vector (global)
374b9ad928SBarry Smith 
384b9ad928SBarry Smith    Application Interface Routine: PCApply()
394b9ad928SBarry Smith  */
40d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCApply_NN(PC pc, Vec r, Vec z)
41d71ae5a4SJacob Faibussowitsch {
42*f4f49eeaSPierre Jolivet   PC_IS      *pcis  = (PC_IS *)pc->data;
434b9ad928SBarry Smith   PetscScalar m_one = -1.0;
444b9ad928SBarry Smith   Vec         w     = pcis->vec1_global;
454b9ad928SBarry Smith 
464b9ad928SBarry Smith   PetscFunctionBegin;
474b9ad928SBarry Smith   /*
484b9ad928SBarry Smith     Dirichlet solvers.
494b9ad928SBarry Smith     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
504b9ad928SBarry Smith     Storing the local results at vec2_D
514b9ad928SBarry Smith   */
529566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
539566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
549566063dSJacob Faibussowitsch   PetscCall(KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D));
554b9ad928SBarry Smith 
564b9ad928SBarry Smith   /*
574b9ad928SBarry Smith     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
584b9ad928SBarry Smith     Storing the result in the interface portion of the global vector w.
594b9ad928SBarry Smith   */
609566063dSJacob Faibussowitsch   PetscCall(MatMult(pcis->A_BI, pcis->vec2_D, pcis->vec1_B));
619566063dSJacob Faibussowitsch   PetscCall(VecScale(pcis->vec1_B, m_one));
629566063dSJacob Faibussowitsch   PetscCall(VecCopy(r, w));
639566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE));
649566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE));
654b9ad928SBarry Smith 
664b9ad928SBarry Smith   /*
674b9ad928SBarry Smith     Apply the interface preconditioner
684b9ad928SBarry Smith   */
69d0609cedSBarry Smith   PetscCall(PCNNApplyInterfacePreconditioner(pc, w, z, pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec3_B, pcis->vec1_D, pcis->vec3_D, pcis->vec1_N, pcis->vec2_N));
704b9ad928SBarry Smith 
714b9ad928SBarry Smith   /*
724b9ad928SBarry Smith     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
734b9ad928SBarry Smith     The result is stored in vec1_D.
744b9ad928SBarry Smith   */
759566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
769566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
779566063dSJacob Faibussowitsch   PetscCall(MatMult(pcis->A_IB, pcis->vec1_B, pcis->vec1_D));
784b9ad928SBarry Smith 
794b9ad928SBarry Smith   /*
804b9ad928SBarry Smith     Dirichlet solvers.
814b9ad928SBarry Smith     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
824b9ad928SBarry Smith     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
834b9ad928SBarry Smith   */
849566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
859566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
869566063dSJacob Faibussowitsch   PetscCall(KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D));
879566063dSJacob Faibussowitsch   PetscCall(VecScale(pcis->vec2_D, m_one));
889566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE));
899566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE));
903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
914b9ad928SBarry Smith }
924b9ad928SBarry Smith 
934b9ad928SBarry Smith /*
944b9ad928SBarry Smith    PCDestroy_NN - Destroys the private context for the NN preconditioner
954b9ad928SBarry Smith    that was created with PCCreate_NN().
964b9ad928SBarry Smith 
974b9ad928SBarry Smith    Input Parameter:
984b9ad928SBarry Smith .  pc - the preconditioner context
994b9ad928SBarry Smith 
1004b9ad928SBarry Smith    Application Interface Routine: PCDestroy()
1014b9ad928SBarry Smith */
102d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCDestroy_NN(PC pc)
103d71ae5a4SJacob Faibussowitsch {
1044b9ad928SBarry Smith   PC_NN *pcnn = (PC_NN *)pc->data;
1054b9ad928SBarry Smith 
1064b9ad928SBarry Smith   PetscFunctionBegin;
10704c3f3b8SBarry Smith   PetscCall(PCISReset(pc));
1084b9ad928SBarry Smith 
1099566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&pcnn->coarse_mat));
1109566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&pcnn->coarse_x));
1119566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&pcnn->coarse_b));
1129566063dSJacob Faibussowitsch   PetscCall(KSPDestroy(&pcnn->ksp_coarse));
1134b9ad928SBarry Smith   if (pcnn->DZ_IN) {
1149566063dSJacob Faibussowitsch     PetscCall(PetscFree(pcnn->DZ_IN[0]));
1159566063dSJacob Faibussowitsch     PetscCall(PetscFree(pcnn->DZ_IN));
1164b9ad928SBarry Smith   }
1174b9ad928SBarry Smith 
1184b9ad928SBarry Smith   /*
1194b9ad928SBarry Smith       Free the private data structure that was hanging off the PC
1204b9ad928SBarry Smith   */
1219566063dSJacob Faibussowitsch   PetscCall(PetscFree(pc->data));
1223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1234b9ad928SBarry Smith }
1244b9ad928SBarry Smith 
125dfc58137SBarry Smith /*MC
12637a17b4dSBarry Smith    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
1274b9ad928SBarry Smith 
12837a17b4dSBarry Smith    Options Database Keys:
12937a17b4dSBarry Smith +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
13037a17b4dSBarry Smith                                        (this skips the first coarse grid solve in the preconditioner)
1315c9740d6SBarry Smith .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
13237a17b4dSBarry Smith                                        (this skips the second coarse grid solve in the preconditioner)
1335c9740d6SBarry Smith .    -pc_is_damp_fixed <fact> -
1345c9740d6SBarry Smith .    -pc_is_remove_nullspace_fixed -
1355c9740d6SBarry Smith .    -pc_is_set_damping_factor_floating <fact> -
1365c9740d6SBarry Smith .    -pc_is_not_damp_floating -
137a2b725a8SWilliam Gropp -    -pc_is_not_remove_nullspace_floating -
1384b9ad928SBarry Smith 
139f1580f4eSBarry Smith    Options Database prefixes for the subsolvers this preconditioner uses:
140f1580f4eSBarry Smith +  -nn_coarse_pc_ - for the coarse grid preconditioner
141f1580f4eSBarry Smith .  -is_localD_pc_ - for the Dirichlet subproblem preconditioner
142f1580f4eSBarry Smith -  -is_localN_pc_ - for the Neumann subproblem preconditioner
143f1580f4eSBarry Smith 
1448eb2139fSSatish Balay    Level: intermediate
14537a17b4dSBarry Smith 
14695452b02SPatrick Sanan    Notes:
147f1580f4eSBarry Smith    The matrix used with this preconditioner must be of type `MATIS`
14837a17b4dSBarry Smith 
1491b99c236SBarry Smith    Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
1501b99c236SBarry Smith    degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
1511b99c236SBarry Smith    on the subdomains; though in our experience using approximate solvers is slower.).
1521b99c236SBarry Smith 
15337a17b4dSBarry Smith    Contributed by Paulo Goldfeld
15437a17b4dSBarry Smith 
155562efe2eSBarry Smith .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `MATIS`, `PCBDDC`
15637a17b4dSBarry Smith M*/
157b2573a8aSBarry Smith 
158d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc)
159d71ae5a4SJacob Faibussowitsch {
1604b9ad928SBarry Smith   PC_NN *pcnn;
1614b9ad928SBarry Smith 
1624b9ad928SBarry Smith   PetscFunctionBegin;
1634b9ad928SBarry Smith   /*
1644b9ad928SBarry Smith      Creates the private data structure for this preconditioner and
1654b9ad928SBarry Smith      attach it to the PC object.
1664b9ad928SBarry Smith   */
1674dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&pcnn));
1684b9ad928SBarry Smith   pc->data = (void *)pcnn;
1694b9ad928SBarry Smith 
17004c3f3b8SBarry Smith   PetscCall(PCISInitialize(pc));
1710a545947SLisandro Dalcin   pcnn->coarse_mat = NULL;
1720a545947SLisandro Dalcin   pcnn->coarse_x   = NULL;
1730a545947SLisandro Dalcin   pcnn->coarse_b   = NULL;
1740a545947SLisandro Dalcin   pcnn->ksp_coarse = NULL;
1750a545947SLisandro Dalcin   pcnn->DZ_IN      = NULL;
1764b9ad928SBarry Smith 
1774b9ad928SBarry Smith   /*
1784b9ad928SBarry Smith       Set the pointers for the functions that are provided above.
1794b9ad928SBarry Smith       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
1804b9ad928SBarry Smith       are called, they will automatically call these functions.  Note we
1814b9ad928SBarry Smith       choose not to provide a couple of these functions since they are
1824b9ad928SBarry Smith       not needed.
1834b9ad928SBarry Smith   */
1844b9ad928SBarry Smith   pc->ops->apply               = PCApply_NN;
1850a545947SLisandro Dalcin   pc->ops->applytranspose      = NULL;
1864b9ad928SBarry Smith   pc->ops->setup               = PCSetUp_NN;
1874b9ad928SBarry Smith   pc->ops->destroy             = PCDestroy_NN;
1880a545947SLisandro Dalcin   pc->ops->view                = NULL;
1890a545947SLisandro Dalcin   pc->ops->applyrichardson     = NULL;
1900a545947SLisandro Dalcin   pc->ops->applysymmetricleft  = NULL;
1910a545947SLisandro Dalcin   pc->ops->applysymmetricright = NULL;
1923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1934b9ad928SBarry Smith }
1944b9ad928SBarry Smith 
1954b9ad928SBarry Smith /*
1964b9ad928SBarry Smith    PCNNCreateCoarseMatrix -
1974b9ad928SBarry Smith */
198d71ae5a4SJacob Faibussowitsch PetscErrorCode PCNNCreateCoarseMatrix(PC pc)
199d71ae5a4SJacob Faibussowitsch {
2004b9ad928SBarry Smith   MPI_Request  *send_request, *recv_request;
20113f74950SBarry Smith   PetscInt      i, j, k;
2024b9ad928SBarry Smith   PetscScalar  *mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
2034b9ad928SBarry Smith   PetscScalar **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
2044b9ad928SBarry Smith 
2054b9ad928SBarry Smith   /* aliasing some names */
206*f4f49eeaSPierre Jolivet   PC_IS        *pcis     = (PC_IS *)pc->data;
2074b9ad928SBarry Smith   PC_NN        *pcnn     = (PC_NN *)pc->data;
20813f74950SBarry Smith   PetscInt      n_neigh  = pcis->n_neigh;
20913f74950SBarry Smith   PetscInt     *neigh    = pcis->neigh;
21013f74950SBarry Smith   PetscInt     *n_shared = pcis->n_shared;
21113f74950SBarry Smith   PetscInt    **shared   = pcis->shared;
2124b9ad928SBarry Smith   PetscScalar **DZ_IN; /* Must be initialized after memory allocation. */
2134b9ad928SBarry Smith 
2144b9ad928SBarry Smith   PetscFunctionBegin;
2154b9ad928SBarry Smith   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
2169566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n_neigh * n_neigh + 1, &mat));
2174b9ad928SBarry Smith 
2184b9ad928SBarry Smith   /* Allocate memory for DZ */
2194b9ad928SBarry Smith   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
2204b9ad928SBarry Smith   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
2214b9ad928SBarry Smith   {
22213f74950SBarry Smith     PetscInt size_of_Z = 0;
2239566063dSJacob Faibussowitsch     PetscCall(PetscMalloc((n_neigh + 1) * sizeof(PetscScalar *), &pcnn->DZ_IN));
2244b9ad928SBarry Smith     DZ_IN = pcnn->DZ_IN;
2259566063dSJacob Faibussowitsch     PetscCall(PetscMalloc((n_neigh + 1) * sizeof(PetscScalar *), &DZ_OUT));
2262fa5cd67SKarl Rupp     for (i = 0; i < n_neigh; i++) size_of_Z += n_shared[i];
2279566063dSJacob Faibussowitsch     PetscCall(PetscMalloc((size_of_Z + 1) * sizeof(PetscScalar), &DZ_IN[0]));
2289566063dSJacob Faibussowitsch     PetscCall(PetscMalloc((size_of_Z + 1) * sizeof(PetscScalar), &DZ_OUT[0]));
2294b9ad928SBarry Smith   }
2304b9ad928SBarry Smith   for (i = 1; i < n_neigh; i++) {
2314b9ad928SBarry Smith     DZ_IN[i]  = DZ_IN[i - 1] + n_shared[i - 1];
2324b9ad928SBarry Smith     DZ_OUT[i] = DZ_OUT[i - 1] + n_shared[i - 1];
2334b9ad928SBarry Smith   }
2344b9ad928SBarry Smith 
2354b9ad928SBarry Smith   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
2364b9ad928SBarry Smith   /* First, set the auxiliary array pcis->work_N. */
23704c3f3b8SBarry Smith   PetscCall(PCISScatterArrayNToVecB(pc, pcis->work_N, pcis->D, INSERT_VALUES, SCATTER_REVERSE));
2384b9ad928SBarry Smith   for (i = 1; i < n_neigh; i++) {
239ad540459SPierre Jolivet     for (j = 0; j < n_shared[i]; j++) DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
2404b9ad928SBarry Smith   }
2414b9ad928SBarry Smith 
2424b9ad928SBarry Smith   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
2434b9ad928SBarry Smith   /* Notice that send_request[] and recv_request[] could have one less element. */
2444b9ad928SBarry Smith   /* We make them longer to have request[i] corresponding to neigh[i].          */
2454b9ad928SBarry Smith   {
24613f74950SBarry Smith     PetscMPIInt tag;
2479566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag));
2489566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(n_neigh + 1, &send_request, n_neigh + 1, &recv_request));
2494b9ad928SBarry Smith     for (i = 1; i < n_neigh; i++) {
250*f4f49eeaSPierre Jolivet       PetscCallMPI(MPI_Isend((void *)DZ_OUT[i], n_shared[i], MPIU_SCALAR, neigh[i], tag, PetscObjectComm((PetscObject)pc), &send_request[i]));
251*f4f49eeaSPierre Jolivet       PetscCallMPI(MPI_Irecv((void *)DZ_IN[i], n_shared[i], MPIU_SCALAR, neigh[i], tag, PetscObjectComm((PetscObject)pc), &recv_request[i]));
2524b9ad928SBarry Smith     }
2534b9ad928SBarry Smith   }
2544b9ad928SBarry Smith 
2554b9ad928SBarry Smith   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
2562fa5cd67SKarl Rupp   for (j = 0; j < n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]];
2574b9ad928SBarry Smith 
2584b9ad928SBarry Smith   /* Start computing with local D*Z while communication goes on.    */
2594b9ad928SBarry Smith   /* Apply Schur complement. The result is "stored" in vec (more    */
2604b9ad928SBarry Smith   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
2614b9ad928SBarry Smith   /* and also scattered to pcnn->work_N.                            */
2629566063dSJacob Faibussowitsch   PetscCall(PCNNApplySchurToChunk(pc, n_shared[0], shared[0], DZ_IN[0], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D));
2634b9ad928SBarry Smith 
2644b9ad928SBarry Smith   /* Compute the first column, while completing the receiving. */
2654b9ad928SBarry Smith   for (i = 0; i < n_neigh; i++) {
2664b9ad928SBarry Smith     MPI_Status  stat;
26713f74950SBarry Smith     PetscMPIInt ind = 0;
2689371c9d4SSatish Balay     if (i > 0) {
2699371c9d4SSatish Balay       PetscCallMPI(MPI_Waitany(n_neigh - 1, recv_request + 1, &ind, &stat));
2709371c9d4SSatish Balay       ind++;
2719371c9d4SSatish Balay     }
2724b9ad928SBarry Smith     mat[ind * n_neigh + 0] = 0.0;
2732fa5cd67SKarl Rupp     for (k = 0; k < n_shared[ind]; k++) mat[ind * n_neigh + 0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
2744b9ad928SBarry Smith   }
2754b9ad928SBarry Smith 
2764b9ad928SBarry Smith   /* Compute the remaining of the columns */
2774b9ad928SBarry Smith   for (j = 1; j < n_neigh; j++) {
2789566063dSJacob Faibussowitsch     PetscCall(PCNNApplySchurToChunk(pc, n_shared[j], shared[j], DZ_IN[j], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D));
2794b9ad928SBarry Smith     for (i = 0; i < n_neigh; i++) {
2804b9ad928SBarry Smith       mat[i * n_neigh + j] = 0.0;
2812fa5cd67SKarl Rupp       for (k = 0; k < n_shared[i]; k++) mat[i * n_neigh + j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
2824b9ad928SBarry Smith     }
2834b9ad928SBarry Smith   }
2844b9ad928SBarry Smith 
2854b9ad928SBarry Smith   /* Complete the sending. */
2864b9ad928SBarry Smith   if (n_neigh > 1) {
2874b9ad928SBarry Smith     MPI_Status *stat;
2889566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n_neigh - 1, &stat));
289*f4f49eeaSPierre Jolivet     if (n_neigh - 1) PetscCallMPI(MPI_Waitall(n_neigh - 1, &send_request[1], stat));
2909566063dSJacob Faibussowitsch     PetscCall(PetscFree(stat));
2914b9ad928SBarry Smith   }
2924b9ad928SBarry Smith 
2934b9ad928SBarry Smith   /* Free the memory for the MPI requests */
2949566063dSJacob Faibussowitsch   PetscCall(PetscFree2(send_request, recv_request));
2954b9ad928SBarry Smith 
2964b9ad928SBarry Smith   /* Free the memory for DZ_OUT */
2974b9ad928SBarry Smith   if (DZ_OUT) {
2989566063dSJacob Faibussowitsch     PetscCall(PetscFree(DZ_OUT[0]));
2999566063dSJacob Faibussowitsch     PetscCall(PetscFree(DZ_OUT));
3004b9ad928SBarry Smith   }
3014b9ad928SBarry Smith 
3024b9ad928SBarry Smith   {
30313f74950SBarry Smith     PetscMPIInt size;
3049566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
3054b9ad928SBarry Smith     /* Create the global coarse vectors (rhs and solution). */
306*f4f49eeaSPierre Jolivet     PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pc), 1, size, &pcnn->coarse_b));
307*f4f49eeaSPierre Jolivet     PetscCall(VecDuplicate(pcnn->coarse_b, &pcnn->coarse_x));
308f204ca49SKris Buschelman     /* Create and set the global coarse AIJ matrix. */
309*f4f49eeaSPierre Jolivet     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcnn->coarse_mat));
3109566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(pcnn->coarse_mat, 1, 1, size, size));
3119566063dSJacob Faibussowitsch     PetscCall(MatSetType(pcnn->coarse_mat, MATAIJ));
3129566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(pcnn->coarse_mat, 1, NULL));
3139566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(pcnn->coarse_mat, 1, NULL, n_neigh, NULL));
3149566063dSJacob Faibussowitsch     PetscCall(MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
3159566063dSJacob Faibussowitsch     PetscCall(MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3169566063dSJacob Faibussowitsch     PetscCall(MatSetValues(pcnn->coarse_mat, n_neigh, neigh, n_neigh, neigh, mat, ADD_VALUES));
3179566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY));
3189566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY));
3194b9ad928SBarry Smith   }
3204b9ad928SBarry Smith 
3214b9ad928SBarry Smith   {
32213f74950SBarry Smith     PetscMPIInt rank;
3234b9ad928SBarry Smith     PetscScalar one = 1.0;
3249566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
3254b9ad928SBarry Smith     /* "Zero out" rows of not-purely-Neumann subdomains */
3264b9ad928SBarry Smith     if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
3279566063dSJacob Faibussowitsch       PetscCall(MatZeroRows(pcnn->coarse_mat, 0, NULL, one, NULL, NULL));
3284b9ad928SBarry Smith     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
3291c82e0cfSMatthew Knepley       PetscInt row = (PetscInt)rank;
3309566063dSJacob Faibussowitsch       PetscCall(MatZeroRows(pcnn->coarse_mat, 1, &row, one, NULL, NULL));
3314b9ad928SBarry Smith     }
3324b9ad928SBarry Smith   }
3334b9ad928SBarry Smith 
3344b9ad928SBarry Smith   /* Create the coarse linear solver context */
3354b9ad928SBarry Smith   {
3364b9ad928SBarry Smith     PC  pc_ctx, inner_pc;
33783ab6a24SBarry Smith     KSP inner_ksp;
33883ab6a24SBarry Smith 
3399566063dSJacob Faibussowitsch     PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &pcnn->ksp_coarse));
3403821be0aSBarry Smith     PetscCall(KSPSetNestLevel(pcnn->ksp_coarse, pc->kspnestlevel));
3419566063dSJacob Faibussowitsch     PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse, (PetscObject)pc, 2));
3429566063dSJacob Faibussowitsch     PetscCall(KSPSetOperators(pcnn->ksp_coarse, pcnn->coarse_mat, pcnn->coarse_mat));
3439566063dSJacob Faibussowitsch     PetscCall(KSPGetPC(pcnn->ksp_coarse, &pc_ctx));
3449566063dSJacob Faibussowitsch     PetscCall(PCSetType(pc_ctx, PCREDUNDANT));
3459566063dSJacob Faibussowitsch     PetscCall(KSPSetType(pcnn->ksp_coarse, KSPPREONLY));
3469566063dSJacob Faibussowitsch     PetscCall(PCRedundantGetKSP(pc_ctx, &inner_ksp));
3479566063dSJacob Faibussowitsch     PetscCall(KSPGetPC(inner_ksp, &inner_pc));
3489566063dSJacob Faibussowitsch     PetscCall(PCSetType(inner_pc, PCLU));
3499566063dSJacob Faibussowitsch     PetscCall(KSPSetOptionsPrefix(pcnn->ksp_coarse, "nn_coarse_"));
3509566063dSJacob Faibussowitsch     PetscCall(KSPSetFromOptions(pcnn->ksp_coarse));
3514b9ad928SBarry Smith     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
3529566063dSJacob Faibussowitsch     PetscCall(KSPSetUp(pcnn->ksp_coarse));
3534b9ad928SBarry Smith   }
3544b9ad928SBarry Smith 
3554b9ad928SBarry Smith   /* Free the memory for mat */
3569566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat));
3574b9ad928SBarry Smith 
3584b9ad928SBarry Smith   /* for DEBUGGING, save the coarse matrix to a file. */
3594b9ad928SBarry Smith   {
360ace3abfcSBarry Smith     PetscBool flg = PETSC_FALSE;
3619566063dSJacob Faibussowitsch     PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_nn_save_coarse_matrix", &flg, NULL));
3624b9ad928SBarry Smith     if (flg) {
3634b9ad928SBarry Smith       PetscViewer viewer;
3649566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_WORLD, "coarse.m", &viewer));
3659566063dSJacob Faibussowitsch       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
3669566063dSJacob Faibussowitsch       PetscCall(MatView(pcnn->coarse_mat, viewer));
3679566063dSJacob Faibussowitsch       PetscCall(PetscViewerPopFormat(viewer));
3689566063dSJacob Faibussowitsch       PetscCall(PetscViewerDestroy(&viewer));
3694b9ad928SBarry Smith     }
3704b9ad928SBarry Smith   }
3714b9ad928SBarry Smith 
3724b9ad928SBarry Smith   /*  Set the variable pcnn->factor_coarse_rhs. */
3734b9ad928SBarry Smith   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
3744b9ad928SBarry Smith 
3754b9ad928SBarry Smith   /* See historical note 02, at the bottom of this file. */
3763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3774b9ad928SBarry Smith }
3784b9ad928SBarry Smith 
3794b9ad928SBarry Smith /*
3804b9ad928SBarry Smith    PCNNApplySchurToChunk -
3814b9ad928SBarry Smith 
3824b9ad928SBarry Smith    Input parameters:
3834b9ad928SBarry Smith .  pcnn
3844b9ad928SBarry Smith .  n - size of chunk
3854b9ad928SBarry Smith .  idx - indices of chunk
3864b9ad928SBarry Smith .  chunk - values
3874b9ad928SBarry Smith 
3884b9ad928SBarry Smith    Output parameters:
3894b9ad928SBarry Smith .  array_N - result of Schur complement applied to chunk, scattered to big array
3904b9ad928SBarry Smith .  vec1_B  - result of Schur complement applied to chunk
3914b9ad928SBarry Smith .  vec2_B  - garbage (used as work space)
3924b9ad928SBarry Smith .  vec1_D  - garbage (used as work space)
3934b9ad928SBarry Smith .  vec2_D  - garbage (used as work space)
3944b9ad928SBarry Smith 
3954b9ad928SBarry Smith */
396d71ae5a4SJacob Faibussowitsch PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
397d71ae5a4SJacob Faibussowitsch {
39813f74950SBarry Smith   PetscInt i;
399*f4f49eeaSPierre Jolivet   PC_IS   *pcis = (PC_IS *)pc->data;
4004b9ad928SBarry Smith 
4014b9ad928SBarry Smith   PetscFunctionBegin;
4029566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(array_N, pcis->n));
4032fa5cd67SKarl Rupp   for (i = 0; i < n; i++) array_N[idx[i]] = chunk[i];
40404c3f3b8SBarry Smith   PetscCall(PCISScatterArrayNToVecB(pc, array_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
4059566063dSJacob Faibussowitsch   PetscCall(PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D));
40604c3f3b8SBarry Smith   PetscCall(PCISScatterArrayNToVecB(pc, array_N, vec1_B, INSERT_VALUES, SCATTER_REVERSE));
4073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4084b9ad928SBarry Smith }
4094b9ad928SBarry Smith 
4104b9ad928SBarry Smith /*
4114b9ad928SBarry Smith    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
4124b9ad928SBarry Smith                                       the preconditioner for the Schur complement.
4134b9ad928SBarry Smith 
4144b9ad928SBarry Smith    Input parameter:
4154b9ad928SBarry Smith .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
4164b9ad928SBarry Smith 
4174b9ad928SBarry Smith    Output parameters:
4184b9ad928SBarry Smith .  z - global vector of interior and interface nodes. The values on the interface are the result of
4194b9ad928SBarry Smith        the application of the interface preconditioner to the interface part of r. The values on the
4204b9ad928SBarry Smith        interior nodes are garbage.
4214b9ad928SBarry Smith .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4224b9ad928SBarry Smith .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4234b9ad928SBarry Smith .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4244b9ad928SBarry Smith .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4254b9ad928SBarry Smith .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
4264b9ad928SBarry Smith .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
4274b9ad928SBarry Smith .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4284b9ad928SBarry Smith .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4294b9ad928SBarry Smith 
4304b9ad928SBarry Smith */
431d71ae5a4SJacob Faibussowitsch PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, Vec vec1_N, Vec vec2_N)
432d71ae5a4SJacob Faibussowitsch {
433*f4f49eeaSPierre Jolivet   PC_IS *pcis = (PC_IS *)pc->data;
4344b9ad928SBarry Smith 
4354b9ad928SBarry Smith   PetscFunctionBegin;
4364b9ad928SBarry Smith   /*
4374b9ad928SBarry Smith     First balancing step.
4384b9ad928SBarry Smith   */
4394b9ad928SBarry Smith   {
440ace3abfcSBarry Smith     PetscBool flg = PETSC_FALSE;
4419566063dSJacob Faibussowitsch     PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_nn_turn_off_first_balancing", &flg, NULL));
4424b9ad928SBarry Smith     if (!flg) {
4439566063dSJacob Faibussowitsch       PetscCall(PCNNBalancing(pc, r, (Vec)0, z, vec1_B, vec2_B, (Vec)0, vec1_D, vec2_D, work_N));
4444b9ad928SBarry Smith     } else {
4459566063dSJacob Faibussowitsch       PetscCall(VecCopy(r, z));
4464b9ad928SBarry Smith     }
4474b9ad928SBarry Smith   }
4484b9ad928SBarry Smith 
4494b9ad928SBarry Smith   /*
4504b9ad928SBarry Smith     Extract the local interface part of z and scale it by D
4514b9ad928SBarry Smith   */
4529566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD));
4539566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD));
4549566063dSJacob Faibussowitsch   PetscCall(VecPointwiseMult(vec2_B, pcis->D, vec1_B));
4554b9ad928SBarry Smith 
4564b9ad928SBarry Smith   /* Neumann Solver */
4579566063dSJacob Faibussowitsch   PetscCall(PCISApplyInvSchur(pc, vec2_B, vec1_B, vec1_N, vec2_N));
4584b9ad928SBarry Smith 
4594b9ad928SBarry Smith   /*
4604b9ad928SBarry Smith     Second balancing step.
4614b9ad928SBarry Smith   */
4624b9ad928SBarry Smith   {
463ace3abfcSBarry Smith     PetscBool flg = PETSC_FALSE;
4649566063dSJacob Faibussowitsch     PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_turn_off_second_balancing", &flg, NULL));
4654b9ad928SBarry Smith     if (!flg) {
4669566063dSJacob Faibussowitsch       PetscCall(PCNNBalancing(pc, r, vec1_B, z, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D, work_N));
4674b9ad928SBarry Smith     } else {
4689566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMult(vec2_B, pcis->D, vec1_B));
4699566063dSJacob Faibussowitsch       PetscCall(VecSet(z, 0.0));
4709566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
4719566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
4724b9ad928SBarry Smith     }
4734b9ad928SBarry Smith   }
4743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4754b9ad928SBarry Smith }
4764b9ad928SBarry Smith 
4774b9ad928SBarry Smith /*
4784b9ad928SBarry Smith    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
4794b9ad928SBarry Smith                    input argument u is provided), or s, as given in equations
4804b9ad928SBarry Smith                    (12) and (13), if the input argument u is a null vector.
4814b9ad928SBarry Smith                    Notice that the input argument u plays the role of u_i in
4824b9ad928SBarry Smith                    equation (14). The equation numbers refer to [Man93].
4834b9ad928SBarry Smith 
4844b9ad928SBarry Smith    Input Parameters:
4852fe279fdSBarry Smith +  pcnn - NN preconditioner context.
4864b9ad928SBarry Smith .  r - MPI vector of all nodes (interior and interface). It's preserved.
4872fe279fdSBarry Smith -  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
4884b9ad928SBarry Smith 
4894b9ad928SBarry Smith    Output Parameters:
4902fe279fdSBarry Smith +  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
4914b9ad928SBarry Smith .  vec1_B - Sequential vector of local interface nodes. Workspace.
4924b9ad928SBarry Smith .  vec2_B - Sequential vector of local interface nodes. Workspace.
4934b9ad928SBarry Smith .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
4944b9ad928SBarry Smith .  vec1_D - Sequential vector of local interior nodes. Workspace.
4954b9ad928SBarry Smith .  vec2_D - Sequential vector of local interior nodes. Workspace.
4962fe279fdSBarry Smith -  work_N - Array of all local nodes (interior and interface). Workspace.
4974b9ad928SBarry Smith 
4984b9ad928SBarry Smith */
499d71ae5a4SJacob Faibussowitsch PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
500d71ae5a4SJacob Faibussowitsch {
50113f74950SBarry Smith   PetscInt     k;
5024b9ad928SBarry Smith   PetscScalar  value;
5034b9ad928SBarry Smith   PetscScalar *lambda;
504*f4f49eeaSPierre Jolivet   PC_NN       *pcnn = (PC_NN *)pc->data;
505*f4f49eeaSPierre Jolivet   PC_IS       *pcis = (PC_IS *)pc->data;
5064b9ad928SBarry Smith 
5074b9ad928SBarry Smith   PetscFunctionBegin;
5089566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PC_ApplyCoarse, pc, 0, 0, 0));
5094b9ad928SBarry Smith   if (u) {
5102fa5cd67SKarl Rupp     if (!vec3_B) vec3_B = u;
5119566063dSJacob Faibussowitsch     PetscCall(VecPointwiseMult(vec1_B, pcis->D, u));
5129566063dSJacob Faibussowitsch     PetscCall(VecSet(z, 0.0));
5139566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
5149566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
5159566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5169566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5179566063dSJacob Faibussowitsch     PetscCall(PCISApplySchur(pc, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D));
5189566063dSJacob Faibussowitsch     PetscCall(VecScale(vec3_B, -1.0));
5199566063dSJacob Faibussowitsch     PetscCall(VecCopy(r, z));
5209566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE));
5219566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE));
5224b9ad928SBarry Smith   } else {
5239566063dSJacob Faibussowitsch     PetscCall(VecCopy(r, z));
5244b9ad928SBarry Smith   }
5259566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5269566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
52704c3f3b8SBarry Smith   PetscCall(PCISScatterArrayNToVecB(pc, work_N, vec2_B, INSERT_VALUES, SCATTER_REVERSE));
5282fa5cd67SKarl Rupp   for (k = 0, value = 0.0; k < pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]];
5294b9ad928SBarry Smith   value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */
5304b9ad928SBarry Smith   {
53113f74950SBarry Smith     PetscMPIInt rank;
5329566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
5339566063dSJacob Faibussowitsch     PetscCall(VecSetValue(pcnn->coarse_b, rank, value, INSERT_VALUES));
5344b9ad928SBarry Smith     /*
5354b9ad928SBarry Smith        Since we are only inserting local values (one value actually) we don't need to do the
5364b9ad928SBarry Smith        reduction that tells us there is no data that needs to be moved. Hence we comment out these
5379566063dSJacob Faibussowitsch        PetscCall(VecAssemblyBegin(pcnn->coarse_b));
5389566063dSJacob Faibussowitsch        PetscCall(VecAssemblyEnd  (pcnn->coarse_b));
5394b9ad928SBarry Smith     */
5404b9ad928SBarry Smith   }
5419566063dSJacob Faibussowitsch   PetscCall(KSPSolve(pcnn->ksp_coarse, pcnn->coarse_b, pcnn->coarse_x));
5429566063dSJacob Faibussowitsch   if (!u) PetscCall(VecScale(pcnn->coarse_x, -1.0));
5439566063dSJacob Faibussowitsch   PetscCall(VecGetArray(pcnn->coarse_x, &lambda));
5442fa5cd67SKarl Rupp   for (k = 0; k < pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k];
5459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(pcnn->coarse_x, &lambda));
54604c3f3b8SBarry Smith   PetscCall(PCISScatterArrayNToVecB(pc, work_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5479566063dSJacob Faibussowitsch   PetscCall(VecSet(z, 0.0));
5489566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
5499566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
5504b9ad928SBarry Smith   if (!u) {
5519566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5529566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5539566063dSJacob Faibussowitsch     PetscCall(PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D));
5549566063dSJacob Faibussowitsch     PetscCall(VecCopy(r, z));
5554b9ad928SBarry Smith   }
5569566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
5579566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
5589566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PC_ApplyCoarse, pc, 0, 0, 0));
5593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5604b9ad928SBarry Smith }
5614b9ad928SBarry Smith 
5624b9ad928SBarry Smith /*                                                     */
5634b9ad928SBarry Smith /*  From now on, "footnotes" (or "historical notes").  */
5644b9ad928SBarry Smith /*                                                     */
5654b9ad928SBarry Smith /*
566f1580f4eSBarry Smith    Historical note 01
567f1580f4eSBarry Smith 
5684b9ad928SBarry Smith    We considered the possibility of an alternative D_i that would still
5694b9ad928SBarry Smith    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
5704b9ad928SBarry Smith    The basic principle was still the pseudo-inverse of the counting
5714b9ad928SBarry Smith    function; the difference was that we would not count subdomains
5724b9ad928SBarry Smith    that do not contribute to the coarse space (i.e., not pure-Neumann
5734b9ad928SBarry Smith    subdomains).
5744b9ad928SBarry Smith 
5754b9ad928SBarry Smith    This turned out to be a bad idea:  we would solve trivial Neumann
5764b9ad928SBarry Smith    problems in the not pure-Neumann subdomains, since we would be scaling
5774b9ad928SBarry Smith    the balanced residual by zero.
5784b9ad928SBarry Smith */
5794b9ad928SBarry Smith 
5804b9ad928SBarry Smith /*
581f1580f4eSBarry Smith    Historical note 02
582f1580f4eSBarry Smith 
5834b9ad928SBarry Smith    We tried an alternative coarse problem, that would eliminate exactly a
5844b9ad928SBarry Smith    constant error. Turned out not to improve the overall convergence.
5854b9ad928SBarry Smith */
586