xref: /petsc/src/ksp/pc/impls/is/nn/nn.c (revision f1580f4e3ce5d5b2393648fd039d0d41b440385d)
14b9ad928SBarry Smith 
2c6db04a5SJed Brown #include <../src/ksp/pc/impls/is/nn/nn.h>
34b9ad928SBarry Smith 
44b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
54b9ad928SBarry Smith /*
64b9ad928SBarry Smith    PCSetUp_NN - Prepares for the use of the NN preconditioner
74b9ad928SBarry Smith                     by setting data structures and options.
84b9ad928SBarry Smith 
94b9ad928SBarry Smith    Input Parameter:
104b9ad928SBarry Smith .  pc - the preconditioner context
114b9ad928SBarry Smith 
124b9ad928SBarry Smith    Application Interface Routine: PCSetUp()
134b9ad928SBarry Smith 
14*f1580f4eSBarry Smith    Note:
154b9ad928SBarry Smith    The interface routine PCSetUp() is not usually called directly by
164b9ad928SBarry Smith    the user, but instead is called by PCApply() if necessary.
174b9ad928SBarry Smith */
189371c9d4SSatish Balay static PetscErrorCode PCSetUp_NN(PC pc) {
194b9ad928SBarry Smith   PetscFunctionBegin;
204b9ad928SBarry Smith   if (!pc->setupcalled) {
214b9ad928SBarry Smith     /* Set up all the "iterative substructuring" common block */
229566063dSJacob Faibussowitsch     PetscCall(PCISSetUp(pc, PETSC_TRUE, PETSC_TRUE));
234b9ad928SBarry Smith     /* Create the coarse matrix. */
249566063dSJacob Faibussowitsch     PetscCall(PCNNCreateCoarseMatrix(pc));
254b9ad928SBarry Smith   }
264b9ad928SBarry Smith   PetscFunctionReturn(0);
274b9ad928SBarry Smith }
284b9ad928SBarry Smith 
294b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
304b9ad928SBarry Smith /*
314b9ad928SBarry Smith    PCApply_NN - Applies the NN preconditioner to a vector.
324b9ad928SBarry Smith 
334b9ad928SBarry Smith    Input Parameters:
344b9ad928SBarry Smith .  pc - the preconditioner context
354b9ad928SBarry Smith .  r - input vector (global)
364b9ad928SBarry Smith 
374b9ad928SBarry Smith    Output Parameter:
384b9ad928SBarry Smith .  z - output vector (global)
394b9ad928SBarry Smith 
404b9ad928SBarry Smith    Application Interface Routine: PCApply()
414b9ad928SBarry Smith  */
429371c9d4SSatish Balay static PetscErrorCode PCApply_NN(PC pc, Vec r, Vec z) {
434b9ad928SBarry Smith   PC_IS      *pcis  = (PC_IS *)(pc->data);
444b9ad928SBarry Smith   PetscScalar m_one = -1.0;
454b9ad928SBarry Smith   Vec         w     = pcis->vec1_global;
464b9ad928SBarry Smith 
474b9ad928SBarry Smith   PetscFunctionBegin;
484b9ad928SBarry Smith   /*
494b9ad928SBarry Smith     Dirichlet solvers.
504b9ad928SBarry Smith     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
514b9ad928SBarry Smith     Storing the local results at vec2_D
524b9ad928SBarry Smith   */
539566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
549566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
559566063dSJacob Faibussowitsch   PetscCall(KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D));
564b9ad928SBarry Smith 
574b9ad928SBarry Smith   /*
584b9ad928SBarry Smith     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
594b9ad928SBarry Smith     Storing the result in the interface portion of the global vector w.
604b9ad928SBarry Smith   */
619566063dSJacob Faibussowitsch   PetscCall(MatMult(pcis->A_BI, pcis->vec2_D, pcis->vec1_B));
629566063dSJacob Faibussowitsch   PetscCall(VecScale(pcis->vec1_B, m_one));
639566063dSJacob Faibussowitsch   PetscCall(VecCopy(r, w));
649566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE));
659566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE));
664b9ad928SBarry Smith 
674b9ad928SBarry Smith   /*
684b9ad928SBarry Smith     Apply the interface preconditioner
694b9ad928SBarry Smith   */
70d0609cedSBarry Smith   PetscCall(PCNNApplyInterfacePreconditioner(pc, w, z, pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec3_B, pcis->vec1_D, pcis->vec3_D, pcis->vec1_N, pcis->vec2_N));
714b9ad928SBarry Smith 
724b9ad928SBarry Smith   /*
734b9ad928SBarry Smith     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
744b9ad928SBarry Smith     The result is stored in vec1_D.
754b9ad928SBarry Smith   */
769566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
779566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
789566063dSJacob Faibussowitsch   PetscCall(MatMult(pcis->A_IB, pcis->vec1_B, pcis->vec1_D));
794b9ad928SBarry Smith 
804b9ad928SBarry Smith   /*
814b9ad928SBarry Smith     Dirichlet solvers.
824b9ad928SBarry Smith     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
834b9ad928SBarry Smith     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
844b9ad928SBarry Smith   */
859566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
869566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
879566063dSJacob Faibussowitsch   PetscCall(KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D));
889566063dSJacob Faibussowitsch   PetscCall(VecScale(pcis->vec2_D, m_one));
899566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE));
909566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE));
914b9ad928SBarry Smith   PetscFunctionReturn(0);
924b9ad928SBarry Smith }
934b9ad928SBarry Smith 
944b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
954b9ad928SBarry Smith /*
964b9ad928SBarry Smith    PCDestroy_NN - Destroys the private context for the NN preconditioner
974b9ad928SBarry Smith    that was created with PCCreate_NN().
984b9ad928SBarry Smith 
994b9ad928SBarry Smith    Input Parameter:
1004b9ad928SBarry Smith .  pc - the preconditioner context
1014b9ad928SBarry Smith 
1024b9ad928SBarry Smith    Application Interface Routine: PCDestroy()
1034b9ad928SBarry Smith */
1049371c9d4SSatish Balay static PetscErrorCode PCDestroy_NN(PC pc) {
1054b9ad928SBarry Smith   PC_NN *pcnn = (PC_NN *)pc->data;
1064b9ad928SBarry Smith 
1074b9ad928SBarry Smith   PetscFunctionBegin;
1089566063dSJacob Faibussowitsch   PetscCall(PCISDestroy(pc));
1094b9ad928SBarry Smith 
1109566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&pcnn->coarse_mat));
1119566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&pcnn->coarse_x));
1129566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&pcnn->coarse_b));
1139566063dSJacob Faibussowitsch   PetscCall(KSPDestroy(&pcnn->ksp_coarse));
1144b9ad928SBarry Smith   if (pcnn->DZ_IN) {
1159566063dSJacob Faibussowitsch     PetscCall(PetscFree(pcnn->DZ_IN[0]));
1169566063dSJacob Faibussowitsch     PetscCall(PetscFree(pcnn->DZ_IN));
1174b9ad928SBarry Smith   }
1184b9ad928SBarry Smith 
1194b9ad928SBarry Smith   /*
1204b9ad928SBarry Smith       Free the private data structure that was hanging off the PC
1214b9ad928SBarry Smith   */
1229566063dSJacob Faibussowitsch   PetscCall(PetscFree(pc->data));
1234b9ad928SBarry Smith   PetscFunctionReturn(0);
1244b9ad928SBarry Smith }
1254b9ad928SBarry Smith 
126dfc58137SBarry Smith /*MC
12737a17b4dSBarry Smith    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
1284b9ad928SBarry Smith 
12937a17b4dSBarry Smith    Options Database Keys:
13037a17b4dSBarry Smith +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
13137a17b4dSBarry Smith                                        (this skips the first coarse grid solve in the preconditioner)
1325c9740d6SBarry Smith .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
13337a17b4dSBarry Smith                                        (this skips the second coarse grid solve in the preconditioner)
1345c9740d6SBarry Smith .    -pc_is_damp_fixed <fact> -
1355c9740d6SBarry Smith .    -pc_is_remove_nullspace_fixed -
1365c9740d6SBarry Smith .    -pc_is_set_damping_factor_floating <fact> -
1375c9740d6SBarry Smith .    -pc_is_not_damp_floating -
138a2b725a8SWilliam Gropp -    -pc_is_not_remove_nullspace_floating -
1394b9ad928SBarry Smith 
140*f1580f4eSBarry Smith    Options Database prefixes for the subsolvers this preconditioner uses:
141*f1580f4eSBarry Smith +  -nn_coarse_pc_ - for the coarse grid preconditioner
142*f1580f4eSBarry Smith .  -is_localD_pc_ - for the Dirichlet subproblem preconditioner
143*f1580f4eSBarry Smith -  -is_localN_pc_ - for the Neumann subproblem preconditioner
144*f1580f4eSBarry Smith 
1458eb2139fSSatish Balay    Level: intermediate
14637a17b4dSBarry Smith 
14795452b02SPatrick Sanan    Notes:
148*f1580f4eSBarry Smith     The matrix used with this preconditioner must be of type `MATIS`
14937a17b4dSBarry Smith 
1501b99c236SBarry Smith           Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
1511b99c236SBarry Smith           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
1521b99c236SBarry Smith           on the subdomains; though in our experience using approximate solvers is slower.).
1531b99c236SBarry Smith 
15437a17b4dSBarry Smith    Contributed by Paulo Goldfeld
15537a17b4dSBarry Smith 
156*f1580f4eSBarry Smith .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `MATIS`, `PCBDDC`
15737a17b4dSBarry Smith M*/
158b2573a8aSBarry Smith 
1599371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc) {
1604b9ad928SBarry Smith   PC_NN *pcnn;
1614b9ad928SBarry Smith 
1624b9ad928SBarry Smith   PetscFunctionBegin;
1634b9ad928SBarry Smith   /*
1644b9ad928SBarry Smith      Creates the private data structure for this preconditioner and
1654b9ad928SBarry Smith      attach it to the PC object.
1664b9ad928SBarry Smith   */
1679566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(pc, &pcnn));
1684b9ad928SBarry Smith   pc->data = (void *)pcnn;
1694b9ad928SBarry Smith 
1709566063dSJacob Faibussowitsch   PetscCall(PCISCreate(pc));
1710a545947SLisandro Dalcin   pcnn->coarse_mat = NULL;
1720a545947SLisandro Dalcin   pcnn->coarse_x   = NULL;
1730a545947SLisandro Dalcin   pcnn->coarse_b   = NULL;
1740a545947SLisandro Dalcin   pcnn->ksp_coarse = NULL;
1750a545947SLisandro Dalcin   pcnn->DZ_IN      = NULL;
1764b9ad928SBarry Smith 
1774b9ad928SBarry Smith   /*
1784b9ad928SBarry Smith       Set the pointers for the functions that are provided above.
1794b9ad928SBarry Smith       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
1804b9ad928SBarry Smith       are called, they will automatically call these functions.  Note we
1814b9ad928SBarry Smith       choose not to provide a couple of these functions since they are
1824b9ad928SBarry Smith       not needed.
1834b9ad928SBarry Smith   */
1844b9ad928SBarry Smith   pc->ops->apply               = PCApply_NN;
1850a545947SLisandro Dalcin   pc->ops->applytranspose      = NULL;
1864b9ad928SBarry Smith   pc->ops->setup               = PCSetUp_NN;
1874b9ad928SBarry Smith   pc->ops->destroy             = PCDestroy_NN;
1880a545947SLisandro Dalcin   pc->ops->view                = NULL;
1890a545947SLisandro Dalcin   pc->ops->applyrichardson     = NULL;
1900a545947SLisandro Dalcin   pc->ops->applysymmetricleft  = NULL;
1910a545947SLisandro Dalcin   pc->ops->applysymmetricright = NULL;
1924b9ad928SBarry Smith   PetscFunctionReturn(0);
1934b9ad928SBarry Smith }
1944b9ad928SBarry Smith 
1954b9ad928SBarry Smith /*
1964b9ad928SBarry Smith    PCNNCreateCoarseMatrix -
1974b9ad928SBarry Smith */
1989371c9d4SSatish Balay PetscErrorCode PCNNCreateCoarseMatrix(PC pc) {
1994b9ad928SBarry Smith   MPI_Request  *send_request, *recv_request;
20013f74950SBarry Smith   PetscInt      i, j, k;
2014b9ad928SBarry Smith   PetscScalar  *mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
2024b9ad928SBarry Smith   PetscScalar **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
2034b9ad928SBarry Smith 
2044b9ad928SBarry Smith   /* aliasing some names */
2054b9ad928SBarry Smith   PC_IS        *pcis     = (PC_IS *)(pc->data);
2064b9ad928SBarry Smith   PC_NN        *pcnn     = (PC_NN *)pc->data;
20713f74950SBarry Smith   PetscInt      n_neigh  = pcis->n_neigh;
20813f74950SBarry Smith   PetscInt     *neigh    = pcis->neigh;
20913f74950SBarry Smith   PetscInt     *n_shared = pcis->n_shared;
21013f74950SBarry Smith   PetscInt    **shared   = pcis->shared;
2114b9ad928SBarry Smith   PetscScalar **DZ_IN; /* Must be initialized after memory allocation. */
2124b9ad928SBarry Smith 
2134b9ad928SBarry Smith   PetscFunctionBegin;
2144b9ad928SBarry Smith   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
2159566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n_neigh * n_neigh + 1, &mat));
2164b9ad928SBarry Smith 
2174b9ad928SBarry Smith   /* Allocate memory for DZ */
2184b9ad928SBarry Smith   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
2194b9ad928SBarry Smith   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
2204b9ad928SBarry Smith   {
22113f74950SBarry Smith     PetscInt size_of_Z = 0;
2229566063dSJacob Faibussowitsch     PetscCall(PetscMalloc((n_neigh + 1) * sizeof(PetscScalar *), &pcnn->DZ_IN));
2234b9ad928SBarry Smith     DZ_IN = pcnn->DZ_IN;
2249566063dSJacob Faibussowitsch     PetscCall(PetscMalloc((n_neigh + 1) * sizeof(PetscScalar *), &DZ_OUT));
2252fa5cd67SKarl Rupp     for (i = 0; i < n_neigh; i++) size_of_Z += n_shared[i];
2269566063dSJacob Faibussowitsch     PetscCall(PetscMalloc((size_of_Z + 1) * sizeof(PetscScalar), &DZ_IN[0]));
2279566063dSJacob Faibussowitsch     PetscCall(PetscMalloc((size_of_Z + 1) * sizeof(PetscScalar), &DZ_OUT[0]));
2284b9ad928SBarry Smith   }
2294b9ad928SBarry Smith   for (i = 1; i < n_neigh; i++) {
2304b9ad928SBarry Smith     DZ_IN[i]  = DZ_IN[i - 1] + n_shared[i - 1];
2314b9ad928SBarry Smith     DZ_OUT[i] = DZ_OUT[i - 1] + n_shared[i - 1];
2324b9ad928SBarry Smith   }
2334b9ad928SBarry Smith 
2344b9ad928SBarry Smith   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
2354b9ad928SBarry Smith   /* First, set the auxiliary array pcis->work_N. */
2369566063dSJacob Faibussowitsch   PetscCall(PCISScatterArrayNToVecB(pcis->work_N, pcis->D, INSERT_VALUES, SCATTER_REVERSE, pc));
2374b9ad928SBarry Smith   for (i = 1; i < n_neigh; i++) {
238ad540459SPierre Jolivet     for (j = 0; j < n_shared[i]; j++) DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
2394b9ad928SBarry Smith   }
2404b9ad928SBarry Smith 
2414b9ad928SBarry Smith   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
2424b9ad928SBarry Smith   /* Notice that send_request[] and recv_request[] could have one less element. */
2434b9ad928SBarry Smith   /* We make them longer to have request[i] corresponding to neigh[i].          */
2444b9ad928SBarry Smith   {
24513f74950SBarry Smith     PetscMPIInt tag;
2469566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag));
2479566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(n_neigh + 1, &send_request, n_neigh + 1, &recv_request));
2484b9ad928SBarry Smith     for (i = 1; i < n_neigh; i++) {
2499566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend((void *)(DZ_OUT[i]), n_shared[i], MPIU_SCALAR, neigh[i], tag, PetscObjectComm((PetscObject)pc), &(send_request[i])));
2509566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv((void *)(DZ_IN[i]), n_shared[i], MPIU_SCALAR, neigh[i], tag, PetscObjectComm((PetscObject)pc), &(recv_request[i])));
2514b9ad928SBarry Smith     }
2524b9ad928SBarry Smith   }
2534b9ad928SBarry Smith 
2544b9ad928SBarry Smith   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
2552fa5cd67SKarl Rupp   for (j = 0; j < n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]];
2564b9ad928SBarry Smith 
2574b9ad928SBarry Smith   /* Start computing with local D*Z while communication goes on.    */
2584b9ad928SBarry Smith   /* Apply Schur complement. The result is "stored" in vec (more    */
2594b9ad928SBarry Smith   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
2604b9ad928SBarry Smith   /* and also scattered to pcnn->work_N.                            */
2619566063dSJacob Faibussowitsch   PetscCall(PCNNApplySchurToChunk(pc, n_shared[0], shared[0], DZ_IN[0], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D));
2624b9ad928SBarry Smith 
2634b9ad928SBarry Smith   /* Compute the first column, while completing the receiving. */
2644b9ad928SBarry Smith   for (i = 0; i < n_neigh; i++) {
2654b9ad928SBarry Smith     MPI_Status  stat;
26613f74950SBarry Smith     PetscMPIInt ind = 0;
2679371c9d4SSatish Balay     if (i > 0) {
2689371c9d4SSatish Balay       PetscCallMPI(MPI_Waitany(n_neigh - 1, recv_request + 1, &ind, &stat));
2699371c9d4SSatish Balay       ind++;
2709371c9d4SSatish Balay     }
2714b9ad928SBarry Smith     mat[ind * n_neigh + 0] = 0.0;
2722fa5cd67SKarl Rupp     for (k = 0; k < n_shared[ind]; k++) mat[ind * n_neigh + 0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
2734b9ad928SBarry Smith   }
2744b9ad928SBarry Smith 
2754b9ad928SBarry Smith   /* Compute the remaining of the columns */
2764b9ad928SBarry Smith   for (j = 1; j < n_neigh; j++) {
2779566063dSJacob Faibussowitsch     PetscCall(PCNNApplySchurToChunk(pc, n_shared[j], shared[j], DZ_IN[j], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D));
2784b9ad928SBarry Smith     for (i = 0; i < n_neigh; i++) {
2794b9ad928SBarry Smith       mat[i * n_neigh + j] = 0.0;
2802fa5cd67SKarl Rupp       for (k = 0; k < n_shared[i]; k++) mat[i * n_neigh + j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
2814b9ad928SBarry Smith     }
2824b9ad928SBarry Smith   }
2834b9ad928SBarry Smith 
2844b9ad928SBarry Smith   /* Complete the sending. */
2854b9ad928SBarry Smith   if (n_neigh > 1) {
2864b9ad928SBarry Smith     MPI_Status *stat;
2879566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n_neigh - 1, &stat));
2889566063dSJacob Faibussowitsch     if (n_neigh - 1) PetscCallMPI(MPI_Waitall(n_neigh - 1, &(send_request[1]), stat));
2899566063dSJacob Faibussowitsch     PetscCall(PetscFree(stat));
2904b9ad928SBarry Smith   }
2914b9ad928SBarry Smith 
2924b9ad928SBarry Smith   /* Free the memory for the MPI requests */
2939566063dSJacob Faibussowitsch   PetscCall(PetscFree2(send_request, recv_request));
2944b9ad928SBarry Smith 
2954b9ad928SBarry Smith   /* Free the memory for DZ_OUT */
2964b9ad928SBarry Smith   if (DZ_OUT) {
2979566063dSJacob Faibussowitsch     PetscCall(PetscFree(DZ_OUT[0]));
2989566063dSJacob Faibussowitsch     PetscCall(PetscFree(DZ_OUT));
2994b9ad928SBarry Smith   }
3004b9ad928SBarry Smith 
3014b9ad928SBarry Smith   {
30213f74950SBarry Smith     PetscMPIInt size;
3039566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
3044b9ad928SBarry Smith     /* Create the global coarse vectors (rhs and solution). */
3059566063dSJacob Faibussowitsch     PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pc), 1, size, &(pcnn->coarse_b)));
3069566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(pcnn->coarse_b, &(pcnn->coarse_x)));
307f204ca49SKris Buschelman     /* Create and set the global coarse AIJ matrix. */
3089566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &(pcnn->coarse_mat)));
3099566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(pcnn->coarse_mat, 1, 1, size, size));
3109566063dSJacob Faibussowitsch     PetscCall(MatSetType(pcnn->coarse_mat, MATAIJ));
3119566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(pcnn->coarse_mat, 1, NULL));
3129566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(pcnn->coarse_mat, 1, NULL, n_neigh, NULL));
3139566063dSJacob Faibussowitsch     PetscCall(MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
3149566063dSJacob Faibussowitsch     PetscCall(MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3159566063dSJacob Faibussowitsch     PetscCall(MatSetValues(pcnn->coarse_mat, n_neigh, neigh, n_neigh, neigh, mat, ADD_VALUES));
3169566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY));
3179566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY));
3184b9ad928SBarry Smith   }
3194b9ad928SBarry Smith 
3204b9ad928SBarry Smith   {
32113f74950SBarry Smith     PetscMPIInt rank;
3224b9ad928SBarry Smith     PetscScalar one = 1.0;
3239566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
3244b9ad928SBarry Smith     /* "Zero out" rows of not-purely-Neumann subdomains */
3254b9ad928SBarry Smith     if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
3269566063dSJacob Faibussowitsch       PetscCall(MatZeroRows(pcnn->coarse_mat, 0, NULL, one, NULL, NULL));
3274b9ad928SBarry Smith     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
3281c82e0cfSMatthew Knepley       PetscInt row = (PetscInt)rank;
3299566063dSJacob Faibussowitsch       PetscCall(MatZeroRows(pcnn->coarse_mat, 1, &row, one, NULL, NULL));
3304b9ad928SBarry Smith     }
3314b9ad928SBarry Smith   }
3324b9ad928SBarry Smith 
3334b9ad928SBarry Smith   /* Create the coarse linear solver context */
3344b9ad928SBarry Smith   {
3354b9ad928SBarry Smith     PC  pc_ctx, inner_pc;
33683ab6a24SBarry Smith     KSP inner_ksp;
33783ab6a24SBarry Smith 
3389566063dSJacob Faibussowitsch     PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &pcnn->ksp_coarse));
3399566063dSJacob Faibussowitsch     PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse, (PetscObject)pc, 2));
3409566063dSJacob Faibussowitsch     PetscCall(KSPSetOperators(pcnn->ksp_coarse, pcnn->coarse_mat, pcnn->coarse_mat));
3419566063dSJacob Faibussowitsch     PetscCall(KSPGetPC(pcnn->ksp_coarse, &pc_ctx));
3429566063dSJacob Faibussowitsch     PetscCall(PCSetType(pc_ctx, PCREDUNDANT));
3439566063dSJacob Faibussowitsch     PetscCall(KSPSetType(pcnn->ksp_coarse, KSPPREONLY));
3449566063dSJacob Faibussowitsch     PetscCall(PCRedundantGetKSP(pc_ctx, &inner_ksp));
3459566063dSJacob Faibussowitsch     PetscCall(KSPGetPC(inner_ksp, &inner_pc));
3469566063dSJacob Faibussowitsch     PetscCall(PCSetType(inner_pc, PCLU));
3479566063dSJacob Faibussowitsch     PetscCall(KSPSetOptionsPrefix(pcnn->ksp_coarse, "nn_coarse_"));
3489566063dSJacob Faibussowitsch     PetscCall(KSPSetFromOptions(pcnn->ksp_coarse));
3494b9ad928SBarry Smith     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
3509566063dSJacob Faibussowitsch     PetscCall(KSPSetUp(pcnn->ksp_coarse));
3514b9ad928SBarry Smith   }
3524b9ad928SBarry Smith 
3534b9ad928SBarry Smith   /* Free the memory for mat */
3549566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat));
3554b9ad928SBarry Smith 
3564b9ad928SBarry Smith   /* for DEBUGGING, save the coarse matrix to a file. */
3574b9ad928SBarry Smith   {
358ace3abfcSBarry Smith     PetscBool flg = PETSC_FALSE;
3599566063dSJacob Faibussowitsch     PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_nn_save_coarse_matrix", &flg, NULL));
3604b9ad928SBarry Smith     if (flg) {
3614b9ad928SBarry Smith       PetscViewer viewer;
3629566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_WORLD, "coarse.m", &viewer));
3639566063dSJacob Faibussowitsch       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
3649566063dSJacob Faibussowitsch       PetscCall(MatView(pcnn->coarse_mat, viewer));
3659566063dSJacob Faibussowitsch       PetscCall(PetscViewerPopFormat(viewer));
3669566063dSJacob Faibussowitsch       PetscCall(PetscViewerDestroy(&viewer));
3674b9ad928SBarry Smith     }
3684b9ad928SBarry Smith   }
3694b9ad928SBarry Smith 
3704b9ad928SBarry Smith   /*  Set the variable pcnn->factor_coarse_rhs. */
3714b9ad928SBarry Smith   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
3724b9ad928SBarry Smith 
3734b9ad928SBarry Smith   /* See historical note 02, at the bottom of this file. */
3744b9ad928SBarry Smith   PetscFunctionReturn(0);
3754b9ad928SBarry Smith }
3764b9ad928SBarry Smith 
3774b9ad928SBarry Smith /*
3784b9ad928SBarry Smith    PCNNApplySchurToChunk -
3794b9ad928SBarry Smith 
3804b9ad928SBarry Smith    Input parameters:
3814b9ad928SBarry Smith .  pcnn
3824b9ad928SBarry Smith .  n - size of chunk
3834b9ad928SBarry Smith .  idx - indices of chunk
3844b9ad928SBarry Smith .  chunk - values
3854b9ad928SBarry Smith 
3864b9ad928SBarry Smith    Output parameters:
3874b9ad928SBarry Smith .  array_N - result of Schur complement applied to chunk, scattered to big array
3884b9ad928SBarry Smith .  vec1_B  - result of Schur complement applied to chunk
3894b9ad928SBarry Smith .  vec2_B  - garbage (used as work space)
3904b9ad928SBarry Smith .  vec1_D  - garbage (used as work space)
3914b9ad928SBarry Smith .  vec2_D  - garbage (used as work space)
3924b9ad928SBarry Smith 
3934b9ad928SBarry Smith */
3949371c9d4SSatish Balay PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) {
39513f74950SBarry Smith   PetscInt i;
3964b9ad928SBarry Smith   PC_IS   *pcis = (PC_IS *)(pc->data);
3974b9ad928SBarry Smith 
3984b9ad928SBarry Smith   PetscFunctionBegin;
3999566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(array_N, pcis->n));
4002fa5cd67SKarl Rupp   for (i = 0; i < n; i++) array_N[idx[i]] = chunk[i];
4019566063dSJacob Faibussowitsch   PetscCall(PCISScatterArrayNToVecB(array_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD, pc));
4029566063dSJacob Faibussowitsch   PetscCall(PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D));
4039566063dSJacob Faibussowitsch   PetscCall(PCISScatterArrayNToVecB(array_N, vec1_B, INSERT_VALUES, SCATTER_REVERSE, pc));
4044b9ad928SBarry Smith   PetscFunctionReturn(0);
4054b9ad928SBarry Smith }
4064b9ad928SBarry Smith 
4074b9ad928SBarry Smith /*
4084b9ad928SBarry Smith    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
4094b9ad928SBarry Smith                                       the preconditioner for the Schur complement.
4104b9ad928SBarry Smith 
4114b9ad928SBarry Smith    Input parameter:
4124b9ad928SBarry Smith .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
4134b9ad928SBarry Smith 
4144b9ad928SBarry Smith    Output parameters:
4154b9ad928SBarry Smith .  z - global vector of interior and interface nodes. The values on the interface are the result of
4164b9ad928SBarry Smith        the application of the interface preconditioner to the interface part of r. The values on the
4174b9ad928SBarry Smith        interior nodes are garbage.
4184b9ad928SBarry Smith .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4194b9ad928SBarry Smith .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4204b9ad928SBarry Smith .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4214b9ad928SBarry Smith .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4224b9ad928SBarry Smith .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
4234b9ad928SBarry Smith .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
4244b9ad928SBarry Smith .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4254b9ad928SBarry Smith .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4264b9ad928SBarry Smith 
4274b9ad928SBarry Smith */
4289371c9d4SSatish Balay PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, Vec vec1_N, Vec vec2_N) {
4294b9ad928SBarry Smith   PC_IS *pcis = (PC_IS *)(pc->data);
4304b9ad928SBarry Smith 
4314b9ad928SBarry Smith   PetscFunctionBegin;
4324b9ad928SBarry Smith   /*
4334b9ad928SBarry Smith     First balancing step.
4344b9ad928SBarry Smith   */
4354b9ad928SBarry Smith   {
436ace3abfcSBarry Smith     PetscBool flg = PETSC_FALSE;
4379566063dSJacob Faibussowitsch     PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_nn_turn_off_first_balancing", &flg, NULL));
4384b9ad928SBarry Smith     if (!flg) {
4399566063dSJacob Faibussowitsch       PetscCall(PCNNBalancing(pc, r, (Vec)0, z, vec1_B, vec2_B, (Vec)0, vec1_D, vec2_D, work_N));
4404b9ad928SBarry Smith     } else {
4419566063dSJacob Faibussowitsch       PetscCall(VecCopy(r, z));
4424b9ad928SBarry Smith     }
4434b9ad928SBarry Smith   }
4444b9ad928SBarry Smith 
4454b9ad928SBarry Smith   /*
4464b9ad928SBarry Smith     Extract the local interface part of z and scale it by D
4474b9ad928SBarry Smith   */
4489566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD));
4499566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD));
4509566063dSJacob Faibussowitsch   PetscCall(VecPointwiseMult(vec2_B, pcis->D, vec1_B));
4514b9ad928SBarry Smith 
4524b9ad928SBarry Smith   /* Neumann Solver */
4539566063dSJacob Faibussowitsch   PetscCall(PCISApplyInvSchur(pc, vec2_B, vec1_B, vec1_N, vec2_N));
4544b9ad928SBarry Smith 
4554b9ad928SBarry Smith   /*
4564b9ad928SBarry Smith     Second balancing step.
4574b9ad928SBarry Smith   */
4584b9ad928SBarry Smith   {
459ace3abfcSBarry Smith     PetscBool flg = PETSC_FALSE;
4609566063dSJacob Faibussowitsch     PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_turn_off_second_balancing", &flg, NULL));
4614b9ad928SBarry Smith     if (!flg) {
4629566063dSJacob Faibussowitsch       PetscCall(PCNNBalancing(pc, r, vec1_B, z, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D, work_N));
4634b9ad928SBarry Smith     } else {
4649566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMult(vec2_B, pcis->D, vec1_B));
4659566063dSJacob Faibussowitsch       PetscCall(VecSet(z, 0.0));
4669566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
4679566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
4684b9ad928SBarry Smith     }
4694b9ad928SBarry Smith   }
4704b9ad928SBarry Smith   PetscFunctionReturn(0);
4714b9ad928SBarry Smith }
4724b9ad928SBarry Smith 
4734b9ad928SBarry Smith /*
4744b9ad928SBarry Smith    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
4754b9ad928SBarry Smith                    input argument u is provided), or s, as given in equations
4764b9ad928SBarry Smith                    (12) and (13), if the input argument u is a null vector.
4774b9ad928SBarry Smith                    Notice that the input argument u plays the role of u_i in
4784b9ad928SBarry Smith                    equation (14). The equation numbers refer to [Man93].
4794b9ad928SBarry Smith 
4804b9ad928SBarry Smith    Input Parameters:
4814b9ad928SBarry Smith .  pcnn - NN preconditioner context.
4824b9ad928SBarry Smith .  r - MPI vector of all nodes (interior and interface). It's preserved.
4834b9ad928SBarry Smith .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
4844b9ad928SBarry Smith 
4854b9ad928SBarry Smith    Output Parameters:
4864b9ad928SBarry Smith .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
4874b9ad928SBarry Smith .  vec1_B - Sequential vector of local interface nodes. Workspace.
4884b9ad928SBarry Smith .  vec2_B - Sequential vector of local interface nodes. Workspace.
4894b9ad928SBarry Smith .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
4904b9ad928SBarry Smith .  vec1_D - Sequential vector of local interior nodes. Workspace.
4914b9ad928SBarry Smith .  vec2_D - Sequential vector of local interior nodes. Workspace.
4924b9ad928SBarry Smith .  work_N - Array of all local nodes (interior and interface). Workspace.
4934b9ad928SBarry Smith 
4944b9ad928SBarry Smith */
4959371c9d4SSatish Balay PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, PetscScalar *work_N) {
49613f74950SBarry Smith   PetscInt     k;
4974b9ad928SBarry Smith   PetscScalar  value;
4984b9ad928SBarry Smith   PetscScalar *lambda;
4994b9ad928SBarry Smith   PC_NN       *pcnn = (PC_NN *)(pc->data);
5004b9ad928SBarry Smith   PC_IS       *pcis = (PC_IS *)(pc->data);
5014b9ad928SBarry Smith 
5024b9ad928SBarry Smith   PetscFunctionBegin;
5039566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PC_ApplyCoarse, pc, 0, 0, 0));
5044b9ad928SBarry Smith   if (u) {
5052fa5cd67SKarl Rupp     if (!vec3_B) vec3_B = u;
5069566063dSJacob Faibussowitsch     PetscCall(VecPointwiseMult(vec1_B, pcis->D, u));
5079566063dSJacob Faibussowitsch     PetscCall(VecSet(z, 0.0));
5089566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
5099566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
5109566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5119566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5129566063dSJacob Faibussowitsch     PetscCall(PCISApplySchur(pc, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D));
5139566063dSJacob Faibussowitsch     PetscCall(VecScale(vec3_B, -1.0));
5149566063dSJacob Faibussowitsch     PetscCall(VecCopy(r, z));
5159566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE));
5169566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE));
5174b9ad928SBarry Smith   } else {
5189566063dSJacob Faibussowitsch     PetscCall(VecCopy(r, z));
5194b9ad928SBarry Smith   }
5209566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5219566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5229566063dSJacob Faibussowitsch   PetscCall(PCISScatterArrayNToVecB(work_N, vec2_B, INSERT_VALUES, SCATTER_REVERSE, pc));
5232fa5cd67SKarl Rupp   for (k = 0, value = 0.0; k < pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]];
5244b9ad928SBarry Smith   value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */
5254b9ad928SBarry Smith   {
52613f74950SBarry Smith     PetscMPIInt rank;
5279566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
5289566063dSJacob Faibussowitsch     PetscCall(VecSetValue(pcnn->coarse_b, rank, value, INSERT_VALUES));
5294b9ad928SBarry Smith     /*
5304b9ad928SBarry Smith        Since we are only inserting local values (one value actually) we don't need to do the
5314b9ad928SBarry Smith        reduction that tells us there is no data that needs to be moved. Hence we comment out these
5329566063dSJacob Faibussowitsch        PetscCall(VecAssemblyBegin(pcnn->coarse_b));
5339566063dSJacob Faibussowitsch        PetscCall(VecAssemblyEnd  (pcnn->coarse_b));
5344b9ad928SBarry Smith     */
5354b9ad928SBarry Smith   }
5369566063dSJacob Faibussowitsch   PetscCall(KSPSolve(pcnn->ksp_coarse, pcnn->coarse_b, pcnn->coarse_x));
5379566063dSJacob Faibussowitsch   if (!u) PetscCall(VecScale(pcnn->coarse_x, -1.0));
5389566063dSJacob Faibussowitsch   PetscCall(VecGetArray(pcnn->coarse_x, &lambda));
5392fa5cd67SKarl Rupp   for (k = 0; k < pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k];
5409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(pcnn->coarse_x, &lambda));
5419566063dSJacob Faibussowitsch   PetscCall(PCISScatterArrayNToVecB(work_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD, pc));
5429566063dSJacob Faibussowitsch   PetscCall(VecSet(z, 0.0));
5439566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
5449566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
5454b9ad928SBarry Smith   if (!u) {
5469566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5479566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
5489566063dSJacob Faibussowitsch     PetscCall(PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D));
5499566063dSJacob Faibussowitsch     PetscCall(VecCopy(r, z));
5504b9ad928SBarry Smith   }
5519566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
5529566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
5539566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PC_ApplyCoarse, pc, 0, 0, 0));
5544b9ad928SBarry Smith   PetscFunctionReturn(0);
5554b9ad928SBarry Smith }
5564b9ad928SBarry Smith 
5574b9ad928SBarry Smith /*                                                     */
5584b9ad928SBarry Smith /*  From now on, "footnotes" (or "historical notes").  */
5594b9ad928SBarry Smith /*                                                     */
5604b9ad928SBarry Smith /*
561*f1580f4eSBarry Smith    Historical note 01
562*f1580f4eSBarry Smith 
5634b9ad928SBarry Smith    We considered the possibility of an alternative D_i that would still
5644b9ad928SBarry Smith    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
5654b9ad928SBarry Smith    The basic principle was still the pseudo-inverse of the counting
5664b9ad928SBarry Smith    function; the difference was that we would not count subdomains
5674b9ad928SBarry Smith    that do not contribute to the coarse space (i.e., not pure-Neumann
5684b9ad928SBarry Smith    subdomains).
5694b9ad928SBarry Smith 
5704b9ad928SBarry Smith    This turned out to be a bad idea:  we would solve trivial Neumann
5714b9ad928SBarry Smith    problems in the not pure-Neumann subdomains, since we would be scaling
5724b9ad928SBarry Smith    the balanced residual by zero.
5734b9ad928SBarry Smith */
5744b9ad928SBarry Smith 
5754b9ad928SBarry Smith /*
576*f1580f4eSBarry Smith    Historical note 02
577*f1580f4eSBarry Smith 
5784b9ad928SBarry Smith    We tried an alternative coarse problem, that would eliminate exactly a
5794b9ad928SBarry Smith    constant error. Turned out not to improve the overall convergence.
5804b9ad928SBarry Smith */
581