xref: /petsc/src/ksp/pc/impls/is/nn/nn.c (revision 38f2d2fdb3b6f522a3102c6eb796cebecf3224c0)
1dba47a55SKris Buschelman #define PETSCKSP_DLL
24b9ad928SBarry Smith 
34b9ad928SBarry Smith #include "src/ksp/pc/impls/is/nn/nn.h"
44b9ad928SBarry Smith 
54b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
64b9ad928SBarry Smith /*
74b9ad928SBarry Smith    PCSetUp_NN - Prepares for the use of the NN preconditioner
84b9ad928SBarry Smith                     by setting data structures and options.
94b9ad928SBarry Smith 
104b9ad928SBarry Smith    Input Parameter:
114b9ad928SBarry Smith .  pc - the preconditioner context
124b9ad928SBarry Smith 
134b9ad928SBarry Smith    Application Interface Routine: PCSetUp()
144b9ad928SBarry Smith 
154b9ad928SBarry Smith    Notes:
164b9ad928SBarry Smith    The interface routine PCSetUp() is not usually called directly by
174b9ad928SBarry Smith    the user, but instead is called by PCApply() if necessary.
184b9ad928SBarry Smith */
194b9ad928SBarry Smith #undef __FUNCT__
204b9ad928SBarry Smith #define __FUNCT__ "PCSetUp_NN"
216849ba73SBarry Smith static PetscErrorCode PCSetUp_NN(PC pc)
224b9ad928SBarry Smith {
23dfbe8321SBarry Smith   PetscErrorCode ierr;
244b9ad928SBarry Smith 
254b9ad928SBarry Smith   PetscFunctionBegin;
264b9ad928SBarry Smith   if (!pc->setupcalled) {
274b9ad928SBarry Smith     /* Set up all the "iterative substructuring" common block */
284b9ad928SBarry Smith     ierr = PCISSetUp(pc);CHKERRQ(ierr);
294b9ad928SBarry Smith     /* Create the coarse matrix. */
304b9ad928SBarry Smith     ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr);
314b9ad928SBarry Smith   }
324b9ad928SBarry Smith   PetscFunctionReturn(0);
334b9ad928SBarry Smith }
344b9ad928SBarry Smith 
354b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
364b9ad928SBarry Smith /*
374b9ad928SBarry Smith    PCApply_NN - Applies the NN preconditioner to a vector.
384b9ad928SBarry Smith 
394b9ad928SBarry Smith    Input Parameters:
404b9ad928SBarry Smith .  pc - the preconditioner context
414b9ad928SBarry Smith .  r - input vector (global)
424b9ad928SBarry Smith 
434b9ad928SBarry Smith    Output Parameter:
444b9ad928SBarry Smith .  z - output vector (global)
454b9ad928SBarry Smith 
464b9ad928SBarry Smith    Application Interface Routine: PCApply()
474b9ad928SBarry Smith  */
484b9ad928SBarry Smith #undef __FUNCT__
494b9ad928SBarry Smith #define __FUNCT__ "PCApply_NN"
506849ba73SBarry Smith static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
514b9ad928SBarry Smith {
524b9ad928SBarry Smith   PC_IS          *pcis = (PC_IS*)(pc->data);
53dfbe8321SBarry Smith   PetscErrorCode ierr;
544b9ad928SBarry Smith   PetscScalar    m_one = -1.0;
554b9ad928SBarry Smith   Vec            w = pcis->vec1_global;
564b9ad928SBarry Smith 
574b9ad928SBarry Smith   PetscFunctionBegin;
584b9ad928SBarry Smith   /*
594b9ad928SBarry Smith     Dirichlet solvers.
604b9ad928SBarry Smith     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
614b9ad928SBarry Smith     Storing the local results at vec2_D
624b9ad928SBarry Smith   */
63ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
64ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6523ce1328SBarry Smith   ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
664b9ad928SBarry Smith 
674b9ad928SBarry Smith   /*
684b9ad928SBarry Smith     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
694b9ad928SBarry Smith     Storing the result in the interface portion of the global vector w.
704b9ad928SBarry Smith   */
714b9ad928SBarry Smith   ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
722dcb1b2aSMatthew Knepley   ierr = VecScale(pcis->vec1_B,m_one);CHKERRQ(ierr);
734b9ad928SBarry Smith   ierr = VecCopy(r,w);CHKERRQ(ierr);
74ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
75ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
764b9ad928SBarry Smith 
774b9ad928SBarry Smith   /*
784b9ad928SBarry Smith     Apply the interface preconditioner
794b9ad928SBarry Smith   */
804b9ad928SBarry Smith   ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
814b9ad928SBarry Smith                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
824b9ad928SBarry Smith 
834b9ad928SBarry Smith   /*
844b9ad928SBarry Smith     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
854b9ad928SBarry Smith     The result is stored in vec1_D.
864b9ad928SBarry Smith   */
87ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
88ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894b9ad928SBarry Smith   ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr);
904b9ad928SBarry Smith 
914b9ad928SBarry Smith   /*
924b9ad928SBarry Smith     Dirichlet solvers.
934b9ad928SBarry Smith     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
944b9ad928SBarry Smith     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
954b9ad928SBarry Smith   */
96ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
97ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
9823ce1328SBarry Smith   ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
992dcb1b2aSMatthew Knepley   ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr);
100ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
101ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1024b9ad928SBarry Smith   PetscFunctionReturn(0);
1034b9ad928SBarry Smith }
1044b9ad928SBarry Smith 
1054b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
1064b9ad928SBarry Smith /*
1074b9ad928SBarry Smith    PCDestroy_NN - Destroys the private context for the NN preconditioner
1084b9ad928SBarry Smith    that was created with PCCreate_NN().
1094b9ad928SBarry Smith 
1104b9ad928SBarry Smith    Input Parameter:
1114b9ad928SBarry Smith .  pc - the preconditioner context
1124b9ad928SBarry Smith 
1134b9ad928SBarry Smith    Application Interface Routine: PCDestroy()
1144b9ad928SBarry Smith */
1154b9ad928SBarry Smith #undef __FUNCT__
1164b9ad928SBarry Smith #define __FUNCT__ "PCDestroy_NN"
1176849ba73SBarry Smith static PetscErrorCode PCDestroy_NN(PC pc)
1184b9ad928SBarry Smith {
1194b9ad928SBarry Smith   PC_NN          *pcnn = (PC_NN*)pc->data;
120dfbe8321SBarry Smith   PetscErrorCode ierr;
1214b9ad928SBarry Smith 
1224b9ad928SBarry Smith   PetscFunctionBegin;
1234b9ad928SBarry Smith   ierr = PCISDestroy(pc);CHKERRQ(ierr);
1244b9ad928SBarry Smith 
1254b9ad928SBarry Smith   if (pcnn->coarse_mat)  {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);}
1264b9ad928SBarry Smith   if (pcnn->coarse_x)    {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);}
1274b9ad928SBarry Smith   if (pcnn->coarse_b)    {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);}
1284b9ad928SBarry Smith   if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);}
1294b9ad928SBarry Smith   if (pcnn->DZ_IN) {
13005b42c5fSBarry Smith     ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);
1314b9ad928SBarry Smith     ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr);
1324b9ad928SBarry Smith   }
1334b9ad928SBarry Smith 
1344b9ad928SBarry Smith   /*
1354b9ad928SBarry Smith       Free the private data structure that was hanging off the PC
1364b9ad928SBarry Smith   */
1374b9ad928SBarry Smith   ierr = PetscFree(pcnn);CHKERRQ(ierr);
1384b9ad928SBarry Smith   PetscFunctionReturn(0);
1394b9ad928SBarry Smith }
1404b9ad928SBarry Smith 
1414b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
142dfc58137SBarry Smith /*MC
14337a17b4dSBarry Smith    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
1444b9ad928SBarry Smith 
14537a17b4dSBarry Smith    Options Database Keys:
14637a17b4dSBarry Smith +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
14737a17b4dSBarry Smith                                        (this skips the first coarse grid solve in the preconditioner)
1485c9740d6SBarry Smith .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
14937a17b4dSBarry Smith                                        (this skips the second coarse grid solve in the preconditioner)
1505c9740d6SBarry Smith .    -pc_is_damp_fixed <fact> -
1515c9740d6SBarry Smith .    -pc_is_remove_nullspace_fixed -
1525c9740d6SBarry Smith .    -pc_is_set_damping_factor_floating <fact> -
1535c9740d6SBarry Smith .    -pc_is_not_damp_floating -
1545c9740d6SBarry Smith +    -pc_is_not_remove_nullspace_floating -
1554b9ad928SBarry Smith 
1568eb2139fSSatish Balay    Level: intermediate
15737a17b4dSBarry Smith 
15837a17b4dSBarry Smith    Notes: The matrix used with this preconditioner must be of type MATIS
15937a17b4dSBarry Smith 
1601b99c236SBarry Smith           Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
1611b99c236SBarry Smith           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
1621b99c236SBarry Smith           on the subdomains; though in our experience using approximate solvers is slower.).
1631b99c236SBarry Smith 
1645c9740d6SBarry Smith           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
1655c9740d6SBarry Smith           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
1665c9740d6SBarry Smith           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
16737a17b4dSBarry Smith 
16837a17b4dSBarry Smith    Contributed by Paulo Goldfeld
16937a17b4dSBarry Smith 
17078392ef1SBarry Smith .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MATIS
17137a17b4dSBarry Smith M*/
1724b9ad928SBarry Smith EXTERN_C_BEGIN
1734b9ad928SBarry Smith #undef __FUNCT__
1744b9ad928SBarry Smith #define __FUNCT__ "PCCreate_NN"
175dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_NN(PC pc)
1764b9ad928SBarry Smith {
177dfbe8321SBarry Smith   PetscErrorCode ierr;
1784b9ad928SBarry Smith   PC_NN          *pcnn;
1794b9ad928SBarry Smith 
1804b9ad928SBarry Smith   PetscFunctionBegin;
1814b9ad928SBarry Smith   /*
1824b9ad928SBarry Smith      Creates the private data structure for this preconditioner and
1834b9ad928SBarry Smith      attach it to the PC object.
1844b9ad928SBarry Smith   */
185*38f2d2fdSLisandro Dalcin   ierr      = PetscNewLog(pc,PC_NN,&pcnn);CHKERRQ(ierr);
1864b9ad928SBarry Smith   pc->data  = (void*)pcnn;
1874b9ad928SBarry Smith 
1884b9ad928SBarry Smith   ierr = PCISCreate(pc);CHKERRQ(ierr);
1894b9ad928SBarry Smith   pcnn->coarse_mat  = 0;
1904b9ad928SBarry Smith   pcnn->coarse_x    = 0;
1914b9ad928SBarry Smith   pcnn->coarse_b    = 0;
1924b9ad928SBarry Smith   pcnn->ksp_coarse = 0;
1934b9ad928SBarry Smith   pcnn->DZ_IN       = 0;
1944b9ad928SBarry Smith 
1954b9ad928SBarry Smith   /*
1964b9ad928SBarry Smith       Set the pointers for the functions that are provided above.
1974b9ad928SBarry Smith       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
1984b9ad928SBarry Smith       are called, they will automatically call these functions.  Note we
1994b9ad928SBarry Smith       choose not to provide a couple of these functions since they are
2004b9ad928SBarry Smith       not needed.
2014b9ad928SBarry Smith   */
2024b9ad928SBarry Smith   pc->ops->apply               = PCApply_NN;
2034b9ad928SBarry Smith   pc->ops->applytranspose      = 0;
2044b9ad928SBarry Smith   pc->ops->setup               = PCSetUp_NN;
2054b9ad928SBarry Smith   pc->ops->destroy             = PCDestroy_NN;
2064b9ad928SBarry Smith   pc->ops->view                = 0;
2074b9ad928SBarry Smith   pc->ops->applyrichardson     = 0;
2084b9ad928SBarry Smith   pc->ops->applysymmetricleft  = 0;
2094b9ad928SBarry Smith   pc->ops->applysymmetricright = 0;
2104b9ad928SBarry Smith   PetscFunctionReturn(0);
2114b9ad928SBarry Smith }
2124b9ad928SBarry Smith EXTERN_C_END
2134b9ad928SBarry Smith 
2144b9ad928SBarry Smith 
2154b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
2164b9ad928SBarry Smith /*
2174b9ad928SBarry Smith    PCNNCreateCoarseMatrix -
2184b9ad928SBarry Smith */
2194b9ad928SBarry Smith #undef __FUNCT__
2204b9ad928SBarry Smith #define __FUNCT__ "PCNNCreateCoarseMatrix"
221dfbe8321SBarry Smith PetscErrorCode PCNNCreateCoarseMatrix (PC pc)
2224b9ad928SBarry Smith {
2234b9ad928SBarry Smith   MPI_Request    *send_request, *recv_request;
2246849ba73SBarry Smith   PetscErrorCode ierr;
22513f74950SBarry Smith   PetscInt       i, j, k;
2264b9ad928SBarry Smith   PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
2274b9ad928SBarry Smith   PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
2284b9ad928SBarry Smith 
2294b9ad928SBarry Smith   /* aliasing some names */
2304b9ad928SBarry Smith   PC_IS*         pcis     = (PC_IS*)(pc->data);
2314b9ad928SBarry Smith   PC_NN*         pcnn     = (PC_NN*)pc->data;
23213f74950SBarry Smith   PetscInt       n_neigh  = pcis->n_neigh;
23313f74950SBarry Smith   PetscInt*      neigh    = pcis->neigh;
23413f74950SBarry Smith   PetscInt*      n_shared = pcis->n_shared;
23513f74950SBarry Smith   PetscInt**     shared   = pcis->shared;
2364b9ad928SBarry Smith   PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */
2374b9ad928SBarry Smith 
2384b9ad928SBarry Smith   PetscFunctionBegin;
2394b9ad928SBarry Smith   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
2404b9ad928SBarry Smith   ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr);
2414b9ad928SBarry Smith 
2424b9ad928SBarry Smith   /* Allocate memory for DZ */
2434b9ad928SBarry Smith   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
2444b9ad928SBarry Smith   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
2454b9ad928SBarry Smith   {
24613f74950SBarry Smith     PetscInt size_of_Z = 0;
2474b9ad928SBarry Smith     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr);
2484b9ad928SBarry Smith     DZ_IN = pcnn->DZ_IN;
2494b9ad928SBarry Smith     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr);
2504b9ad928SBarry Smith     for (i=0; i<n_neigh; i++) {
2514b9ad928SBarry Smith       size_of_Z += n_shared[i];
2524b9ad928SBarry Smith     }
2534b9ad928SBarry Smith     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr);
2544b9ad928SBarry Smith     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr);
2554b9ad928SBarry Smith   }
2564b9ad928SBarry Smith   for (i=1; i<n_neigh; i++) {
2574b9ad928SBarry Smith     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
2584b9ad928SBarry Smith     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
2594b9ad928SBarry Smith   }
2604b9ad928SBarry Smith 
2614b9ad928SBarry Smith   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
2624b9ad928SBarry Smith   /* First, set the auxiliary array pcis->work_N. */
2634b9ad928SBarry Smith   ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
2644b9ad928SBarry Smith   for (i=1; i<n_neigh; i++){
2654b9ad928SBarry Smith     for (j=0; j<n_shared[i]; j++) {
2664b9ad928SBarry Smith       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
2674b9ad928SBarry Smith     }
2684b9ad928SBarry Smith   }
2694b9ad928SBarry Smith 
2704b9ad928SBarry Smith   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
2714b9ad928SBarry Smith   /* Notice that send_request[] and recv_request[] could have one less element. */
2724b9ad928SBarry Smith   /* We make them longer to have request[i] corresponding to neigh[i].          */
2734b9ad928SBarry Smith   {
27413f74950SBarry Smith     PetscMPIInt tag;
2754b9ad928SBarry Smith     ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
2764b9ad928SBarry Smith     ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr);
2774b9ad928SBarry Smith     recv_request = send_request + (n_neigh);
2784b9ad928SBarry Smith     for (i=1; i<n_neigh; i++) {
2794b9ad928SBarry Smith       ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr);
2804b9ad928SBarry Smith       ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr);
2814b9ad928SBarry Smith     }
2824b9ad928SBarry Smith   }
2834b9ad928SBarry Smith 
2844b9ad928SBarry Smith   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
2854b9ad928SBarry Smith   for(j=0; j<n_shared[0]; j++) {
2864b9ad928SBarry Smith     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
2874b9ad928SBarry Smith   }
2884b9ad928SBarry Smith 
2894b9ad928SBarry Smith   /* Start computing with local D*Z while communication goes on.    */
2904b9ad928SBarry Smith   /* Apply Schur complement. The result is "stored" in vec (more    */
2914b9ad928SBarry Smith   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
2924b9ad928SBarry Smith   /* and also scattered to pcnn->work_N.                            */
2934b9ad928SBarry Smith   ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
2944b9ad928SBarry Smith                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2954b9ad928SBarry Smith 
2964b9ad928SBarry Smith   /* Compute the first column, while completing the receiving. */
2974b9ad928SBarry Smith   for (i=0; i<n_neigh; i++) {
2984b9ad928SBarry Smith     MPI_Status  stat;
29913f74950SBarry Smith     PetscMPIInt ind=0;
3004b9ad928SBarry Smith     if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;}
3014b9ad928SBarry Smith     mat[ind*n_neigh+0] = 0.0;
3024b9ad928SBarry Smith     for (k=0; k<n_shared[ind]; k++) {
3034b9ad928SBarry Smith       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
3044b9ad928SBarry Smith     }
3054b9ad928SBarry Smith   }
3064b9ad928SBarry Smith 
3074b9ad928SBarry Smith   /* Compute the remaining of the columns */
3084b9ad928SBarry Smith   for (j=1; j<n_neigh; j++) {
3094b9ad928SBarry Smith     ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
3104b9ad928SBarry Smith                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
3114b9ad928SBarry Smith     for (i=0; i<n_neigh; i++) {
3124b9ad928SBarry Smith       mat[i*n_neigh+j] = 0.0;
3134b9ad928SBarry Smith       for (k=0; k<n_shared[i]; k++) {
3144b9ad928SBarry Smith 	mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
3154b9ad928SBarry Smith       }
3164b9ad928SBarry Smith     }
3174b9ad928SBarry Smith   }
3184b9ad928SBarry Smith 
3194b9ad928SBarry Smith   /* Complete the sending. */
3204b9ad928SBarry Smith   if (n_neigh>1) {
3214b9ad928SBarry Smith     MPI_Status *stat;
3224b9ad928SBarry Smith     ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr);
3230c468ba9SBarry Smith     if (n_neigh-1) {ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);}
3244b9ad928SBarry Smith     ierr = PetscFree(stat);CHKERRQ(ierr);
3254b9ad928SBarry Smith   }
3264b9ad928SBarry Smith 
3274b9ad928SBarry Smith   /* Free the memory for the MPI requests */
3284b9ad928SBarry Smith   ierr = PetscFree(send_request);CHKERRQ(ierr);
3294b9ad928SBarry Smith 
3304b9ad928SBarry Smith   /* Free the memory for DZ_OUT */
3314b9ad928SBarry Smith   if (DZ_OUT) {
33205b42c5fSBarry Smith     ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr);
3334b9ad928SBarry Smith     ierr = PetscFree(DZ_OUT);CHKERRQ(ierr);
3344b9ad928SBarry Smith   }
3354b9ad928SBarry Smith 
3364b9ad928SBarry Smith   {
33713f74950SBarry Smith     PetscMPIInt size;
3384b9ad928SBarry Smith     ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
3394b9ad928SBarry Smith     /* Create the global coarse vectors (rhs and solution). */
3404b9ad928SBarry Smith     ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr);
3414b9ad928SBarry Smith     ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr);
342f204ca49SKris Buschelman     /* Create and set the global coarse AIJ matrix. */
343f69a0ea3SMatthew Knepley     ierr = MatCreate(pc->comm,&(pcnn->coarse_mat));CHKERRQ(ierr);
344f69a0ea3SMatthew Knepley     ierr = MatSetSizes(pcnn->coarse_mat,1,1,size,size);CHKERRQ(ierr);
345f204ca49SKris Buschelman     ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr);
346f204ca49SKris Buschelman     ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);CHKERRQ(ierr);
347f204ca49SKris Buschelman     ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
3484b9ad928SBarry Smith     ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr);
3494b9ad928SBarry Smith     ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3504b9ad928SBarry Smith     ierr = MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3514b9ad928SBarry Smith   }
3524b9ad928SBarry Smith 
3534b9ad928SBarry Smith   {
35413f74950SBarry Smith     PetscMPIInt rank;
3554b9ad928SBarry Smith     PetscScalar one = 1.0;
3564b9ad928SBarry Smith     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
3574b9ad928SBarry Smith     /* "Zero out" rows of not-purely-Neumann subdomains */
3584b9ad928SBarry Smith     if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
359f4df32b1SMatthew Knepley       ierr = MatZeroRows(pcnn->coarse_mat,0,PETSC_NULL,one);CHKERRQ(ierr);
3604b9ad928SBarry Smith     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
3611c82e0cfSMatthew Knepley       PetscInt row = (PetscInt) rank;
3621c82e0cfSMatthew Knepley       ierr = MatZeroRows(pcnn->coarse_mat,1,&row,one);CHKERRQ(ierr);
3634b9ad928SBarry Smith     }
3644b9ad928SBarry Smith   }
3654b9ad928SBarry Smith 
3664b9ad928SBarry Smith   /* Create the coarse linear solver context */
3674b9ad928SBarry Smith   {
3684b9ad928SBarry Smith     PC  pc_ctx, inner_pc;
3694b9ad928SBarry Smith     ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr);
3704b9ad928SBarry Smith     ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3714b9ad928SBarry Smith     ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr);
3724b9ad928SBarry Smith     ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr);
3734b9ad928SBarry Smith     ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr);
3744b9ad928SBarry Smith     ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr);
3754b9ad928SBarry Smith     ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr);
3765c9740d6SBarry Smith     ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr);
3774b9ad928SBarry Smith     ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr);
3784b9ad928SBarry Smith     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
3794b9ad928SBarry Smith     ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr);
3804b9ad928SBarry Smith   }
3814b9ad928SBarry Smith 
3824b9ad928SBarry Smith   /* Free the memory for mat */
3834b9ad928SBarry Smith   ierr = PetscFree(mat);CHKERRQ(ierr);
3844b9ad928SBarry Smith 
3854b9ad928SBarry Smith   /* for DEBUGGING, save the coarse matrix to a file. */
3864b9ad928SBarry Smith   {
3874b9ad928SBarry Smith     PetscTruth flg;
3885c9740d6SBarry Smith     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);CHKERRQ(ierr);
3894b9ad928SBarry Smith     if (flg) {
3904b9ad928SBarry Smith       PetscViewer viewer;
3914b9ad928SBarry Smith       ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr);
3924b9ad928SBarry Smith       ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3934b9ad928SBarry Smith       ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr);
3944b9ad928SBarry Smith       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
3954b9ad928SBarry Smith     }
3964b9ad928SBarry Smith   }
3974b9ad928SBarry Smith 
3984b9ad928SBarry Smith   /*  Set the variable pcnn->factor_coarse_rhs. */
3994b9ad928SBarry Smith   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
4004b9ad928SBarry Smith 
4014b9ad928SBarry Smith   /* See historical note 02, at the bottom of this file. */
4024b9ad928SBarry Smith   PetscFunctionReturn(0);
4034b9ad928SBarry Smith }
4044b9ad928SBarry Smith 
4054b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
4064b9ad928SBarry Smith /*
4074b9ad928SBarry Smith    PCNNApplySchurToChunk -
4084b9ad928SBarry Smith 
4094b9ad928SBarry Smith    Input parameters:
4104b9ad928SBarry Smith .  pcnn
4114b9ad928SBarry Smith .  n - size of chunk
4124b9ad928SBarry Smith .  idx - indices of chunk
4134b9ad928SBarry Smith .  chunk - values
4144b9ad928SBarry Smith 
4154b9ad928SBarry Smith    Output parameters:
4164b9ad928SBarry Smith .  array_N - result of Schur complement applied to chunk, scattered to big array
4174b9ad928SBarry Smith .  vec1_B  - result of Schur complement applied to chunk
4184b9ad928SBarry Smith .  vec2_B  - garbage (used as work space)
4194b9ad928SBarry Smith .  vec1_D  - garbage (used as work space)
4204b9ad928SBarry Smith .  vec2_D  - garbage (used as work space)
4214b9ad928SBarry Smith 
4224b9ad928SBarry Smith */
4234b9ad928SBarry Smith #undef __FUNCT__
4244b9ad928SBarry Smith #define __FUNCT__ "PCNNApplySchurToChunk"
42513f74950SBarry Smith PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
4264b9ad928SBarry Smith {
4276849ba73SBarry Smith   PetscErrorCode ierr;
42813f74950SBarry Smith   PetscInt       i;
4294b9ad928SBarry Smith   PC_IS          *pcis = (PC_IS*)(pc->data);
4304b9ad928SBarry Smith 
4314b9ad928SBarry Smith   PetscFunctionBegin;
4324b9ad928SBarry Smith   ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr);
4334b9ad928SBarry Smith   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
4344b9ad928SBarry Smith   ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
4354b9ad928SBarry Smith   ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
4364b9ad928SBarry Smith   ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
4374b9ad928SBarry Smith   PetscFunctionReturn(0);
4384b9ad928SBarry Smith }
4394b9ad928SBarry Smith 
4404b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
4414b9ad928SBarry Smith /*
4424b9ad928SBarry Smith    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
4434b9ad928SBarry Smith                                       the preconditioner for the Schur complement.
4444b9ad928SBarry Smith 
4454b9ad928SBarry Smith    Input parameter:
4464b9ad928SBarry Smith .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
4474b9ad928SBarry Smith 
4484b9ad928SBarry Smith    Output parameters:
4494b9ad928SBarry Smith .  z - global vector of interior and interface nodes. The values on the interface are the result of
4504b9ad928SBarry Smith        the application of the interface preconditioner to the interface part of r. The values on the
4514b9ad928SBarry Smith        interior nodes are garbage.
4524b9ad928SBarry Smith .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4534b9ad928SBarry Smith .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4544b9ad928SBarry Smith .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4554b9ad928SBarry Smith .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4564b9ad928SBarry Smith .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
4574b9ad928SBarry Smith .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
4584b9ad928SBarry Smith .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4594b9ad928SBarry Smith .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4604b9ad928SBarry Smith 
4614b9ad928SBarry Smith */
4624b9ad928SBarry Smith #undef __FUNCT__
4634b9ad928SBarry Smith #define __FUNCT__ "PCNNApplyInterfacePreconditioner"
464dfbe8321SBarry Smith PetscErrorCode PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
4654b9ad928SBarry Smith                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
4664b9ad928SBarry Smith {
467dfbe8321SBarry Smith   PetscErrorCode ierr;
4684b9ad928SBarry Smith   PC_IS*         pcis = (PC_IS*)(pc->data);
4694b9ad928SBarry Smith 
4704b9ad928SBarry Smith   PetscFunctionBegin;
4714b9ad928SBarry Smith   /*
4724b9ad928SBarry Smith     First balancing step.
4734b9ad928SBarry Smith   */
4744b9ad928SBarry Smith   {
4754b9ad928SBarry Smith     PetscTruth flg;
4765c9740d6SBarry Smith     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);CHKERRQ(ierr);
4774b9ad928SBarry Smith     if (!flg) {
4784b9ad928SBarry Smith       ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
4794b9ad928SBarry Smith     } else {
4804b9ad928SBarry Smith       ierr = VecCopy(r,z);CHKERRQ(ierr);
4814b9ad928SBarry Smith     }
4824b9ad928SBarry Smith   }
4834b9ad928SBarry Smith 
4844b9ad928SBarry Smith   /*
4854b9ad928SBarry Smith     Extract the local interface part of z and scale it by D
4864b9ad928SBarry Smith   */
487ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4892dcb1b2aSMatthew Knepley   ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr);
4904b9ad928SBarry Smith 
4914b9ad928SBarry Smith   /* Neumann Solver */
4924b9ad928SBarry Smith   ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr);
4934b9ad928SBarry Smith 
4944b9ad928SBarry Smith   /*
4954b9ad928SBarry Smith     Second balancing step.
4964b9ad928SBarry Smith   */
4974b9ad928SBarry Smith   {
4984b9ad928SBarry Smith     PetscTruth flg;
4995c9740d6SBarry Smith     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);CHKERRQ(ierr);
5004b9ad928SBarry Smith     if (!flg) {
5014b9ad928SBarry Smith       ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
5024b9ad928SBarry Smith     } else {
5032dcb1b2aSMatthew Knepley       ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr);
504efb30889SBarry Smith       ierr = VecSet(z,0.0);CHKERRQ(ierr);
505ca9f406cSSatish Balay       ierr = VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
506ca9f406cSSatish Balay       ierr = VecScatterEnd  (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5074b9ad928SBarry Smith     }
5084b9ad928SBarry Smith   }
5094b9ad928SBarry Smith   PetscFunctionReturn(0);
5104b9ad928SBarry Smith }
5114b9ad928SBarry Smith 
5124b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
5134b9ad928SBarry Smith /*
5144b9ad928SBarry Smith    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
5154b9ad928SBarry Smith                    input argument u is provided), or s, as given in equations
5164b9ad928SBarry Smith                    (12) and (13), if the input argument u is a null vector.
5174b9ad928SBarry Smith                    Notice that the input argument u plays the role of u_i in
5184b9ad928SBarry Smith                    equation (14). The equation numbers refer to [Man93].
5194b9ad928SBarry Smith 
5204b9ad928SBarry Smith    Input Parameters:
5214b9ad928SBarry Smith .  pcnn - NN preconditioner context.
5224b9ad928SBarry Smith .  r - MPI vector of all nodes (interior and interface). It's preserved.
5234b9ad928SBarry Smith .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
5244b9ad928SBarry Smith 
5254b9ad928SBarry Smith    Output Parameters:
5264b9ad928SBarry Smith .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
5274b9ad928SBarry Smith .  vec1_B - Sequential vector of local interface nodes. Workspace.
5284b9ad928SBarry Smith .  vec2_B - Sequential vector of local interface nodes. Workspace.
5294b9ad928SBarry Smith .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
5304b9ad928SBarry Smith .  vec1_D - Sequential vector of local interior nodes. Workspace.
5314b9ad928SBarry Smith .  vec2_D - Sequential vector of local interior nodes. Workspace.
5324b9ad928SBarry Smith .  work_N - Array of all local nodes (interior and interface). Workspace.
5334b9ad928SBarry Smith 
5344b9ad928SBarry Smith */
5354b9ad928SBarry Smith #undef __FUNCT__
5364b9ad928SBarry Smith #define __FUNCT__ "PCNNBalancing"
537dfbe8321SBarry Smith PetscErrorCode PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
5384b9ad928SBarry Smith                    Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
5394b9ad928SBarry Smith {
5406849ba73SBarry Smith   PetscErrorCode ierr;
54113f74950SBarry Smith   PetscInt       k;
5424b9ad928SBarry Smith   PetscScalar    value;
5434b9ad928SBarry Smith   PetscScalar*   lambda;
5444b9ad928SBarry Smith   PC_NN*         pcnn     = (PC_NN*)(pc->data);
5454b9ad928SBarry Smith   PC_IS*         pcis     = (PC_IS*)(pc->data);
5464b9ad928SBarry Smith 
5474b9ad928SBarry Smith   PetscFunctionBegin;
5484b9ad928SBarry Smith   ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
5494b9ad928SBarry Smith   if (u) {
5504b9ad928SBarry Smith     if (!vec3_B) { vec3_B = u; }
5512dcb1b2aSMatthew Knepley     ierr = VecPointwiseMult(vec1_B,pcis->D,u);CHKERRQ(ierr);
552efb30889SBarry Smith     ierr = VecSet(z,0.0);CHKERRQ(ierr);
553ca9f406cSSatish Balay     ierr = VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
554ca9f406cSSatish Balay     ierr = VecScatterEnd  (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
555ca9f406cSSatish Balay     ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
556ca9f406cSSatish Balay     ierr = VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5574b9ad928SBarry Smith     ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
558efb30889SBarry Smith     ierr = VecScale(vec3_B,-1.0);CHKERRQ(ierr);
5594b9ad928SBarry Smith     ierr = VecCopy(r,z);CHKERRQ(ierr);
560ca9f406cSSatish Balay     ierr = VecScatterBegin(pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
561ca9f406cSSatish Balay     ierr = VecScatterEnd  (pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5624b9ad928SBarry Smith   } else {
5634b9ad928SBarry Smith     ierr = VecCopy(r,z);CHKERRQ(ierr);
5644b9ad928SBarry Smith   }
565ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
566ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5674b9ad928SBarry Smith   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
5684b9ad928SBarry Smith   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
5694b9ad928SBarry Smith   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
5704b9ad928SBarry Smith   {
57113f74950SBarry Smith     PetscMPIInt rank;
5724b9ad928SBarry Smith     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
5734b9ad928SBarry Smith     ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr);
5744b9ad928SBarry Smith     /*
5754b9ad928SBarry Smith        Since we are only inserting local values (one value actually) we don't need to do the
5764b9ad928SBarry Smith        reduction that tells us there is no data that needs to be moved. Hence we comment out these
5774b9ad928SBarry Smith        ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr);
5784b9ad928SBarry Smith        ierr = VecAssemblyEnd  (pcnn->coarse_b);CHKERRQ(ierr);
5794b9ad928SBarry Smith     */
5804b9ad928SBarry Smith   }
58123ce1328SBarry Smith   ierr = KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);CHKERRQ(ierr);
582efb30889SBarry Smith   if (!u) { ierr = VecScale(pcnn->coarse_x,-1.0);CHKERRQ(ierr); }
5834b9ad928SBarry Smith   ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
5844b9ad928SBarry Smith   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
5854b9ad928SBarry Smith   ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
5864b9ad928SBarry Smith   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
587efb30889SBarry Smith   ierr = VecSet(z,0.0);CHKERRQ(ierr);
588ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
589ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5904b9ad928SBarry Smith   if (!u) {
591ca9f406cSSatish Balay     ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
592ca9f406cSSatish Balay     ierr = VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5934b9ad928SBarry Smith     ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
5944b9ad928SBarry Smith     ierr = VecCopy(r,z);CHKERRQ(ierr);
5954b9ad928SBarry Smith   }
596ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
597ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5984b9ad928SBarry Smith   ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
5994b9ad928SBarry Smith   PetscFunctionReturn(0);
6004b9ad928SBarry Smith }
6014b9ad928SBarry Smith 
6024b9ad928SBarry Smith #undef __FUNCT__
6034b9ad928SBarry Smith 
6044b9ad928SBarry Smith 
6054b9ad928SBarry Smith 
6064b9ad928SBarry Smith /*  -------   E N D   O F   T H E   C O D E   -------  */
6074b9ad928SBarry Smith /*                                                     */
6084b9ad928SBarry Smith /*  From now on, "footnotes" (or "historical notes").  */
6094b9ad928SBarry Smith /*                                                     */
6104b9ad928SBarry Smith /*  -------------------------------------------------  */
6114b9ad928SBarry Smith 
6124b9ad928SBarry Smith 
6134b9ad928SBarry Smith 
6144b9ad928SBarry Smith /* --------------------------------------------------------------------------
6154b9ad928SBarry Smith    Historical note 01
6164b9ad928SBarry Smith    -------------------------------------------------------------------------- */
6174b9ad928SBarry Smith /*
6184b9ad928SBarry Smith    We considered the possibility of an alternative D_i that would still
6194b9ad928SBarry Smith    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
6204b9ad928SBarry Smith    The basic principle was still the pseudo-inverse of the counting
6214b9ad928SBarry Smith    function; the difference was that we would not count subdomains
6224b9ad928SBarry Smith    that do not contribute to the coarse space (i.e., not pure-Neumann
6234b9ad928SBarry Smith    subdomains).
6244b9ad928SBarry Smith 
6254b9ad928SBarry Smith    This turned out to be a bad idea:  we would solve trivial Neumann
6264b9ad928SBarry Smith    problems in the not pure-Neumann subdomains, since we would be scaling
6274b9ad928SBarry Smith    the balanced residual by zero.
6284b9ad928SBarry Smith */
6294b9ad928SBarry Smith 
6304b9ad928SBarry Smith 
6314b9ad928SBarry Smith 
6324b9ad928SBarry Smith 
6334b9ad928SBarry Smith /* --------------------------------------------------------------------------
6344b9ad928SBarry Smith    Historical note 02
6354b9ad928SBarry Smith    -------------------------------------------------------------------------- */
6364b9ad928SBarry Smith /*
6374b9ad928SBarry Smith    We tried an alternative coarse problem, that would eliminate exactly a
6384b9ad928SBarry Smith    constant error. Turned out not to improve the overall convergence.
6394b9ad928SBarry Smith */
6404b9ad928SBarry Smith 
6414b9ad928SBarry Smith 
642