xref: /petsc/src/ksp/pc/impls/is/nn/nn.c (revision dcca6d9d80ebd869fe6029bd05a3aa9faafef49e)
14b9ad928SBarry Smith 
2c6db04a5SJed Brown #include <../src/ksp/pc/impls/is/nn/nn.h>
34b9ad928SBarry Smith 
44b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
54b9ad928SBarry Smith /*
64b9ad928SBarry Smith    PCSetUp_NN - Prepares for the use of the NN preconditioner
74b9ad928SBarry Smith                     by setting data structures and options.
84b9ad928SBarry Smith 
94b9ad928SBarry Smith    Input Parameter:
104b9ad928SBarry Smith .  pc - the preconditioner context
114b9ad928SBarry Smith 
124b9ad928SBarry Smith    Application Interface Routine: PCSetUp()
134b9ad928SBarry Smith 
144b9ad928SBarry Smith    Notes:
154b9ad928SBarry Smith    The interface routine PCSetUp() is not usually called directly by
164b9ad928SBarry Smith    the user, but instead is called by PCApply() if necessary.
174b9ad928SBarry Smith */
184b9ad928SBarry Smith #undef __FUNCT__
194b9ad928SBarry Smith #define __FUNCT__ "PCSetUp_NN"
206849ba73SBarry Smith static PetscErrorCode PCSetUp_NN(PC pc)
214b9ad928SBarry Smith {
22dfbe8321SBarry Smith   PetscErrorCode ierr;
234b9ad928SBarry Smith 
244b9ad928SBarry Smith   PetscFunctionBegin;
254b9ad928SBarry Smith   if (!pc->setupcalled) {
264b9ad928SBarry Smith     /* Set up all the "iterative substructuring" common block */
274b9ad928SBarry Smith     ierr = PCISSetUp(pc);CHKERRQ(ierr);
284b9ad928SBarry Smith     /* Create the coarse matrix. */
294b9ad928SBarry Smith     ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr);
304b9ad928SBarry Smith   }
314b9ad928SBarry Smith   PetscFunctionReturn(0);
324b9ad928SBarry Smith }
334b9ad928SBarry Smith 
344b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
354b9ad928SBarry Smith /*
364b9ad928SBarry Smith    PCApply_NN - Applies the NN preconditioner to a vector.
374b9ad928SBarry Smith 
384b9ad928SBarry Smith    Input Parameters:
394b9ad928SBarry Smith .  pc - the preconditioner context
404b9ad928SBarry Smith .  r - input vector (global)
414b9ad928SBarry Smith 
424b9ad928SBarry Smith    Output Parameter:
434b9ad928SBarry Smith .  z - output vector (global)
444b9ad928SBarry Smith 
454b9ad928SBarry Smith    Application Interface Routine: PCApply()
464b9ad928SBarry Smith  */
474b9ad928SBarry Smith #undef __FUNCT__
484b9ad928SBarry Smith #define __FUNCT__ "PCApply_NN"
496849ba73SBarry Smith static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
504b9ad928SBarry Smith {
514b9ad928SBarry Smith   PC_IS          *pcis = (PC_IS*)(pc->data);
52dfbe8321SBarry Smith   PetscErrorCode ierr;
534b9ad928SBarry Smith   PetscScalar    m_one = -1.0;
544b9ad928SBarry Smith   Vec            w     = pcis->vec1_global;
554b9ad928SBarry Smith 
564b9ad928SBarry Smith   PetscFunctionBegin;
574b9ad928SBarry Smith   /*
584b9ad928SBarry Smith     Dirichlet solvers.
594b9ad928SBarry Smith     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
604b9ad928SBarry Smith     Storing the local results at vec2_D
614b9ad928SBarry Smith   */
62ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
63ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6423ce1328SBarry Smith   ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
654b9ad928SBarry Smith 
664b9ad928SBarry Smith   /*
674b9ad928SBarry Smith     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
684b9ad928SBarry Smith     Storing the result in the interface portion of the global vector w.
694b9ad928SBarry Smith   */
704b9ad928SBarry Smith   ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
712dcb1b2aSMatthew Knepley   ierr = VecScale(pcis->vec1_B,m_one);CHKERRQ(ierr);
724b9ad928SBarry Smith   ierr = VecCopy(r,w);CHKERRQ(ierr);
73ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
74ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
754b9ad928SBarry Smith 
764b9ad928SBarry Smith   /*
774b9ad928SBarry Smith     Apply the interface preconditioner
784b9ad928SBarry Smith   */
794b9ad928SBarry Smith   ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
804b9ad928SBarry Smith                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
814b9ad928SBarry Smith 
824b9ad928SBarry Smith   /*
834b9ad928SBarry Smith     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
844b9ad928SBarry Smith     The result is stored in vec1_D.
854b9ad928SBarry Smith   */
86ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
87ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884b9ad928SBarry Smith   ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr);
894b9ad928SBarry Smith 
904b9ad928SBarry Smith   /*
914b9ad928SBarry Smith     Dirichlet solvers.
924b9ad928SBarry Smith     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
934b9ad928SBarry Smith     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
944b9ad928SBarry Smith   */
95ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
96ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
9723ce1328SBarry Smith   ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
982dcb1b2aSMatthew Knepley   ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr);
99ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
100ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1014b9ad928SBarry Smith   PetscFunctionReturn(0);
1024b9ad928SBarry Smith }
1034b9ad928SBarry Smith 
1044b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
1054b9ad928SBarry Smith /*
1064b9ad928SBarry Smith    PCDestroy_NN - Destroys the private context for the NN preconditioner
1074b9ad928SBarry Smith    that was created with PCCreate_NN().
1084b9ad928SBarry Smith 
1094b9ad928SBarry Smith    Input Parameter:
1104b9ad928SBarry Smith .  pc - the preconditioner context
1114b9ad928SBarry Smith 
1124b9ad928SBarry Smith    Application Interface Routine: PCDestroy()
1134b9ad928SBarry Smith */
1144b9ad928SBarry Smith #undef __FUNCT__
1154b9ad928SBarry Smith #define __FUNCT__ "PCDestroy_NN"
1166849ba73SBarry Smith static PetscErrorCode PCDestroy_NN(PC pc)
1174b9ad928SBarry Smith {
1184b9ad928SBarry Smith   PC_NN          *pcnn = (PC_NN*)pc->data;
119dfbe8321SBarry Smith   PetscErrorCode ierr;
1204b9ad928SBarry Smith 
1214b9ad928SBarry Smith   PetscFunctionBegin;
1224b9ad928SBarry Smith   ierr = PCISDestroy(pc);CHKERRQ(ierr);
1234b9ad928SBarry Smith 
124fcfd50ebSBarry Smith   ierr = MatDestroy(&pcnn->coarse_mat);CHKERRQ(ierr);
125fcfd50ebSBarry Smith   ierr = VecDestroy(&pcnn->coarse_x);CHKERRQ(ierr);
126fcfd50ebSBarry Smith   ierr = VecDestroy(&pcnn->coarse_b);CHKERRQ(ierr);
127fcfd50ebSBarry Smith   ierr = KSPDestroy(&pcnn->ksp_coarse);CHKERRQ(ierr);
1284b9ad928SBarry Smith   if (pcnn->DZ_IN) {
12905b42c5fSBarry Smith     ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);
1304b9ad928SBarry Smith     ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr);
1314b9ad928SBarry Smith   }
1324b9ad928SBarry Smith 
1334b9ad928SBarry Smith   /*
1344b9ad928SBarry Smith       Free the private data structure that was hanging off the PC
1354b9ad928SBarry Smith   */
136c31cb41cSBarry Smith   ierr = PetscFree(pc->data);CHKERRQ(ierr);
1374b9ad928SBarry Smith   PetscFunctionReturn(0);
1384b9ad928SBarry Smith }
1394b9ad928SBarry Smith 
1404b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
141dfc58137SBarry Smith /*MC
14237a17b4dSBarry Smith    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
1434b9ad928SBarry Smith 
14437a17b4dSBarry Smith    Options Database Keys:
14537a17b4dSBarry Smith +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
14637a17b4dSBarry Smith                                        (this skips the first coarse grid solve in the preconditioner)
1475c9740d6SBarry Smith .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
14837a17b4dSBarry Smith                                        (this skips the second coarse grid solve in the preconditioner)
1495c9740d6SBarry Smith .    -pc_is_damp_fixed <fact> -
1505c9740d6SBarry Smith .    -pc_is_remove_nullspace_fixed -
1515c9740d6SBarry Smith .    -pc_is_set_damping_factor_floating <fact> -
1525c9740d6SBarry Smith .    -pc_is_not_damp_floating -
1535c9740d6SBarry Smith +    -pc_is_not_remove_nullspace_floating -
1544b9ad928SBarry Smith 
1558eb2139fSSatish Balay    Level: intermediate
15637a17b4dSBarry Smith 
15737a17b4dSBarry Smith    Notes: The matrix used with this preconditioner must be of type MATIS
15837a17b4dSBarry Smith 
1591b99c236SBarry Smith           Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
1601b99c236SBarry Smith           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
1611b99c236SBarry Smith           on the subdomains; though in our experience using approximate solvers is slower.).
1621b99c236SBarry Smith 
1635c9740d6SBarry Smith           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
1645c9740d6SBarry Smith           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
1655c9740d6SBarry Smith           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
16637a17b4dSBarry Smith 
16737a17b4dSBarry Smith    Contributed by Paulo Goldfeld
16837a17b4dSBarry Smith 
16978392ef1SBarry Smith .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MATIS
17037a17b4dSBarry Smith M*/
171b2573a8aSBarry Smith 
1724b9ad928SBarry Smith #undef __FUNCT__
1734b9ad928SBarry Smith #define __FUNCT__ "PCCreate_NN"
1748cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc)
1754b9ad928SBarry Smith {
176dfbe8321SBarry Smith   PetscErrorCode ierr;
1774b9ad928SBarry Smith   PC_NN          *pcnn;
1784b9ad928SBarry Smith 
1794b9ad928SBarry Smith   PetscFunctionBegin;
1804b9ad928SBarry Smith   /*
1814b9ad928SBarry Smith      Creates the private data structure for this preconditioner and
1824b9ad928SBarry Smith      attach it to the PC object.
1834b9ad928SBarry Smith   */
18438f2d2fdSLisandro Dalcin   ierr     = PetscNewLog(pc,PC_NN,&pcnn);CHKERRQ(ierr);
1854b9ad928SBarry Smith   pc->data = (void*)pcnn;
1864b9ad928SBarry Smith 
1874b9ad928SBarry Smith   ierr             = PCISCreate(pc);CHKERRQ(ierr);
1884b9ad928SBarry Smith   pcnn->coarse_mat = 0;
1894b9ad928SBarry Smith   pcnn->coarse_x   = 0;
1904b9ad928SBarry Smith   pcnn->coarse_b   = 0;
1914b9ad928SBarry Smith   pcnn->ksp_coarse = 0;
1924b9ad928SBarry Smith   pcnn->DZ_IN      = 0;
1934b9ad928SBarry Smith 
1944b9ad928SBarry Smith   /*
1954b9ad928SBarry Smith       Set the pointers for the functions that are provided above.
1964b9ad928SBarry Smith       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
1974b9ad928SBarry Smith       are called, they will automatically call these functions.  Note we
1984b9ad928SBarry Smith       choose not to provide a couple of these functions since they are
1994b9ad928SBarry Smith       not needed.
2004b9ad928SBarry Smith   */
2014b9ad928SBarry Smith   pc->ops->apply               = PCApply_NN;
2024b9ad928SBarry Smith   pc->ops->applytranspose      = 0;
2034b9ad928SBarry Smith   pc->ops->setup               = PCSetUp_NN;
2044b9ad928SBarry Smith   pc->ops->destroy             = PCDestroy_NN;
2054b9ad928SBarry Smith   pc->ops->view                = 0;
2064b9ad928SBarry Smith   pc->ops->applyrichardson     = 0;
2074b9ad928SBarry Smith   pc->ops->applysymmetricleft  = 0;
2084b9ad928SBarry Smith   pc->ops->applysymmetricright = 0;
2094b9ad928SBarry Smith   PetscFunctionReturn(0);
2104b9ad928SBarry Smith }
2114b9ad928SBarry Smith 
2124b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
2134b9ad928SBarry Smith /*
2144b9ad928SBarry Smith    PCNNCreateCoarseMatrix -
2154b9ad928SBarry Smith */
2164b9ad928SBarry Smith #undef __FUNCT__
2174b9ad928SBarry Smith #define __FUNCT__ "PCNNCreateCoarseMatrix"
218dfbe8321SBarry Smith PetscErrorCode PCNNCreateCoarseMatrix(PC pc)
2194b9ad928SBarry Smith {
2204b9ad928SBarry Smith   MPI_Request    *send_request, *recv_request;
2216849ba73SBarry Smith   PetscErrorCode ierr;
22213f74950SBarry Smith   PetscInt       i, j, k;
2234b9ad928SBarry Smith   PetscScalar    *mat;     /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
2244b9ad928SBarry Smith   PetscScalar    **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
2254b9ad928SBarry Smith 
2264b9ad928SBarry Smith   /* aliasing some names */
2274b9ad928SBarry Smith   PC_IS       *pcis     = (PC_IS*)(pc->data);
2284b9ad928SBarry Smith   PC_NN       *pcnn     = (PC_NN*)pc->data;
22913f74950SBarry Smith   PetscInt    n_neigh   = pcis->n_neigh;
23013f74950SBarry Smith   PetscInt    *neigh    = pcis->neigh;
23113f74950SBarry Smith   PetscInt    *n_shared = pcis->n_shared;
23213f74950SBarry Smith   PetscInt    **shared  = pcis->shared;
2334b9ad928SBarry Smith   PetscScalar **DZ_IN;   /* Must be initialized after memory allocation. */
2344b9ad928SBarry Smith 
2354b9ad928SBarry Smith   PetscFunctionBegin;
2364b9ad928SBarry Smith   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
2374b9ad928SBarry Smith   ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr);
2384b9ad928SBarry Smith 
2394b9ad928SBarry Smith   /* Allocate memory for DZ */
2404b9ad928SBarry Smith   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
2414b9ad928SBarry Smith   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
2424b9ad928SBarry Smith   {
24313f74950SBarry Smith     PetscInt size_of_Z = 0;
2444b9ad928SBarry Smith     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr);
2454b9ad928SBarry Smith     DZ_IN = pcnn->DZ_IN;
2464b9ad928SBarry Smith     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr);
2472fa5cd67SKarl Rupp     for (i=0; i<n_neigh; i++) size_of_Z += n_shared[i];
2484b9ad928SBarry Smith     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr);
2494b9ad928SBarry Smith     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr);
2504b9ad928SBarry Smith   }
2514b9ad928SBarry Smith   for (i=1; i<n_neigh; i++) {
2524b9ad928SBarry Smith     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
2534b9ad928SBarry Smith     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
2544b9ad928SBarry Smith   }
2554b9ad928SBarry Smith 
2564b9ad928SBarry Smith   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
2574b9ad928SBarry Smith   /* First, set the auxiliary array pcis->work_N. */
2584b9ad928SBarry Smith   ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
2594b9ad928SBarry Smith   for (i=1; i<n_neigh; i++) {
2604b9ad928SBarry Smith     for (j=0; j<n_shared[i]; j++) {
2614b9ad928SBarry Smith       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
2624b9ad928SBarry Smith     }
2634b9ad928SBarry Smith   }
2644b9ad928SBarry Smith 
2654b9ad928SBarry Smith   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
2664b9ad928SBarry Smith   /* Notice that send_request[] and recv_request[] could have one less element. */
2674b9ad928SBarry Smith   /* We make them longer to have request[i] corresponding to neigh[i].          */
2684b9ad928SBarry Smith   {
26913f74950SBarry Smith     PetscMPIInt tag;
2704b9ad928SBarry Smith     ierr         = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
271*dcca6d9dSJed Brown     ierr         = PetscMalloc2(n_neigh+1,&send_request,n_neigh+1,&recv_request);CHKERRQ(ierr);
2724b9ad928SBarry Smith     for (i=1; i<n_neigh; i++) {
273ce94432eSBarry Smith       ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(send_request[i]));CHKERRQ(ierr);
274ce94432eSBarry Smith       ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(recv_request[i]));CHKERRQ(ierr);
2754b9ad928SBarry Smith     }
2764b9ad928SBarry Smith   }
2774b9ad928SBarry Smith 
2784b9ad928SBarry Smith   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
2792fa5cd67SKarl Rupp   for (j=0; j<n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]];
2804b9ad928SBarry Smith 
2814b9ad928SBarry Smith   /* Start computing with local D*Z while communication goes on.    */
2824b9ad928SBarry Smith   /* Apply Schur complement. The result is "stored" in vec (more    */
2834b9ad928SBarry Smith   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
2844b9ad928SBarry Smith   /* and also scattered to pcnn->work_N.                            */
2854b9ad928SBarry Smith   ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
2864b9ad928SBarry Smith                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2874b9ad928SBarry Smith 
2884b9ad928SBarry Smith   /* Compute the first column, while completing the receiving. */
2894b9ad928SBarry Smith   for (i=0; i<n_neigh; i++) {
2904b9ad928SBarry Smith     MPI_Status  stat;
29113f74950SBarry Smith     PetscMPIInt ind=0;
2924b9ad928SBarry Smith     if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;}
2934b9ad928SBarry Smith     mat[ind*n_neigh+0] = 0.0;
2942fa5cd67SKarl Rupp     for (k=0; k<n_shared[ind]; k++) mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
2954b9ad928SBarry Smith   }
2964b9ad928SBarry Smith 
2974b9ad928SBarry Smith   /* Compute the remaining of the columns */
2984b9ad928SBarry Smith   for (j=1; j<n_neigh; j++) {
2994b9ad928SBarry Smith     ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
3004b9ad928SBarry Smith                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
3014b9ad928SBarry Smith     for (i=0; i<n_neigh; i++) {
3024b9ad928SBarry Smith       mat[i*n_neigh+j] = 0.0;
3032fa5cd67SKarl Rupp       for (k=0; k<n_shared[i]; k++) mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
3044b9ad928SBarry Smith     }
3054b9ad928SBarry Smith   }
3064b9ad928SBarry Smith 
3074b9ad928SBarry Smith   /* Complete the sending. */
3084b9ad928SBarry Smith   if (n_neigh>1) {
3094b9ad928SBarry Smith     MPI_Status *stat;
3104b9ad928SBarry Smith     ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr);
3110c468ba9SBarry Smith     if (n_neigh-1) {ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);}
3124b9ad928SBarry Smith     ierr = PetscFree(stat);CHKERRQ(ierr);
3134b9ad928SBarry Smith   }
3144b9ad928SBarry Smith 
3154b9ad928SBarry Smith   /* Free the memory for the MPI requests */
316eb9baa12SBarry Smith   ierr = PetscFree2(send_request,recv_request);CHKERRQ(ierr);
3174b9ad928SBarry Smith 
3184b9ad928SBarry Smith   /* Free the memory for DZ_OUT */
3194b9ad928SBarry Smith   if (DZ_OUT) {
32005b42c5fSBarry Smith     ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr);
3214b9ad928SBarry Smith     ierr = PetscFree(DZ_OUT);CHKERRQ(ierr);
3224b9ad928SBarry Smith   }
3234b9ad928SBarry Smith 
3244b9ad928SBarry Smith   {
32513f74950SBarry Smith     PetscMPIInt size;
326ce94432eSBarry Smith     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
3274b9ad928SBarry Smith     /* Create the global coarse vectors (rhs and solution). */
328ce94432eSBarry Smith     ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),1,size,&(pcnn->coarse_b));CHKERRQ(ierr);
3294b9ad928SBarry Smith     ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr);
330f204ca49SKris Buschelman     /* Create and set the global coarse AIJ matrix. */
331ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&(pcnn->coarse_mat));CHKERRQ(ierr);
332f69a0ea3SMatthew Knepley     ierr = MatSetSizes(pcnn->coarse_mat,1,1,size,size);CHKERRQ(ierr);
333f204ca49SKris Buschelman     ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr);
3340298fd71SBarry Smith     ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,NULL);CHKERRQ(ierr);
335761a349fSStefano Zampini     ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,NULL,n_neigh,NULL);CHKERRQ(ierr);
336761a349fSStefano Zampini     ierr = MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
337761a349fSStefano Zampini     ierr = MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3384b9ad928SBarry Smith     ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr);
3394b9ad928SBarry Smith     ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3404b9ad928SBarry Smith     ierr = MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3414b9ad928SBarry Smith   }
3424b9ad928SBarry Smith 
3434b9ad928SBarry Smith   {
34413f74950SBarry Smith     PetscMPIInt rank;
3454b9ad928SBarry Smith     PetscScalar one = 1.0;
346ce94432eSBarry Smith     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3474b9ad928SBarry Smith     /* "Zero out" rows of not-purely-Neumann subdomains */
3484b9ad928SBarry Smith     if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
3490298fd71SBarry Smith       ierr = MatZeroRows(pcnn->coarse_mat,0,NULL,one,0,0);CHKERRQ(ierr);
3504b9ad928SBarry Smith     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
3511c82e0cfSMatthew Knepley       PetscInt row = (PetscInt) rank;
3522b40b63fSBarry Smith       ierr = MatZeroRows(pcnn->coarse_mat,1,&row,one,0,0);CHKERRQ(ierr);
3534b9ad928SBarry Smith     }
3544b9ad928SBarry Smith   }
3554b9ad928SBarry Smith 
3564b9ad928SBarry Smith   /* Create the coarse linear solver context */
3574b9ad928SBarry Smith   {
3584b9ad928SBarry Smith     PC  pc_ctx, inner_pc;
35983ab6a24SBarry Smith     KSP inner_ksp;
36083ab6a24SBarry Smith 
361ce94432eSBarry Smith     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcnn->ksp_coarse);CHKERRQ(ierr);
3621cee3971SBarry Smith     ierr = PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse,(PetscObject)pc,2);CHKERRQ(ierr);
3634b9ad928SBarry Smith     ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3644b9ad928SBarry Smith     ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr);
3654b9ad928SBarry Smith     ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr);
3664b9ad928SBarry Smith     ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr);
36783ab6a24SBarry Smith     ierr = PCRedundantGetKSP(pc_ctx,&inner_ksp);CHKERRQ(ierr);
36883ab6a24SBarry Smith     ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3694b9ad928SBarry Smith     ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr);
3705c9740d6SBarry Smith     ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr);
3714b9ad928SBarry Smith     ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr);
3724b9ad928SBarry Smith     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
3734b9ad928SBarry Smith     ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr);
3744b9ad928SBarry Smith   }
3754b9ad928SBarry Smith 
3764b9ad928SBarry Smith   /* Free the memory for mat */
3774b9ad928SBarry Smith   ierr = PetscFree(mat);CHKERRQ(ierr);
3784b9ad928SBarry Smith 
3794b9ad928SBarry Smith   /* for DEBUGGING, save the coarse matrix to a file. */
3804b9ad928SBarry Smith   {
381ace3abfcSBarry Smith     PetscBool flg = PETSC_FALSE;
3820298fd71SBarry Smith     ierr = PetscOptionsGetBool(NULL,"-pc_nn_save_coarse_matrix",&flg,NULL);CHKERRQ(ierr);
3834b9ad928SBarry Smith     if (flg) {
3844b9ad928SBarry Smith       PetscViewer viewer;
3854b9ad928SBarry Smith       ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr);
3864b9ad928SBarry Smith       ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3874b9ad928SBarry Smith       ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr);
3886bf464f9SBarry Smith       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3894b9ad928SBarry Smith     }
3904b9ad928SBarry Smith   }
3914b9ad928SBarry Smith 
3924b9ad928SBarry Smith   /*  Set the variable pcnn->factor_coarse_rhs. */
3934b9ad928SBarry Smith   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
3944b9ad928SBarry Smith 
3954b9ad928SBarry Smith   /* See historical note 02, at the bottom of this file. */
3964b9ad928SBarry Smith   PetscFunctionReturn(0);
3974b9ad928SBarry Smith }
3984b9ad928SBarry Smith 
3994b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
4004b9ad928SBarry Smith /*
4014b9ad928SBarry Smith    PCNNApplySchurToChunk -
4024b9ad928SBarry Smith 
4034b9ad928SBarry Smith    Input parameters:
4044b9ad928SBarry Smith .  pcnn
4054b9ad928SBarry Smith .  n - size of chunk
4064b9ad928SBarry Smith .  idx - indices of chunk
4074b9ad928SBarry Smith .  chunk - values
4084b9ad928SBarry Smith 
4094b9ad928SBarry Smith    Output parameters:
4104b9ad928SBarry Smith .  array_N - result of Schur complement applied to chunk, scattered to big array
4114b9ad928SBarry Smith .  vec1_B  - result of Schur complement applied to chunk
4124b9ad928SBarry Smith .  vec2_B  - garbage (used as work space)
4134b9ad928SBarry Smith .  vec1_D  - garbage (used as work space)
4144b9ad928SBarry Smith .  vec2_D  - garbage (used as work space)
4154b9ad928SBarry Smith 
4164b9ad928SBarry Smith */
4174b9ad928SBarry Smith #undef __FUNCT__
4184b9ad928SBarry Smith #define __FUNCT__ "PCNNApplySchurToChunk"
41913f74950SBarry Smith PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
4204b9ad928SBarry Smith {
4216849ba73SBarry Smith   PetscErrorCode ierr;
42213f74950SBarry Smith   PetscInt       i;
4234b9ad928SBarry Smith   PC_IS          *pcis = (PC_IS*)(pc->data);
4244b9ad928SBarry Smith 
4254b9ad928SBarry Smith   PetscFunctionBegin;
4264b9ad928SBarry Smith   ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr);
4272fa5cd67SKarl Rupp   for (i=0; i<n; i++) array_N[idx[i]] = chunk[i];
4284b9ad928SBarry Smith   ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
4294b9ad928SBarry Smith   ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
4304b9ad928SBarry Smith   ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
4314b9ad928SBarry Smith   PetscFunctionReturn(0);
4324b9ad928SBarry Smith }
4334b9ad928SBarry Smith 
4344b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
4354b9ad928SBarry Smith /*
4364b9ad928SBarry Smith    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
4374b9ad928SBarry Smith                                       the preconditioner for the Schur complement.
4384b9ad928SBarry Smith 
4394b9ad928SBarry Smith    Input parameter:
4404b9ad928SBarry Smith .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
4414b9ad928SBarry Smith 
4424b9ad928SBarry Smith    Output parameters:
4434b9ad928SBarry Smith .  z - global vector of interior and interface nodes. The values on the interface are the result of
4444b9ad928SBarry Smith        the application of the interface preconditioner to the interface part of r. The values on the
4454b9ad928SBarry Smith        interior nodes are garbage.
4464b9ad928SBarry Smith .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4474b9ad928SBarry Smith .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4484b9ad928SBarry Smith .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4494b9ad928SBarry Smith .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
4504b9ad928SBarry Smith .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
4514b9ad928SBarry Smith .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
4524b9ad928SBarry Smith .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4534b9ad928SBarry Smith .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
4544b9ad928SBarry Smith 
4554b9ad928SBarry Smith */
4564b9ad928SBarry Smith #undef __FUNCT__
4574b9ad928SBarry Smith #define __FUNCT__ "PCNNApplyInterfacePreconditioner"
4582fa5cd67SKarl Rupp PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,Vec vec2_D, Vec vec1_N, Vec vec2_N)
4594b9ad928SBarry Smith {
460dfbe8321SBarry Smith   PetscErrorCode ierr;
4614b9ad928SBarry Smith   PC_IS          *pcis = (PC_IS*)(pc->data);
4624b9ad928SBarry Smith 
4634b9ad928SBarry Smith   PetscFunctionBegin;
4644b9ad928SBarry Smith   /*
4654b9ad928SBarry Smith     First balancing step.
4664b9ad928SBarry Smith   */
4674b9ad928SBarry Smith   {
468ace3abfcSBarry Smith     PetscBool flg = PETSC_FALSE;
4690298fd71SBarry Smith     ierr = PetscOptionsGetBool(NULL,"-pc_nn_turn_off_first_balancing",&flg,NULL);CHKERRQ(ierr);
4704b9ad928SBarry Smith     if (!flg) {
4714b9ad928SBarry Smith       ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
4724b9ad928SBarry Smith     } else {
4734b9ad928SBarry Smith       ierr = VecCopy(r,z);CHKERRQ(ierr);
4744b9ad928SBarry Smith     }
4754b9ad928SBarry Smith   }
4764b9ad928SBarry Smith 
4774b9ad928SBarry Smith   /*
4784b9ad928SBarry Smith     Extract the local interface part of z and scale it by D
4794b9ad928SBarry Smith   */
480ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
481ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4822dcb1b2aSMatthew Knepley   ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr);
4834b9ad928SBarry Smith 
4844b9ad928SBarry Smith   /* Neumann Solver */
4854b9ad928SBarry Smith   ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr);
4864b9ad928SBarry Smith 
4874b9ad928SBarry Smith   /*
4884b9ad928SBarry Smith     Second balancing step.
4894b9ad928SBarry Smith   */
4904b9ad928SBarry Smith   {
491ace3abfcSBarry Smith     PetscBool flg = PETSC_FALSE;
4920298fd71SBarry Smith     ierr = PetscOptionsGetBool(NULL,"-pc_turn_off_second_balancing",&flg,NULL);CHKERRQ(ierr);
4934b9ad928SBarry Smith     if (!flg) {
4944b9ad928SBarry Smith       ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
4954b9ad928SBarry Smith     } else {
4962dcb1b2aSMatthew Knepley       ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr);
497efb30889SBarry Smith       ierr = VecSet(z,0.0);CHKERRQ(ierr);
498ca9f406cSSatish Balay       ierr = VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499ca9f406cSSatish Balay       ierr = VecScatterEnd  (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5004b9ad928SBarry Smith     }
5014b9ad928SBarry Smith   }
5024b9ad928SBarry Smith   PetscFunctionReturn(0);
5034b9ad928SBarry Smith }
5044b9ad928SBarry Smith 
5054b9ad928SBarry Smith /* -------------------------------------------------------------------------- */
5064b9ad928SBarry Smith /*
5074b9ad928SBarry Smith    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
5084b9ad928SBarry Smith                    input argument u is provided), or s, as given in equations
5094b9ad928SBarry Smith                    (12) and (13), if the input argument u is a null vector.
5104b9ad928SBarry Smith                    Notice that the input argument u plays the role of u_i in
5114b9ad928SBarry Smith                    equation (14). The equation numbers refer to [Man93].
5124b9ad928SBarry Smith 
5134b9ad928SBarry Smith    Input Parameters:
5144b9ad928SBarry Smith .  pcnn - NN preconditioner context.
5154b9ad928SBarry Smith .  r - MPI vector of all nodes (interior and interface). It's preserved.
5164b9ad928SBarry Smith .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
5174b9ad928SBarry Smith 
5184b9ad928SBarry Smith    Output Parameters:
5194b9ad928SBarry Smith .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
5204b9ad928SBarry Smith .  vec1_B - Sequential vector of local interface nodes. Workspace.
5214b9ad928SBarry Smith .  vec2_B - Sequential vector of local interface nodes. Workspace.
5224b9ad928SBarry Smith .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
5234b9ad928SBarry Smith .  vec1_D - Sequential vector of local interior nodes. Workspace.
5244b9ad928SBarry Smith .  vec2_D - Sequential vector of local interior nodes. Workspace.
5254b9ad928SBarry Smith .  work_N - Array of all local nodes (interior and interface). Workspace.
5264b9ad928SBarry Smith 
5274b9ad928SBarry Smith */
5284b9ad928SBarry Smith #undef __FUNCT__
5294b9ad928SBarry Smith #define __FUNCT__ "PCNNBalancing"
5302fa5cd67SKarl Rupp PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
5314b9ad928SBarry Smith {
5326849ba73SBarry Smith   PetscErrorCode ierr;
53313f74950SBarry Smith   PetscInt       k;
5344b9ad928SBarry Smith   PetscScalar    value;
5354b9ad928SBarry Smith   PetscScalar    *lambda;
5364b9ad928SBarry Smith   PC_NN          *pcnn = (PC_NN*)(pc->data);
5374b9ad928SBarry Smith   PC_IS          *pcis = (PC_IS*)(pc->data);
5384b9ad928SBarry Smith 
5394b9ad928SBarry Smith   PetscFunctionBegin;
5404b9ad928SBarry Smith   ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
5414b9ad928SBarry Smith   if (u) {
5422fa5cd67SKarl Rupp     if (!vec3_B) vec3_B = u;
5432dcb1b2aSMatthew Knepley     ierr = VecPointwiseMult(vec1_B,pcis->D,u);CHKERRQ(ierr);
544efb30889SBarry Smith     ierr = VecSet(z,0.0);CHKERRQ(ierr);
545ca9f406cSSatish Balay     ierr = VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
546ca9f406cSSatish Balay     ierr = VecScatterEnd  (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
547ca9f406cSSatish Balay     ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
548ca9f406cSSatish Balay     ierr = VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5494b9ad928SBarry Smith     ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
550efb30889SBarry Smith     ierr = VecScale(vec3_B,-1.0);CHKERRQ(ierr);
5514b9ad928SBarry Smith     ierr = VecCopy(r,z);CHKERRQ(ierr);
552ca9f406cSSatish Balay     ierr = VecScatterBegin(pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
553ca9f406cSSatish Balay     ierr = VecScatterEnd  (pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5544b9ad928SBarry Smith   } else {
5554b9ad928SBarry Smith     ierr = VecCopy(r,z);CHKERRQ(ierr);
5564b9ad928SBarry Smith   }
557ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
558ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5594b9ad928SBarry Smith   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
5602fa5cd67SKarl Rupp   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]];
5614b9ad928SBarry Smith   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
5624b9ad928SBarry Smith   {
56313f74950SBarry Smith     PetscMPIInt rank;
564ce94432eSBarry Smith     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
5654b9ad928SBarry Smith     ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr);
5664b9ad928SBarry Smith     /*
5674b9ad928SBarry Smith        Since we are only inserting local values (one value actually) we don't need to do the
5684b9ad928SBarry Smith        reduction that tells us there is no data that needs to be moved. Hence we comment out these
5694b9ad928SBarry Smith        ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr);
5704b9ad928SBarry Smith        ierr = VecAssemblyEnd  (pcnn->coarse_b);CHKERRQ(ierr);
5714b9ad928SBarry Smith     */
5724b9ad928SBarry Smith   }
57323ce1328SBarry Smith   ierr = KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);CHKERRQ(ierr);
574efb30889SBarry Smith   if (!u) { ierr = VecScale(pcnn->coarse_x,-1.0);CHKERRQ(ierr); }
5754b9ad928SBarry Smith   ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
5762fa5cd67SKarl Rupp   for (k=0; k<pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k];
5774b9ad928SBarry Smith   ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
5784b9ad928SBarry Smith   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
579efb30889SBarry Smith   ierr = VecSet(z,0.0);CHKERRQ(ierr);
580ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
581ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5824b9ad928SBarry Smith   if (!u) {
583ca9f406cSSatish Balay     ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
584ca9f406cSSatish Balay     ierr = VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5854b9ad928SBarry Smith     ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
5864b9ad928SBarry Smith     ierr = VecCopy(r,z);CHKERRQ(ierr);
5874b9ad928SBarry Smith   }
588ca9f406cSSatish Balay   ierr = VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
589ca9f406cSSatish Balay   ierr = VecScatterEnd  (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5904b9ad928SBarry Smith   ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
5914b9ad928SBarry Smith   PetscFunctionReturn(0);
5924b9ad928SBarry Smith }
5934b9ad928SBarry Smith 
5944b9ad928SBarry Smith #undef __FUNCT__
5954b9ad928SBarry Smith 
5964b9ad928SBarry Smith 
5974b9ad928SBarry Smith 
5984b9ad928SBarry Smith /*  -------   E N D   O F   T H E   C O D E   -------  */
5994b9ad928SBarry Smith /*                                                     */
6004b9ad928SBarry Smith /*  From now on, "footnotes" (or "historical notes").  */
6014b9ad928SBarry Smith /*                                                     */
6024b9ad928SBarry Smith /*  -------------------------------------------------  */
6034b9ad928SBarry Smith 
6044b9ad928SBarry Smith 
6054b9ad928SBarry Smith 
6064b9ad928SBarry Smith /* --------------------------------------------------------------------------
6074b9ad928SBarry Smith    Historical note 01
6084b9ad928SBarry Smith    -------------------------------------------------------------------------- */
6094b9ad928SBarry Smith /*
6104b9ad928SBarry Smith    We considered the possibility of an alternative D_i that would still
6114b9ad928SBarry Smith    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
6124b9ad928SBarry Smith    The basic principle was still the pseudo-inverse of the counting
6134b9ad928SBarry Smith    function; the difference was that we would not count subdomains
6144b9ad928SBarry Smith    that do not contribute to the coarse space (i.e., not pure-Neumann
6154b9ad928SBarry Smith    subdomains).
6164b9ad928SBarry Smith 
6174b9ad928SBarry Smith    This turned out to be a bad idea:  we would solve trivial Neumann
6184b9ad928SBarry Smith    problems in the not pure-Neumann subdomains, since we would be scaling
6194b9ad928SBarry Smith    the balanced residual by zero.
6204b9ad928SBarry Smith */
6214b9ad928SBarry Smith 
6224b9ad928SBarry Smith 
6234b9ad928SBarry Smith 
6244b9ad928SBarry Smith 
6254b9ad928SBarry Smith /* --------------------------------------------------------------------------
6264b9ad928SBarry Smith    Historical note 02
6274b9ad928SBarry Smith    -------------------------------------------------------------------------- */
6284b9ad928SBarry Smith /*
6294b9ad928SBarry Smith    We tried an alternative coarse problem, that would eliminate exactly a
6304b9ad928SBarry Smith    constant error. Turned out not to improve the overall convergence.
6314b9ad928SBarry Smith */
6324b9ad928SBarry Smith 
6334b9ad928SBarry Smith 
634