/*
   This file contains routines for parallel vector operations.
 */
#include <petscsys.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/

PETSC_INTERN PetscErrorCode VecView_MPI_Draw(Vec, PetscViewer);

PetscErrorCode VecPlaceArray_MPI(Vec vin, const PetscScalar *a)
{
  Vec_MPI *v = (Vec_MPI *)vin->data;

  PetscFunctionBegin;
  PetscCheck(!v->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
  v->unplacedarray = v->array; /* save previous array so reset can bring it back */
  v->array         = (PetscScalar *)a;
  if (v->localrep) PetscCall(VecPlaceArray(v->localrep, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
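
/*
   A minimal usage sketch of the place/reset pairing implemented above (the
   sizes n, N and the buffer `mine` are illustrative, not part of this file):

     PetscScalar *mine;
     Vec          x;

     PetscCall(PetscMalloc1(n, &mine));
     PetscCall(VecCreateMPI(PETSC_COMM_WORLD, n, N, &x));
     PetscCall(VecPlaceArray(x, mine)); // x now computes on `mine`; the old array is saved
     // ... use x ...
     PetscCall(VecResetArray(x));       // restores the saved (unplaced) array
     PetscCall(PetscFree(mine));
     PetscCall(VecDestroy(&x));
*/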

PetscErrorCode VecDuplicate_MPI(Vec win, Vec *v)
{
  Vec_MPI     *vw, *w = (Vec_MPI *)win->data;
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCall(VecCreateWithLayout_Private(win->map, v));

  PetscCall(VecCreate_MPI_Private(*v, PETSC_TRUE, w->nghost, NULL));
  vw           = (Vec_MPI *)(*v)->data;
  (*v)->ops[0] = win->ops[0];

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    PetscCall(VecGetArray(*v, &array));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, PetscAbs(win->map->bs), win->map->n + w->nghost, array, &vw->localrep));
    vw->localrep->ops[0] = w->localrep->ops[0];
    PetscCall(VecRestoreArray(*v, &array));

    vw->localupdate = w->localupdate;
    if (vw->localupdate) PetscCall(PetscObjectReference((PetscObject)vw->localupdate));

    vw->ghost = w->ghost;
    if (vw->ghost) PetscCall(PetscObjectReference((PetscObject)vw->ghost));
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*v)->olist));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*v)->qlist));

  (*v)->bstash.bs = win->bstash.bs;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDuplicateVecs_MPI_GEMV(Vec w, PetscInt m, Vec *V[])
{
  Vec_MPI *wmpi = (Vec_MPI *)w->data;

  PetscFunctionBegin;
  // Currently we only do GEMV for vectors without ghosts. Note that w might be a VECMPI subclass object.
  // This routine relies on the duplicate operation being VecDuplicate_MPI. If it is not, bail out to the default.
  if (wmpi->nghost || w->ops->duplicate != VecDuplicate_MPI) {
    w->ops->duplicatevecs = VecDuplicateVecs_Default;
    PetscCall(VecDuplicateVecs(w, m, V));
  } else {
    PetscScalar *array;
    PetscInt64   lda; // use 64-bit as we will do "m * lda"

    PetscCall(PetscMalloc1(m, V));
    PetscCall(VecGetLocalSizeAligned(w, 64, &lda)); // store the 64-byte aligned local size in lda

    PetscCall(PetscCalloc1(m * lda, &array));
    for (PetscInt i = 0; i < m; i++) {
      Vec v;
      PetscCall(VecCreateMPIWithLayoutAndArray_Private(w->map, PetscSafePointerPlusOffset(array, i * lda), &v));
      PetscCall(PetscObjectListDuplicate(((PetscObject)w)->olist, &((PetscObject)v)->olist));
      PetscCall(PetscFunctionListDuplicate(((PetscObject)w)->qlist, &((PetscObject)v)->qlist));
      v->ops->view          = w->ops->view;
      v->stash.donotstash   = w->stash.donotstash;
      v->stash.ignorenegidx = w->stash.ignorenegidx;
      v->stash.bs           = w->stash.bs;
      (*V)[i]               = v;
    }
    // So that when the first vector is destroyed it will destroy the array
    if (m) ((Vec_MPI *)(*V)[0]->data)->array_allocated = array;
    // Disable replacearray of the first vector, as freeing its memory also frees others in the group.
    // But replacearray of others is ok, as they don't own their array.
    if (m > 1) (*V)[0]->ops->replacearray = VecReplaceArray_Default_GEMV_Error;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
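
/*
   Sketch of the layout produced above (assuming m = 3): all m vectors share one
   zeroed buffer with a 64-byte aligned leading dimension lda,

     array: [ v0 (lda entries) | v1 (lda entries) | v2 (lda entries) ]

   so the group can be viewed as a dense column-major matrix with leading
   dimension lda, letting routines such as VecMDot_Seq_GEMV replace m separate
   dot products with a single BLAS GEMV call.
*/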

static PetscErrorCode VecSetOption_MPI(Vec V, VecOption op, PetscBool flag)
{
  Vec_MPI *v = (Vec_MPI *)V->data;

  PetscFunctionBegin;
  switch (op) {
  case VEC_IGNORE_OFF_PROC_ENTRIES:
    V->stash.donotstash = flag;
    break;
  case VEC_IGNORE_NEGATIVE_INDICES:
    V->stash.ignorenegidx = flag;
    break;
  case VEC_SUBSET_OFF_PROC_ENTRIES:
    v->assembly_subset = flag;              /* See the same logic in MatAssembly wrt MAT_SUBSET_OFF_PROC_ENTRIES */
    if (!v->assembly_subset) {              /* User indicates "do not reuse the communication pattern" */
      PetscCall(VecAssemblyReset_MPI(V));   /* Reset existing pattern to free memory */
      v->first_assembly_done = PETSC_FALSE; /* Mark the first assembly as not done */
    }
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
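
/*
   A usage sketch for the VEC_SUBSET_OFF_PROC_ENTRIES option handled above
   (ni, ix, y, and nsteps are illustrative): promising that later assemblies
   communicate with a subset of the first assembly's ranks lets
   VecAssemblyBegin/End_MPI_BTS reuse the communication pattern.

     PetscCall(VecSetOption(x, VEC_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE));
     for (PetscInt step = 0; step < nsteps; step++) {
       PetscCall(VecSetValues(x, ni, ix, y, ADD_VALUES));
       PetscCall(VecAssemblyBegin(x));
       PetscCall(VecAssemblyEnd(x));
     }
*/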

PetscErrorCode VecResetArray_MPI(Vec vin)
{
  Vec_MPI *v = (Vec_MPI *)vin->data;

  PetscFunctionBegin;
  v->array         = v->unplacedarray;
  v->unplacedarray = NULL;
  if (v->localrep) PetscCall(VecResetArray(v->localrep));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecAssemblySend_MPI_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
{
  Vec                X   = (Vec)ctx;
  Vec_MPI           *x   = (Vec_MPI *)X->data;
  VecAssemblyHeader *hdr = (VecAssemblyHeader *)sdata;
  PetscInt           bs  = X->map->bs;

  PetscFunctionBegin;
  /* x->first_assembly_done indicates we are reusing a communication network. In that case, some
     messages can be empty, but we have to send them this time if we sent them before because the
     receiver is expecting them.
   */
  if (hdr->count || (x->first_assembly_done && x->sendptrs[rankid].ints)) {
    PetscCallMPI(MPI_Isend(x->sendptrs[rankid].ints, hdr->count, MPIU_INT, rank, tag[0], comm, &req[0]));
    PetscCallMPI(MPI_Isend(x->sendptrs[rankid].scalars, hdr->count, MPIU_SCALAR, rank, tag[1], comm, &req[1]));
  }
  if (hdr->bcount || (x->first_assembly_done && x->sendptrs[rankid].intb)) {
    PetscCallMPI(MPI_Isend(x->sendptrs[rankid].intb, hdr->bcount, MPIU_INT, rank, tag[2], comm, &req[2]));
    PetscCallMPI(MPI_Isend(x->sendptrs[rankid].scalarb, hdr->bcount * bs, MPIU_SCALAR, rank, tag[3], comm, &req[3]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecAssemblyRecv_MPI_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
{
  Vec                X   = (Vec)ctx;
  Vec_MPI           *x   = (Vec_MPI *)X->data;
  VecAssemblyHeader *hdr = (VecAssemblyHeader *)rdata;
  PetscInt           bs  = X->map->bs;
  VecAssemblyFrame  *frame;

  PetscFunctionBegin;
  PetscCall(PetscSegBufferGet(x->segrecvframe, 1, &frame));

  if (hdr->count) {
    PetscCall(PetscSegBufferGet(x->segrecvint, hdr->count, &frame->ints));
    PetscCallMPI(MPI_Irecv(frame->ints, hdr->count, MPIU_INT, rank, tag[0], comm, &req[0]));
    PetscCall(PetscSegBufferGet(x->segrecvscalar, hdr->count, &frame->scalars));
    PetscCallMPI(MPI_Irecv(frame->scalars, hdr->count, MPIU_SCALAR, rank, tag[1], comm, &req[1]));
    frame->pendings = 2;
  } else {
    frame->ints     = NULL;
    frame->scalars  = NULL;
    frame->pendings = 0;
  }

  if (hdr->bcount) {
    PetscCall(PetscSegBufferGet(x->segrecvint, hdr->bcount, &frame->intb));
    PetscCallMPI(MPI_Irecv(frame->intb, hdr->bcount, MPIU_INT, rank, tag[2], comm, &req[2]));
    PetscCall(PetscSegBufferGet(x->segrecvscalar, hdr->bcount * bs, &frame->scalarb));
    PetscCallMPI(MPI_Irecv(frame->scalarb, hdr->bcount * bs, MPIU_SCALAR, rank, tag[3], comm, &req[3]));
    frame->pendingb = 2;
  } else {
    frame->intb     = NULL;
    frame->scalarb  = NULL;
    frame->pendingb = 0;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecAssemblyBegin_MPI_BTS(Vec X)
{
  Vec_MPI *x = (Vec_MPI *)X->data;
  MPI_Comm comm;
  PetscInt i, j, jb, bs;

  PetscFunctionBegin;
  if (X->stash.donotstash) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)X, &comm));
  PetscCall(VecGetBlockSize(X, &bs));
  if (PetscDefined(USE_DEBUG)) {
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum *)&X->stash.insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, comm));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), comm, PETSC_ERR_ARG_NOTSAMETYPE, "Some processors inserted values while others added");
  }
  X->bstash.insertmode = X->stash.insertmode; /* Block stash implicitly tracks InsertMode of scalar stash */

  PetscCall(VecStashSortCompress_Private(&X->stash));
  PetscCall(VecStashSortCompress_Private(&X->bstash));

  if (!x->sendranks) {
    PetscMPIInt nowners, bnowners, *owners, *bowners;
    PetscInt    ntmp;
    PetscCall(VecStashGetOwnerList_Private(&X->stash, X->map, &nowners, &owners));
    PetscCall(VecStashGetOwnerList_Private(&X->bstash, X->map, &bnowners, &bowners));
    PetscCall(PetscMergeMPIIntArray(nowners, owners, bnowners, bowners, &ntmp, &x->sendranks));
    x->nsendranks = ntmp;
    PetscCall(PetscFree(owners));
    PetscCall(PetscFree(bowners));
    PetscCall(PetscMalloc1(x->nsendranks, &x->sendhdr));
    PetscCall(PetscCalloc1(x->nsendranks, &x->sendptrs));
  }
  for (i = 0, j = 0, jb = 0; i < x->nsendranks; i++) {
    PetscMPIInt rank         = x->sendranks[i];
    x->sendhdr[i].insertmode = X->stash.insertmode;
    /* Initialize pointers for non-empty stashes the first time around.  Subsequent assemblies with
     * VEC_SUBSET_OFF_PROC_ENTRIES will leave the old pointers (dangling because the stash has been collected) when
     * there is nothing new to send, so that size-zero messages get sent instead. */
    x->sendhdr[i].count = 0;
    if (X->stash.n) {
      x->sendptrs[i].ints    = &X->stash.idx[j];
      x->sendptrs[i].scalars = &X->stash.array[j];
      for (; j < X->stash.n && X->stash.idx[j] < X->map->range[rank + 1]; j++) x->sendhdr[i].count++;
    }
    x->sendhdr[i].bcount = 0;
    if (X->bstash.n) {
      x->sendptrs[i].intb    = &X->bstash.idx[jb];
      x->sendptrs[i].scalarb = &X->bstash.array[jb * bs];
      for (; jb < X->bstash.n && X->bstash.idx[jb] * bs < X->map->range[rank + 1]; jb++) x->sendhdr[i].bcount++;
    }
  }

  if (!x->segrecvint) PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &x->segrecvint));
  if (!x->segrecvscalar) PetscCall(PetscSegBufferCreate(sizeof(PetscScalar), 1000, &x->segrecvscalar));
  if (!x->segrecvframe) PetscCall(PetscSegBufferCreate(sizeof(VecAssemblyFrame), 50, &x->segrecvframe));
  if (x->first_assembly_done) { /* this is not the first assembly */
    PetscMPIInt tag[4];
    for (i = 0; i < 4; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i]));
    for (i = 0; i < x->nsendranks; i++) PetscCall(VecAssemblySend_MPI_Private(comm, tag, i, x->sendranks[i], x->sendhdr + i, x->sendreqs + 4 * i, X));
    for (i = 0; i < x->nrecvranks; i++) PetscCall(VecAssemblyRecv_MPI_Private(comm, tag, x->recvranks[i], x->recvhdr + i, x->recvreqs + 4 * i, X));
    x->use_status = PETSC_TRUE;
  } else { /* First time assembly */
    PetscCall(PetscCommBuildTwoSidedFReq(comm, 3, MPIU_INT, x->nsendranks, x->sendranks, (PetscInt *)x->sendhdr, &x->nrecvranks, &x->recvranks, &x->recvhdr, 4, &x->sendreqs, &x->recvreqs, VecAssemblySend_MPI_Private, VecAssemblyRecv_MPI_Private, X));
    x->use_status = PETSC_FALSE;
  }

  /* The first_assembly_done flag is only meaningful when x->assembly_subset is set.
     Once assembly_subset is set, the assignment below marks the first assembly as done.
   */
  x->first_assembly_done = x->assembly_subset;

  {
    PetscInt nstash, reallocs;
    PetscCall(VecStashGetInfo_Private(&X->stash, &nstash, &reallocs));
    PetscCall(PetscInfo(X, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
    PetscCall(VecStashGetInfo_Private(&X->bstash, &nstash, &reallocs));
    PetscCall(PetscInfo(X, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecAssemblyEnd_MPI_BTS(Vec X)
{
  Vec_MPI          *x  = (Vec_MPI *)X->data;
  PetscInt          bs = X->map->bs;
  PetscMPIInt       npending, *some_indices, r;
  MPI_Status       *some_statuses;
  PetscScalar      *xarray;
  VecAssemblyFrame *frame;

  PetscFunctionBegin;
  if (X->stash.donotstash) {
    X->stash.insertmode  = NOT_SET_VALUES;
    X->bstash.insertmode = NOT_SET_VALUES;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCheck(x->segrecvframe, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing segrecvframe! Probably you forgot to call VecAssemblyBegin() first");
  PetscCall(VecGetArray(X, &xarray));
  PetscCall(PetscSegBufferExtractInPlace(x->segrecvframe, &frame));
  PetscCall(PetscMalloc2(4 * x->nrecvranks, &some_indices, x->use_status ? 4 * x->nrecvranks : 0, &some_statuses));
  for (r = 0, npending = 0; r < x->nrecvranks; r++) npending += frame[r].pendings + frame[r].pendingb;
  while (npending > 0) {
    PetscMPIInt ndone = 0, ii;
    /* Filling MPI_Status fields requires some resources from the MPI library.  We skip it on the first assembly, or
     * when VEC_SUBSET_OFF_PROC_ENTRIES has not been set, because we could exchange exact sizes in the initial
     * rendezvous.  When the rendezvous is elided, however, we use MPI_Status to get actual message lengths, so that
     * subsequent assembly can set a proper subset of the values. */
    PetscCallMPI(MPI_Waitsome(4 * x->nrecvranks, x->recvreqs, &ndone, some_indices, x->use_status ? some_statuses : MPI_STATUSES_IGNORE));
    for (ii = 0; ii < ndone; ii++) {
      PetscInt     i     = some_indices[ii] / 4, j, k;
      InsertMode   imode = (InsertMode)x->recvhdr[i].insertmode;
      PetscInt    *recvint;
      PetscScalar *recvscalar;
      PetscBool    intmsg   = (PetscBool)(some_indices[ii] % 2 == 0);
      PetscBool    blockmsg = (PetscBool)((some_indices[ii] % 4) / 2 == 1);
      npending--;
      if (!blockmsg) { /* Scalar stash */
        PetscMPIInt count;
        if (--frame[i].pendings > 0) continue;
        if (x->use_status) {
          PetscCallMPI(MPI_Get_count(&some_statuses[ii], intmsg ? MPIU_INT : MPIU_SCALAR, &count));
        } else count = x->recvhdr[i].count;
        for (j = 0, recvint = frame[i].ints, recvscalar = frame[i].scalars; j < count; j++, recvint++) {
          PetscInt loc = *recvint - X->map->rstart;
          PetscCheck(*recvint >= X->map->rstart && X->map->rend > *recvint, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Received vector entry %" PetscInt_FMT " out of local range [%" PetscInt_FMT ",%" PetscInt_FMT ")", *recvint, X->map->rstart, X->map->rend);
          switch (imode) {
          case ADD_VALUES:
            xarray[loc] += *recvscalar++;
            break;
          case INSERT_VALUES:
            xarray[loc] = *recvscalar++;
            break;
          default:
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", imode);
          }
        }
      } else { /* Block stash */
        PetscMPIInt count;
        if (--frame[i].pendingb > 0) continue;
        if (x->use_status) {
          PetscCallMPI(MPI_Get_count(&some_statuses[ii], intmsg ? MPIU_INT : MPIU_SCALAR, &count));
          if (!intmsg) count /= bs; /* Convert from number of scalars to number of blocks */
        } else count = x->recvhdr[i].bcount;
        for (j = 0, recvint = frame[i].intb, recvscalar = frame[i].scalarb; j < count; j++, recvint++) {
          PetscInt loc = (*recvint) * bs - X->map->rstart;
          switch (imode) {
          case ADD_VALUES:
            for (k = loc; k < loc + bs; k++) xarray[k] += *recvscalar++;
            break;
          case INSERT_VALUES:
            for (k = loc; k < loc + bs; k++) xarray[k] = *recvscalar++;
            break;
          default:
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", imode);
          }
        }
      }
    }
  }
  PetscCall(VecRestoreArray(X, &xarray));
  PetscCallMPI(MPI_Waitall(4 * x->nsendranks, x->sendreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree2(some_indices, some_statuses));
  if (x->assembly_subset) {
    PetscCall(PetscSegBufferExtractInPlace(x->segrecvint, NULL));
    PetscCall(PetscSegBufferExtractInPlace(x->segrecvscalar, NULL));
  } else {
    PetscCall(VecAssemblyReset_MPI(X));
  }

  X->stash.insertmode  = NOT_SET_VALUES;
  X->bstash.insertmode = NOT_SET_VALUES;
  PetscCall(VecStashScatterEnd_Private(&X->stash));
  PetscCall(VecStashScatterEnd_Private(&X->bstash));
  PetscFunctionReturn(PETSC_SUCCESS);
}
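
/*
   The begin/end pair above runs behind the standard assembly idiom (a sketch;
   ni, ix, and vals are illustrative):

     PetscCall(VecSetValues(x, ni, ix, vals, ADD_VALUES)); // off-process entries go to the stash
     PetscCall(VecAssemblyBegin(x));                       // posts the sends/receives built above
     PetscCall(VecAssemblyEnd(x));                         // drains receives and applies ADD/INSERT
*/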

PetscErrorCode VecAssemblyReset_MPI(Vec X)
{
  Vec_MPI *x = (Vec_MPI *)X->data;

  PetscFunctionBegin;
  PetscCall(PetscFree(x->sendreqs));
  PetscCall(PetscFree(x->recvreqs));
  PetscCall(PetscFree(x->sendranks));
  PetscCall(PetscFree(x->recvranks));
  PetscCall(PetscFree(x->sendhdr));
  PetscCall(PetscFree(x->recvhdr));
  PetscCall(PetscFree(x->sendptrs));
  PetscCall(PetscSegBufferDestroy(&x->segrecvint));
  PetscCall(PetscSegBufferDestroy(&x->segrecvscalar));
  PetscCall(PetscSegBufferDestroy(&x->segrecvframe));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecSetFromOptions_MPI(Vec X, PetscOptionItems *PetscOptionsObject)
{
#if !defined(PETSC_HAVE_MPIUNI)
  PetscBool flg = PETSC_FALSE, set;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "VecMPI Options");
  PetscCall(PetscOptionsBool("-vec_assembly_legacy", "Use MPI 1 version of assembly", "", flg, &flg, &set));
  if (set) {
    X->ops->assemblybegin = flg ? VecAssemblyBegin_MPI : VecAssemblyBegin_MPI_BTS;
    X->ops->assemblyend   = flg ? VecAssemblyEnd_MPI : VecAssemblyEnd_MPI_BTS;
  }
  PetscOptionsHeadEnd();
#else
  PetscFunctionBegin;
  X->ops->assemblybegin = VecAssemblyBegin_MPI;
  X->ops->assemblyend   = VecAssemblyEnd_MPI;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
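
/*
   The legacy (MPI 1) assembly can be selected at runtime with the option
   registered above, e.g. on the command line

     ./app -vec_assembly_legacy

   or, as a sketch, programmatically before VecSetFromOptions() is called:

     PetscCall(PetscOptionsSetValue(NULL, "-vec_assembly_legacy", "true"));
     PetscCall(VecSetFromOptions(x));
*/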

static PetscErrorCode VecGetLocalToGlobalMapping_MPI_VecGhost(Vec X, ISLocalToGlobalMapping *ltg)
{
  PetscInt       *indices, n, nghost, rstart, i;
  IS              ghostis;
  const PetscInt *ghostidx;

  PetscFunctionBegin;
  if (X->map->mapping) {
    *ltg = X->map->mapping;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(VecGhostGetGhostIS(X, &ghostis));
  PetscCall(ISGetLocalSize(ghostis, &nghost));
  PetscCall(VecGetLocalSize(X, &n));
  PetscCall(ISGetIndices(ghostis, &ghostidx));
  /* set local to global mapping for ghosted vector */
  PetscCall(PetscMalloc1(n + nghost, &indices));
  PetscCall(VecGetOwnershipRange(X, &rstart, NULL));
  for (i = 0; i < n; i++) indices[i] = rstart + i;
  PetscCall(PetscArraycpy(indices + n, ghostidx, nghost));
  PetscCall(ISRestoreIndices(ghostis, &ghostidx));
  PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, n + nghost, indices, PETSC_OWN_POINTER, &X->map->mapping));
  *ltg = X->map->mapping;
  PetscFunctionReturn(PETSC_SUCCESS);
}
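
/*
   A minimal sketch of how the mapping built above is consumed (lidx and val
   are illustrative): once the map exists, entries can be addressed by local
   index, with 0..n-1 the owned entries and n..n+nghost-1 the ghosts.

     ISLocalToGlobalMapping ltog;

     PetscCall(VecGetLocalToGlobalMapping(X, &ltog)); // builds and caches the map above
     PetscCall(VecSetValuesLocal(X, 1, &lidx, &val, ADD_VALUES));
*/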

static struct _VecOps DvOps = {PetscDesignatedInitializer(duplicate, VecDuplicate_MPI), /* 1 */
                               PetscDesignatedInitializer(duplicatevecs, VecDuplicateVecs_Default),
                               PetscDesignatedInitializer(destroyvecs, VecDestroyVecs_Default),
                               PetscDesignatedInitializer(dot, VecDot_MPI),
                               PetscDesignatedInitializer(mdot, VecMDot_MPI),
                               PetscDesignatedInitializer(norm, VecNorm_MPI),
                               PetscDesignatedInitializer(tdot, VecTDot_MPI),
                               PetscDesignatedInitializer(mtdot, VecMTDot_MPI),
                               PetscDesignatedInitializer(scale, VecScale_Seq),
                               PetscDesignatedInitializer(copy, VecCopy_Seq), /* 10 */
                               PetscDesignatedInitializer(set, VecSet_Seq),
                               PetscDesignatedInitializer(swap, VecSwap_Seq),
                               PetscDesignatedInitializer(axpy, VecAXPY_Seq),
                               PetscDesignatedInitializer(axpby, VecAXPBY_Seq),
                               PetscDesignatedInitializer(maxpy, VecMAXPY_Seq),
                               PetscDesignatedInitializer(aypx, VecAYPX_Seq),
                               PetscDesignatedInitializer(waxpy, VecWAXPY_Seq),
                               PetscDesignatedInitializer(axpbypcz, VecAXPBYPCZ_Seq),
                               PetscDesignatedInitializer(pointwisemult, VecPointwiseMult_Seq),
                               PetscDesignatedInitializer(pointwisedivide, VecPointwiseDivide_Seq),
                               PetscDesignatedInitializer(setvalues, VecSetValues_MPI), /* 20 */
                               PetscDesignatedInitializer(assemblybegin, VecAssemblyBegin_MPI_BTS),
                               PetscDesignatedInitializer(assemblyend, VecAssemblyEnd_MPI_BTS),
                               PetscDesignatedInitializer(getarray, NULL),
                               PetscDesignatedInitializer(getsize, VecGetSize_MPI),
                               PetscDesignatedInitializer(getlocalsize, VecGetSize_Seq),
                               PetscDesignatedInitializer(restorearray, NULL),
                               PetscDesignatedInitializer(max, VecMax_MPI),
                               PetscDesignatedInitializer(min, VecMin_MPI),
                               PetscDesignatedInitializer(setrandom, VecSetRandom_Seq),
                               PetscDesignatedInitializer(setoption, VecSetOption_MPI),
                               PetscDesignatedInitializer(setvaluesblocked, VecSetValuesBlocked_MPI),
                               PetscDesignatedInitializer(destroy, VecDestroy_MPI),
                               PetscDesignatedInitializer(view, VecView_MPI),
                               PetscDesignatedInitializer(placearray, VecPlaceArray_MPI),
                               PetscDesignatedInitializer(replacearray, VecReplaceArray_Seq),
                               PetscDesignatedInitializer(dot_local, VecDot_Seq),
                               PetscDesignatedInitializer(tdot_local, VecTDot_Seq),
                               PetscDesignatedInitializer(norm_local, VecNorm_Seq),
                               PetscDesignatedInitializer(mdot_local, VecMDot_Seq),
                               PetscDesignatedInitializer(mtdot_local, VecMTDot_Seq),
                               PetscDesignatedInitializer(load, VecLoad_Default),
                               PetscDesignatedInitializer(reciprocal, VecReciprocal_Default),
                               PetscDesignatedInitializer(conjugate, VecConjugate_Seq),
                               PetscDesignatedInitializer(setlocaltoglobalmapping, NULL),
                               PetscDesignatedInitializer(getlocaltoglobalmapping, VecGetLocalToGlobalMapping_MPI_VecGhost),
                               PetscDesignatedInitializer(setvalueslocal, NULL),
                               PetscDesignatedInitializer(resetarray, VecResetArray_MPI),
                               PetscDesignatedInitializer(setfromoptions, VecSetFromOptions_MPI), /* set from options */
                               PetscDesignatedInitializer(maxpointwisedivide, VecMaxPointwiseDivide_Seq),
                               PetscDesignatedInitializer(pointwisemax, VecPointwiseMax_Seq),
                               PetscDesignatedInitializer(pointwisemaxabs, VecPointwiseMaxAbs_Seq),
                               PetscDesignatedInitializer(pointwisemin, VecPointwiseMin_Seq),
                               PetscDesignatedInitializer(getvalues, VecGetValues_MPI),
                               PetscDesignatedInitializer(sqrt, NULL),
                               PetscDesignatedInitializer(abs, NULL),
                               PetscDesignatedInitializer(exp, NULL),
                               PetscDesignatedInitializer(log, NULL),
                               PetscDesignatedInitializer(shift, NULL),
                               PetscDesignatedInitializer(create, NULL), /* really? */
                               PetscDesignatedInitializer(stridegather, VecStrideGather_Default),
                               PetscDesignatedInitializer(stridescatter, VecStrideScatter_Default),
                               PetscDesignatedInitializer(dotnorm2, NULL),
                               PetscDesignatedInitializer(getsubvector, NULL),
                               PetscDesignatedInitializer(restoresubvector, NULL),
                               PetscDesignatedInitializer(getarrayread, NULL),
                               PetscDesignatedInitializer(restorearrayread, NULL),
                               PetscDesignatedInitializer(stridesubsetgather, VecStrideSubSetGather_Default),
                               PetscDesignatedInitializer(stridesubsetscatter, VecStrideSubSetScatter_Default),
                               PetscDesignatedInitializer(viewnative, VecView_MPI),
                               PetscDesignatedInitializer(loadnative, NULL),
                               PetscDesignatedInitializer(createlocalvector, NULL),
                               PetscDesignatedInitializer(getlocalvector, NULL),
                               PetscDesignatedInitializer(restorelocalvector, NULL),
                               PetscDesignatedInitializer(getlocalvectorread, NULL),
                               PetscDesignatedInitializer(restorelocalvectorread, NULL),
                               PetscDesignatedInitializer(bindtocpu, NULL),
                               PetscDesignatedInitializer(getarraywrite, NULL),
                               PetscDesignatedInitializer(restorearraywrite, NULL),
                               PetscDesignatedInitializer(getarrayandmemtype, NULL),
                               PetscDesignatedInitializer(restorearrayandmemtype, NULL),
                               PetscDesignatedInitializer(getarrayreadandmemtype, NULL),
                               PetscDesignatedInitializer(restorearrayreadandmemtype, NULL),
                               PetscDesignatedInitializer(getarraywriteandmemtype, NULL),
                               PetscDesignatedInitializer(restorearraywriteandmemtype, NULL),
                               PetscDesignatedInitializer(concatenate, NULL),
                               PetscDesignatedInitializer(sum, NULL),
                               PetscDesignatedInitializer(setpreallocationcoo, VecSetPreallocationCOO_MPI),
                               PetscDesignatedInitializer(setvaluescoo, VecSetValuesCOO_MPI),
                               PetscDesignatedInitializer(errorwnorm, NULL)};

/*
    VecCreate_MPI_Private - Basic create routine called by VecCreate_MPI() (i.e. VecCreateMPI()),
    VecCreateMPIWithArray(), VecCreate_Shared() (i.e. VecCreateShared()), VecCreateGhost(),
    VecDuplicate_MPI(), VecCreateGhostWithArray(), and VecDuplicate_Shared()

    If alloc is true and array is NULL then this routine allocates the space, otherwise
    no space is allocated.
*/
PetscErrorCode VecCreate_MPI_Private(Vec v, PetscBool alloc, PetscInt nghost, const PetscScalar array[])
{
  Vec_MPI  *s;
  PetscBool mdot_use_gemv  = PETSC_TRUE;
  PetscBool maxpy_use_gemv = PETSC_FALSE; // default is false as we saw bad performance with vendors' GEMV with tall skinny matrices.

  PetscFunctionBegin;
  PetscCall(PetscNew(&s));
  v->data   = (void *)s;
  v->ops[0] = DvOps;

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_mdot_use_gemv", &mdot_use_gemv, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_maxpy_use_gemv", &maxpy_use_gemv, NULL));

  // allocate multiple vectors together
  if (mdot_use_gemv || maxpy_use_gemv) v->ops[0].duplicatevecs = VecDuplicateVecs_MPI_GEMV;

  if (mdot_use_gemv) {
    v->ops[0].mdot        = VecMDot_MPI_GEMV;
    v->ops[0].mdot_local  = VecMDot_Seq_GEMV;
    v->ops[0].mtdot       = VecMTDot_MPI_GEMV;
    v->ops[0].mtdot_local = VecMTDot_Seq_GEMV;
  }
  if (maxpy_use_gemv) v->ops[0].maxpy = VecMAXPY_Seq_GEMV;

  s->nghost      = nghost;
  v->petscnative = PETSC_TRUE;
  if (array) v->offloadmask = PETSC_OFFLOAD_CPU;

  PetscCall(PetscLayoutSetUp(v->map));

  s->array           = (PetscScalar *)array;
  s->array_allocated = NULL;
  if (alloc && !array) {
    PetscInt n = v->map->n + nghost;
    PetscCall(PetscCalloc1(n, &s->array));
    s->array_allocated = s->array;
    PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_2], 0));
    PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_1], 0));
    PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_INFINITY], 0));
  }

  /* By default parallel vectors do not have local representation */
  s->localrep    = NULL;
  s->localupdate = NULL;
  s->ghost       = NULL;

  v->stash.insertmode  = NOT_SET_VALUES;
  v->bstash.insertmode = NOT_SET_VALUES;
  /* create the stashes. The block-size for bstash is set later when
     VecSetValuesBlocked is called.
  */
  PetscCall(VecStashCreate_Private(PetscObjectComm((PetscObject)v), 1, &v->stash));
  PetscCall(VecStashCreate_Private(PetscObjectComm((PetscObject)v), PetscAbs(v->map->bs), &v->bstash));

#if defined(PETSC_HAVE_MATLAB)
  PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEnginePut_C", VecMatlabEnginePut_Default));
  PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEngineGet_C", VecMatlabEngineGet_Default));
#endif
  PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPI));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Create a VECMPI with the given layout and array

  Collective

  Input Parameters:
+ map   - the layout
- array - the array on host

  Output Parameter:
. V - the vector object
*/
PetscErrorCode VecCreateMPIWithLayoutAndArray_Private(PetscLayout map, const PetscScalar array[], Vec *V)
{
  PetscFunctionBegin;
  PetscCall(VecCreateWithLayout_Private(map, V));
  PetscCall(VecCreate_MPI_Private(*V, PETSC_FALSE, 0, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   VECMPI - VECMPI = "mpi" - The basic parallel vector

   Options Database Key:
. -vec_type mpi - sets the vector type to `VECMPI` during a call to `VecSetFromOptions()`

  Level: beginner

.seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPI()`, `VecCreateMPIWithArray()`
M*/
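
/*
   A minimal creation sketch for this type (n and N are illustrative):

     Vec x;

     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
     PetscCall(VecSetSizes(x, n, N));
     PetscCall(VecSetType(x, VECMPI)); // or -vec_type mpi with VecSetFromOptions()
*/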

PetscErrorCode VecCreate_MPI(Vec vv)
{
  PetscFunctionBegin;
  PetscCall(VecCreate_MPI_Private(vv, PETSC_TRUE, 0, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   VECSTANDARD = "standard" - A `VECSEQ` on one process and a `VECMPI` on more than one process

   Options Database Key:
. -vec_type standard - sets the vector type to `VECSTANDARD` during a call to `VecSetFromOptions()`

  Level: beginner

.seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreateMPI()`
M*/

PETSC_EXTERN PetscErrorCode VecCreate_Standard(Vec v)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
  if (size == 1) {
    PetscCall(VecSetType(v, VECSEQ));
  } else {
    PetscCall(VecSetType(v, VECMPI));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecCreateMPIWithArray - Creates a parallel, array-style vector,
  where the user provides the array space to store the vector values.

  Collective

  Input Parameters:
+ comm  - the MPI communicator to use
. bs    - block size, same meaning as `VecSetBlockSize()`
. n     - local vector length, cannot be `PETSC_DECIDE`
. N     - global vector length (or `PETSC_DETERMINE` to have it calculated)
- array - the user provided array to store the vector values

  Output Parameter:
. vv - the vector

  Level: intermediate

  Notes:
  Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the
  same type as an existing vector.

  If the user-provided array is `NULL`, then `VecPlaceArray()` can be used
  at a later stage to SET the array for storing the vector values.

  PETSc does NOT free `array` when the vector is destroyed via `VecDestroy()`.

  The user should not free `array` until the vector is destroyed.
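
  Example Usage:
  A minimal sketch; the local length 16 is illustrative.
.vb
  PetscScalar a[16];
  Vec         x;

  PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, 16, PETSC_DETERMINE, a, &x));
  // ... use x; PETSc never frees a ...
  PetscCall(VecDestroy(&x));
.ve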

.seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
          `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
@*/
PetscErrorCode VecCreateMPIWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar array[], Vec *vv)
{
  PetscFunctionBegin;
  PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
  PetscCall(PetscSplitOwnership(comm, &n, &N));
  PetscCall(VecCreate(comm, vv));
  PetscCall(VecSetSizes(*vv, n, N));
  PetscCall(VecSetBlockSize(*vv, bs));
  PetscCall(VecCreate_MPI_Private(*vv, PETSC_FALSE, 0, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecCreateGhostWithArray - Creates a parallel vector with ghost padding on each processor;
  the caller allocates the array space.

  Collective

  Input Parameters:
+ comm   - the MPI communicator to use
. n      - local vector length
. N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
. nghost - number of local ghost points
. ghosts - global indices of ghost points (or `NULL` if not needed), these do not need to be in increasing order (sorted)
- array  - the space to store the vector values (must be of length at least `n + nghost`)

  Output Parameter:
. vv - the global vector representation (without ghost points as part of vector)

  Level: advanced

  Notes:
  Use `VecGhostGetLocalForm()` to access the local, ghosted representation
  of the vector.

  This also automatically sets the `ISLocalToGlobalMapping()` for this vector.

.seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
          `VecCreateGhost()`, `VecCreateSeqWithArray()`, `VecCreateMPIWithArray()`,
          `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`, `VecMPISetGhost()`
@*/
PetscErrorCode VecCreateGhostWithArray(MPI_Comm comm, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], const PetscScalar array[], Vec *vv)
{
  Vec_MPI     *w;
  PetscScalar *larray;
  IS           from, to;

  PetscFunctionBegin;
  *vv = NULL;

  PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size");
  PetscCheck(nghost != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local ghost size");
  PetscCheck(nghost >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ghost length must be >= 0");
  PetscCall(PetscSplitOwnership(comm, &n, &N));
  /* Create global representation */
  PetscCall(VecCreate(comm, vv));
  PetscCall(VecSetSizes(*vv, n, N));
  PetscCall(VecCreate_MPI_Private(*vv, PETSC_TRUE, nghost, array));
  w = (Vec_MPI *)(*vv)->data;
  /* Create local representation */
  PetscCall(VecGetArray(*vv, &larray));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n + nghost, larray, &w->localrep));
  PetscCall(VecRestoreArray(*vv, &larray));

  /*
       Create scatter context for scattering (updating) ghost values
  */
  PetscCall(ISCreateGeneral(comm, nghost, ghosts, PETSC_COPY_VALUES, &from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, nghost, n, 1, &to));
  PetscCall(VecScatterCreate(*vv, from, w->localrep, to, &w->localupdate));
  PetscCall(ISDestroy(&to));

  w->ghost                            = from;
  (*vv)->ops->getlocaltoglobalmapping = VecGetLocalToGlobalMapping_MPI_VecGhost;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecGhostGetGhostIS - Returns the ghosting indices of a ghost vector

  Input Parameter:
. X - the ghost vector

  Output Parameter:
. ghost - the ghosting indices

  Level: beginner

.seealso: `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`
@*/
PetscErrorCode VecGhostGetGhostIS(Vec X, IS *ghost)
{
  Vec_MPI  *w;
  PetscBool flg;

  PetscFunctionBegin;
  PetscValidType(X, 1);
  PetscAssertPointer(ghost, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)X, VECMPI, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "VecGhostGetGhostIS() is not supported for vector type %s", ((PetscObject)X)->type_name);
  w      = (Vec_MPI *)X->data;
  *ghost = w->ghost;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecCreateGhost - Creates a parallel vector with ghost padding on each processor.

  Collective

  Input Parameters:
+ comm   - the MPI communicator to use
. n      - local vector length
. N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
. nghost - number of local ghost points
- ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)

  Output Parameter:
. vv - the global vector representation (without ghost points as part of vector)

  Level: advanced

  Notes:
  Use `VecGhostGetLocalForm()` to access the local, ghosted representation
  of the vector.

  This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
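
  Example Usage:
  A minimal sketch of the create/update cycle (n and the ghost index are illustrative).
.vb
  PetscInt ghosts[] = {0}; // global index this rank keeps a read-only copy of
  Vec      x, lx;

  PetscCall(VecCreateGhost(PETSC_COMM_WORLD, n, PETSC_DETERMINE, 1, ghosts, &x));
  // ... VecSetValues()/VecAssemblyBegin()/VecAssemblyEnd() on x ...
  PetscCall(VecGhostUpdateBegin(x, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecGhostUpdateEnd(x, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecGhostGetLocalForm(x, &lx)); // lx has n + 1 entries, ghost last
  // ... read owned and ghost entries through lx ...
  PetscCall(VecGhostRestoreLocalForm(x, &lx));
.ve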

.seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
          `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`, `VecGhostUpdateBegin()`,
          `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecGhostUpdateEnd()`,
          `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`, `VecMPISetGhost()`

@*/
PetscErrorCode VecCreateGhost(MPI_Comm comm, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], Vec *vv)
{
  PetscFunctionBegin;
  PetscCall(VecCreateGhostWithArray(comm, n, N, nghost, ghosts, NULL, vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecMPISetGhost - Sets the ghost points for an MPI ghost vector

  Collective

  Input Parameters:
+ vv     - the MPI vector
. nghost - number of local ghost points
- ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)

  Level: advanced

  Notes:
  Use `VecGhostGetLocalForm()` to access the local, ghosted representation
  of the vector.

  This also automatically sets the `ISLocalToGlobalMapping()` for this vector.

  You must call this AFTER you have set the type of the vector (with `VecSetType()`) and the size (with `VecSetSizes()`).
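
  Example Usage:
  A sketch honoring the ordering requirement above (n, nghost, and ghosts are illustrative).
.vb
  PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
  PetscCall(VecSetSizes(x, n, PETSC_DETERMINE));
  PetscCall(VecSetType(x, VECMPI));
  PetscCall(VecMPISetGhost(x, nghost, ghosts));
.ve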

.seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
          `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`, `VecGhostUpdateBegin()`,
          `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecGhostUpdateEnd()`,
          `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`
@*/
PetscErrorCode VecMPISetGhost(Vec vv, PetscInt nghost, const PetscInt ghosts[])
{
  PetscBool flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)vv, VECMPI, &flg));
  /* if this is already a fully instantiated VECMPI, destroy it and rebuild with ghosting */
  if (flg) {
    PetscInt     n, N;
    Vec_MPI     *w;
    PetscScalar *larray;
    IS           from, to;
    MPI_Comm     comm;

    PetscCall(PetscObjectGetComm((PetscObject)vv, &comm));
    n = vv->map->n;
    N = vv->map->N;
    PetscUseTypeMethod(vv, destroy);
    PetscCall(VecSetSizes(vv, n, N));
    PetscCall(VecCreate_MPI_Private(vv, PETSC_TRUE, nghost, NULL));
    w = (Vec_MPI *)vv->data;
    /* Create local representation */
    PetscCall(VecGetArray(vv, &larray));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n + nghost, larray, &w->localrep));
    PetscCall(VecRestoreArray(vv, &larray));

    /*
     Create scatter context for scattering (updating) ghost values
     */
    PetscCall(ISCreateGeneral(comm, nghost, ghosts, PETSC_COPY_VALUES, &from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, nghost, n, 1, &to));
    PetscCall(VecScatterCreate(vv, from, w->localrep, to, &w->localupdate));
    PetscCall(ISDestroy(&to));

    w->ghost                         = from;
    vv->ops->getlocaltoglobalmapping = VecGetLocalToGlobalMapping_MPI_VecGhost;
  } else {
    PetscCheck(vv->ops->create != VecCreate_MPI, PetscObjectComm((PetscObject)vv), PETSC_ERR_ARG_WRONGSTATE, "Must set local or global size before setting ghosting");
    PetscCheck(((PetscObject)vv)->type_name, PetscObjectComm((PetscObject)vv), PETSC_ERR_ARG_WRONGSTATE, "Must set type to VECMPI before ghosting");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------------------*/
/*@C
  VecCreateGhostBlockWithArray - Creates a parallel vector with ghost padding on each processor;
  the caller allocates the array space. Indices in the ghost region are based on blocks.

  Collective

  Input Parameters:
+ comm   - the MPI communicator to use
. bs     - block size
. n      - local vector length
. N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
. nghost - number of local ghost blocks
. ghosts - global indices of ghost blocks (or `NULL` if not needed), counts are by block not by index, these do not need to be in increasing order (sorted)
- array  - the space to store the vector values (must be of length at least `n + nghost*bs`)

  Output Parameter:
. vv - the global vector representation (without ghost points as part of vector)

  Level: advanced

  Notes:
  Use `VecGhostGetLocalForm()` to access the local, ghosted representation
  of the vector.

  `n` is the local vector size (total local size, not the number of blocks) while `nghost`
  is the number of blocks in the ghost portion, i.e. the number of elements in the ghost
  portion is `bs*nghost`

.seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
          `VecCreateGhost()`, `VecCreateSeqWithArray()`, `VecCreateMPIWithArray()`,
          `VecCreateGhostWithArray()`, `VecCreateGhostBlock()`
@*/
PetscErrorCode VecCreateGhostBlockWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], const PetscScalar array[], Vec *vv)
{
  Vec_MPI               *w;
  PetscScalar           *larray;
  IS                     from, to;
  ISLocalToGlobalMapping ltog;
  PetscInt               rstart, i, nb, *indices;

  PetscFunctionBegin;
  *vv = NULL;

  PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size");
  PetscCheck(nghost != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local ghost size");
  PetscCheck(nghost >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ghost length must be >= 0");
  PetscCheck(n % bs == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local size must be a multiple of block size");
  PetscCall(PetscSplitOwnership(comm, &n, &N));
  /* Create global representation */
  PetscCall(VecCreate(comm, vv));
  PetscCall(VecSetSizes(*vv, n, N));
  PetscCall(VecSetBlockSize(*vv, bs));
  PetscCall(VecCreate_MPI_Private(*vv, PETSC_TRUE, nghost * bs, array));
  w = (Vec_MPI *)(*vv)->data;
  /* Create local representation */
  PetscCall(VecGetArray(*vv, &larray));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, bs, n + bs * nghost, larray, &w->localrep));
  PetscCall(VecRestoreArray(*vv, &larray));

  /*
       Create scatter context for scattering (updating) ghost values
  */
  PetscCall(ISCreateBlock(comm, bs, nghost, ghosts, PETSC_COPY_VALUES, &from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, bs * nghost, n, 1, &to));
  PetscCall(VecScatterCreate(*vv, from, w->localrep, to, &w->localupdate));
  PetscCall(ISDestroy(&to));
  PetscCall(ISDestroy(&from));

  /* set local to global mapping for ghosted vector */
  nb = n / bs;
  PetscCall(PetscMalloc1(nb + nghost, &indices));
  PetscCall(VecGetOwnershipRange(*vv, &rstart, NULL));
  rstart = rstart / bs;

  for (i = 0; i < nb; i++) indices[i] = rstart + i;
  for (i = 0; i < nghost; i++) indices[nb + i] = ghosts[i];

  PetscCall(ISLocalToGlobalMappingCreate(comm, bs, nb + nghost, indices, PETSC_OWN_POINTER, &ltog));
  PetscCall(VecSetLocalToGlobalMapping(*vv, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecCreateGhostBlock - Creates a parallel vector with ghost padding on each processor.
  The indexing of the ghost points is done by blocks.

  Collective

  Input Parameters:
+ comm   - the MPI communicator to use
. bs     - the block size
. n      - local vector length
. N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
. nghost - number of local ghost blocks
- ghosts - global indices of ghost blocks, counts are by block, not by individual index, these do not need to be in increasing order (sorted)

  Output Parameter:
. vv - the global vector representation (without ghost points as part of vector)

  Level: advanced

  Notes:
  Use `VecGhostGetLocalForm()` to access the local, ghosted representation
  of the vector.

  `n` is the local vector size (total local size, not the number of blocks) while `nghost`
  is the number of blocks in the ghost portion, i.e. the number of elements in the ghost
  portion is `bs*nghost`
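
  Example Usage:
  A sketch with block size 2 (the local length n and the ghost block are illustrative).
.vb
  PetscInt ghosts[] = {0}; // ghost block 0, i.e. global entries 0 and 1
  Vec      x;

  // n must be a multiple of the block size 2
  PetscCall(VecCreateGhostBlock(PETSC_COMM_WORLD, 2, n, PETSC_DETERMINE, 1, ghosts, &x));
.ve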

.seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
          `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
          `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecCreateGhostBlockWithArray()`
@*/
PetscErrorCode VecCreateGhostBlock(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], Vec *vv)
{
  PetscFunctionBegin;
  PetscCall(VecCreateGhostBlockWithArray(comm, bs, n, N, nghost, ghosts, NULL, vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}