xref: /petsc/src/vec/vec/impls/mpi/pbvec.c (revision 9bcc50f161f00fed78b30f628c8384653fee89b4)
1 /*
2    This file contains routines for Parallel vector operations.
3  */
4 #include <petscsys.h>
5 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
6 
7 PETSC_INTERN PetscErrorCode VecView_MPI_Draw(Vec, PetscViewer);
8 
9 PetscErrorCode VecPlaceArray_MPI(Vec vin, const PetscScalar *a)
10 {
11   Vec_MPI *v = (Vec_MPI *)vin->data;
12 
13   PetscFunctionBegin;
14   PetscCheck(!v->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecPlaceArray() was already called on this vector, without an intervening call to VecResetArray()");
15   v->unplacedarray = v->array; /* save previous array so reset can bring it back */
16   v->array         = (PetscScalar *)a;
17   if (v->localrep) PetscCall(VecPlaceArray(v->localrep, a));
18   PetscFunctionReturn(PETSC_SUCCESS);
19 }
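
/*
   A minimal usage sketch (an editorial addition, not part of the original source) of the
   place/reset protocol that VecPlaceArray_MPI()/VecResetArray_MPI() implement for VECMPI.
   The user-supplied buffer is assumed to hold at least the local length of the vector.

     #include <petscvec.h>

     PetscErrorCode UseBorrowedStorage(Vec x, PetscScalar *buf)
     {
       PetscFunctionBeginUser;
       PetscCall(VecPlaceArray(x, buf)); // x now reads and writes buf
       PetscCall(VecSet(x, 1.0));
       PetscCall(VecResetArray(x));      // restore the vector's original storage
       PetscFunctionReturn(PETSC_SUCCESS);
     }
*/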
20 
21 PetscErrorCode VecDuplicate_MPI(Vec win, Vec *v)
22 {
23   Vec_MPI     *vw, *w = (Vec_MPI *)win->data;
24   PetscScalar *array;
25 
26   PetscFunctionBegin;
27   PetscCall(VecCreateWithLayout_Private(win->map, v));
28 
29   PetscCall(VecCreate_MPI_Private(*v, PETSC_TRUE, w->nghost, NULL));
30   vw           = (Vec_MPI *)(*v)->data;
31   (*v)->ops[0] = win->ops[0];
32 
33   /* save local representation of the parallel vector (and scatter) if it exists */
34   if (w->localrep) {
35     PetscCall(VecGetArray(*v, &array));
36     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, PetscAbs(win->map->bs), win->map->n + w->nghost, array, &vw->localrep));
37     vw->localrep->ops[0] = w->localrep->ops[0];
38     PetscCall(VecRestoreArray(*v, &array));
39 
40     vw->localupdate = w->localupdate;
41     if (vw->localupdate) PetscCall(PetscObjectReference((PetscObject)vw->localupdate));
42 
43     vw->ghost = w->ghost;
44     if (vw->ghost) PetscCall(PetscObjectReference((PetscObject)vw->ghost));
45   }
46 
47   /* New vector should inherit stashing property of parent */
48   (*v)->stash.donotstash   = win->stash.donotstash;
49   (*v)->stash.ignorenegidx = win->stash.ignorenegidx;
50 
51   PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)(*v))->olist));
52   PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)(*v))->qlist));
53 
54   (*v)->bstash.bs = win->bstash.bs;
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
58 static PetscErrorCode VecDuplicateVecs_MPI_GEMV(Vec w, PetscInt m, Vec *V[])
59 {
60   Vec_MPI *wmpi = (Vec_MPI *)w->data;
61 
62   PetscFunctionBegin;
63   // Currently we only do GEMV for vectors without ghosts. Note that w might be an object of a VECMPI subclass.
64   // This routine relies on the duplicate operation being VecDuplicate_MPI; if it is not, fall back to the default.
65   if (wmpi->nghost || w->ops->duplicate != VecDuplicate_MPI) {
66     w->ops->duplicatevecs = VecDuplicateVecs_Default;
67     PetscCall(VecDuplicateVecs(w, m, V));
68   } else {
69     PetscInt     nlocal;
70     PetscScalar *array;
71     PetscInt64   lda; // use 64-bit as we will do "m * lda"
72 
73     PetscCall(PetscMalloc1(m, V));
74     PetscCall(VecGetLocalSize(w, &nlocal));
75     lda = nlocal;
76     lda = ((lda + 31) / 32) * 32; // make every vector 32-element aligned
77 
78     PetscCall(PetscCalloc1(m * lda, &array));
79     for (PetscInt i = 0; i < m; i++) {
80       Vec v;
81       PetscCall(VecCreateMPIWithLayoutAndArray_Private(w->map, PetscSafePointerPlusOffset(array, i * lda), &v));
82       PetscCall(PetscObjectListDuplicate(((PetscObject)w)->olist, &((PetscObject)v)->olist));
83       PetscCall(PetscFunctionListDuplicate(((PetscObject)w)->qlist, &((PetscObject)v)->qlist));
84       v->ops->view          = w->ops->view;
85       v->stash.donotstash   = w->stash.donotstash;
86       v->stash.ignorenegidx = w->stash.ignorenegidx;
87       v->stash.bs           = w->stash.bs;
88       (*V)[i]               = v;
89     }
90     // Attach the allocation to the first vector, so that destroying it also frees the shared array
91     if (m) ((Vec_MPI *)(*V)[0]->data)->array_allocated = array;
92     // Disable replacearray on the first vector, since freeing its array would also free the arrays of the others in the group.
93     // Replacearray on the other vectors is fine, since they do not own their arrays.
94     if (m > 1) (*V)[0]->ops->replacearray = VecReplaceArray_Default_GEMV_Error;
95   }
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
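
/*
   A hedged usage sketch (an editorial addition, not part of the original source): from the
   caller's point of view the GEMV-friendly contiguous allocation above is transparent;
   vectors obtained from VecDuplicateVecs() are used and freed exactly as usual.

     PetscErrorCode MakeWorkVectors(Vec x, PetscInt m)
     {
       Vec *work;

       PetscFunctionBeginUser;
       PetscCall(VecDuplicateVecs(x, m, &work)); // may share one 32-element aligned buffer internally
       for (PetscInt i = 0; i < m; i++) PetscCall(VecSet(work[i], (PetscScalar)i));
       PetscCall(VecDestroyVecs(m, &work)); // destroying the group frees the shared buffer
       PetscFunctionReturn(PETSC_SUCCESS);
     }
*/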
98 
99 static PetscErrorCode VecSetOption_MPI(Vec V, VecOption op, PetscBool flag)
100 {
101   Vec_MPI *v = (Vec_MPI *)V->data;
102 
103   PetscFunctionBegin;
104   switch (op) {
105   case VEC_IGNORE_OFF_PROC_ENTRIES:
106     V->stash.donotstash = flag;
107     break;
108   case VEC_IGNORE_NEGATIVE_INDICES:
109     V->stash.ignorenegidx = flag;
110     break;
111   case VEC_SUBSET_OFF_PROC_ENTRIES:
112     v->assembly_subset = flag;              /* See the same logic in MatAssembly wrt MAT_SUBSET_OFF_PROC_ENTRIES */
113     if (!v->assembly_subset) {              /* User indicates "do not reuse the communication pattern" */
114       PetscCall(VecAssemblyReset_MPI(V));   /* Reset existing pattern to free memory */
115       v->first_assembly_done = PETSC_FALSE; /* Mark the first assembly as not done */
116     }
117     break;
118   }
119 
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
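
/*
   A hedged sketch (an editorial addition, not part of the original source) of how
   VEC_SUBSET_OFF_PROC_ENTRIES is typically used: once set, later assemblies may reuse the
   communication pattern established by the first assembly, provided each later assembly
   communicates with a subset of the ranks of that first assembly.

     PetscCall(VecSetOption(x, VEC_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE));
     for (PetscInt it = 0; it < nits; it++) { // nits and the entries set are illustrative
       // set (a subset of the previously set) off-process entries with VecSetValues()
       PetscCall(VecAssemblyBegin(x));
       PetscCall(VecAssemblyEnd(x));
     }
*/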
122 
123 PetscErrorCode VecResetArray_MPI(Vec vin)
124 {
125   Vec_MPI *v = (Vec_MPI *)vin->data;
126 
127   PetscFunctionBegin;
128   v->array         = v->unplacedarray;
129   v->unplacedarray = NULL;
130   if (v->localrep) PetscCall(VecResetArray(v->localrep));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 static PetscErrorCode VecAssemblySend_MPI_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
135 {
136   Vec                X   = (Vec)ctx;
137   Vec_MPI           *x   = (Vec_MPI *)X->data;
138   VecAssemblyHeader *hdr = (VecAssemblyHeader *)sdata;
139   PetscInt           bs  = X->map->bs;
140 
141   PetscFunctionBegin;
142   /* x->first_assembly_done indicates we are reusing a communication pattern. In that case, some
143      messages can be empty, but we have to send them this time if we sent them before because the
144      receiver is expecting them.
145    */
146   if (hdr->count || (x->first_assembly_done && x->sendptrs[rankid].ints)) {
147     PetscCallMPI(MPI_Isend(x->sendptrs[rankid].ints, hdr->count, MPIU_INT, rank, tag[0], comm, &req[0]));
148     PetscCallMPI(MPI_Isend(x->sendptrs[rankid].scalars, hdr->count, MPIU_SCALAR, rank, tag[1], comm, &req[1]));
149   }
150   if (hdr->bcount || (x->first_assembly_done && x->sendptrs[rankid].intb)) {
151     PetscCallMPI(MPI_Isend(x->sendptrs[rankid].intb, hdr->bcount, MPIU_INT, rank, tag[2], comm, &req[2]));
152     PetscCallMPI(MPI_Isend(x->sendptrs[rankid].scalarb, hdr->bcount * bs, MPIU_SCALAR, rank, tag[3], comm, &req[3]));
153   }
154   PetscFunctionReturn(PETSC_SUCCESS);
155 }
156 
157 static PetscErrorCode VecAssemblyRecv_MPI_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
158 {
159   Vec                X   = (Vec)ctx;
160   Vec_MPI           *x   = (Vec_MPI *)X->data;
161   VecAssemblyHeader *hdr = (VecAssemblyHeader *)rdata;
162   PetscInt           bs  = X->map->bs;
163   VecAssemblyFrame  *frame;
164 
165   PetscFunctionBegin;
166   PetscCall(PetscSegBufferGet(x->segrecvframe, 1, &frame));
167 
168   if (hdr->count) {
169     PetscCall(PetscSegBufferGet(x->segrecvint, hdr->count, &frame->ints));
170     PetscCallMPI(MPI_Irecv(frame->ints, hdr->count, MPIU_INT, rank, tag[0], comm, &req[0]));
171     PetscCall(PetscSegBufferGet(x->segrecvscalar, hdr->count, &frame->scalars));
172     PetscCallMPI(MPI_Irecv(frame->scalars, hdr->count, MPIU_SCALAR, rank, tag[1], comm, &req[1]));
173     frame->pendings = 2;
174   } else {
175     frame->ints     = NULL;
176     frame->scalars  = NULL;
177     frame->pendings = 0;
178   }
179 
180   if (hdr->bcount) {
181     PetscCall(PetscSegBufferGet(x->segrecvint, hdr->bcount, &frame->intb));
182     PetscCallMPI(MPI_Irecv(frame->intb, hdr->bcount, MPIU_INT, rank, tag[2], comm, &req[2]));
183     PetscCall(PetscSegBufferGet(x->segrecvscalar, hdr->bcount * bs, &frame->scalarb));
184     PetscCallMPI(MPI_Irecv(frame->scalarb, hdr->bcount * bs, MPIU_SCALAR, rank, tag[3], comm, &req[3]));
185     frame->pendingb = 2;
186   } else {
187     frame->intb     = NULL;
188     frame->scalarb  = NULL;
189     frame->pendingb = 0;
190   }
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
194 static PetscErrorCode VecAssemblyBegin_MPI_BTS(Vec X)
195 {
196   Vec_MPI *x = (Vec_MPI *)X->data;
197   MPI_Comm comm;
198   PetscInt i, j, jb, bs;
199 
200   PetscFunctionBegin;
201   if (X->stash.donotstash) PetscFunctionReturn(PETSC_SUCCESS);
202 
203   PetscCall(PetscObjectGetComm((PetscObject)X, &comm));
204   PetscCall(VecGetBlockSize(X, &bs));
205   if (PetscDefined(USE_DEBUG)) {
206     InsertMode addv;
207     PetscCall(MPIU_Allreduce((PetscEnum *)&X->stash.insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, comm));
208     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), comm, PETSC_ERR_ARG_NOTSAMETYPE, "Some processors inserted values while others added");
209   }
210   X->bstash.insertmode = X->stash.insertmode; /* Block stash implicitly tracks InsertMode of scalar stash */
211 
212   PetscCall(VecStashSortCompress_Private(&X->stash));
213   PetscCall(VecStashSortCompress_Private(&X->bstash));
214 
215   if (!x->sendranks) {
216     PetscMPIInt nowners, bnowners, *owners, *bowners;
217     PetscInt    ntmp;
218     PetscCall(VecStashGetOwnerList_Private(&X->stash, X->map, &nowners, &owners));
219     PetscCall(VecStashGetOwnerList_Private(&X->bstash, X->map, &bnowners, &bowners));
220     PetscCall(PetscMergeMPIIntArray(nowners, owners, bnowners, bowners, &ntmp, &x->sendranks));
221     x->nsendranks = ntmp;
222     PetscCall(PetscFree(owners));
223     PetscCall(PetscFree(bowners));
224     PetscCall(PetscMalloc1(x->nsendranks, &x->sendhdr));
225     PetscCall(PetscCalloc1(x->nsendranks, &x->sendptrs));
226   }
227   for (i = 0, j = 0, jb = 0; i < x->nsendranks; i++) {
228     PetscMPIInt rank         = x->sendranks[i];
229     x->sendhdr[i].insertmode = X->stash.insertmode;
230     /* Initialize pointers for non-empty stashes the first time around.  Subsequent assemblies with
231      * VEC_SUBSET_OFF_PROC_ENTRIES will leave the old pointers (dangling because the stash has been collected) when
232      * there is nothing new to send, so that size-zero messages get sent instead. */
233     x->sendhdr[i].count = 0;
234     if (X->stash.n) {
235       x->sendptrs[i].ints    = &X->stash.idx[j];
236       x->sendptrs[i].scalars = &X->stash.array[j];
237       for (; j < X->stash.n && X->stash.idx[j] < X->map->range[rank + 1]; j++) x->sendhdr[i].count++;
238     }
239     x->sendhdr[i].bcount = 0;
240     if (X->bstash.n) {
241       x->sendptrs[i].intb    = &X->bstash.idx[jb];
242       x->sendptrs[i].scalarb = &X->bstash.array[jb * bs];
243       for (; jb < X->bstash.n && X->bstash.idx[jb] * bs < X->map->range[rank + 1]; jb++) x->sendhdr[i].bcount++;
244     }
245   }
246 
247   if (!x->segrecvint) PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &x->segrecvint));
248   if (!x->segrecvscalar) PetscCall(PetscSegBufferCreate(sizeof(PetscScalar), 1000, &x->segrecvscalar));
249   if (!x->segrecvframe) PetscCall(PetscSegBufferCreate(sizeof(VecAssemblyFrame), 50, &x->segrecvframe));
250   if (x->first_assembly_done) { /* this is not the first assembly */
251     PetscMPIInt tag[4];
252     for (i = 0; i < 4; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i]));
253     for (i = 0; i < x->nsendranks; i++) PetscCall(VecAssemblySend_MPI_Private(comm, tag, i, x->sendranks[i], x->sendhdr + i, x->sendreqs + 4 * i, X));
254     for (i = 0; i < x->nrecvranks; i++) PetscCall(VecAssemblyRecv_MPI_Private(comm, tag, x->recvranks[i], x->recvhdr + i, x->recvreqs + 4 * i, X));
255     x->use_status = PETSC_TRUE;
256   } else { /* First time assembly */
257     PetscCall(PetscCommBuildTwoSidedFReq(comm, 3, MPIU_INT, x->nsendranks, x->sendranks, (PetscInt *)x->sendhdr, &x->nrecvranks, &x->recvranks, &x->recvhdr, 4, &x->sendreqs, &x->recvreqs, VecAssemblySend_MPI_Private, VecAssemblyRecv_MPI_Private, X));
258     x->use_status = PETSC_FALSE;
259   }
260 
261   /* The first_assembly_done flag is only meaningful when x->assembly_subset is set.
262      When assembly_subset is set, we record here that the first assembly has been done.
263    */
264   x->first_assembly_done = x->assembly_subset;
265 
266   {
267     PetscInt nstash, reallocs;
268     PetscCall(VecStashGetInfo_Private(&X->stash, &nstash, &reallocs));
269     PetscCall(PetscInfo(X, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
270     PetscCall(VecStashGetInfo_Private(&X->bstash, &nstash, &reallocs));
271     PetscCall(PetscInfo(X, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
272   }
273   PetscFunctionReturn(PETSC_SUCCESS);
274 }
275 
276 static PetscErrorCode VecAssemblyEnd_MPI_BTS(Vec X)
277 {
278   Vec_MPI          *x  = (Vec_MPI *)X->data;
279   PetscInt          bs = X->map->bs;
280   PetscMPIInt       npending, *some_indices, r;
281   MPI_Status       *some_statuses;
282   PetscScalar      *xarray;
283   VecAssemblyFrame *frame;
284 
285   PetscFunctionBegin;
286   if (X->stash.donotstash) {
287     X->stash.insertmode  = NOT_SET_VALUES;
288     X->bstash.insertmode = NOT_SET_VALUES;
289     PetscFunctionReturn(PETSC_SUCCESS);
290   }
291 
292   PetscCheck(x->segrecvframe, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing segrecvframe! Probably you forgot to call VecAssemblyBegin() first");
293   PetscCall(VecGetArray(X, &xarray));
294   PetscCall(PetscSegBufferExtractInPlace(x->segrecvframe, &frame));
295   PetscCall(PetscMalloc2(4 * x->nrecvranks, &some_indices, x->use_status ? 4 * x->nrecvranks : 0, &some_statuses));
296   for (r = 0, npending = 0; r < x->nrecvranks; r++) npending += frame[r].pendings + frame[r].pendingb;
297   while (npending > 0) {
298     PetscMPIInt ndone = 0, ii;
299     /* Filling MPI_Status fields requires some resources from the MPI library.  We skip it on the first assembly, or
300      * when VEC_SUBSET_OFF_PROC_ENTRIES has not been set, because exact sizes were exchanged in the initial
301      * rendezvous.  When the rendezvous is elided, however, we use MPI_Status to get the actual message lengths, since a
302      * subsequent assembly may send only a subset of the values. */
303     PetscCallMPI(MPI_Waitsome(4 * x->nrecvranks, x->recvreqs, &ndone, some_indices, x->use_status ? some_statuses : MPI_STATUSES_IGNORE));
304     for (ii = 0; ii < ndone; ii++) {
305       PetscInt     i     = some_indices[ii] / 4, j, k;
306       InsertMode   imode = (InsertMode)x->recvhdr[i].insertmode;
307       PetscInt    *recvint;
308       PetscScalar *recvscalar;
309       PetscBool    intmsg   = (PetscBool)(some_indices[ii] % 2 == 0);
310       PetscBool    blockmsg = (PetscBool)((some_indices[ii] % 4) / 2 == 1);
311       npending--;
312       if (!blockmsg) { /* Scalar stash */
313         PetscMPIInt count;
314         if (--frame[i].pendings > 0) continue;
315         if (x->use_status) {
316           PetscCallMPI(MPI_Get_count(&some_statuses[ii], intmsg ? MPIU_INT : MPIU_SCALAR, &count));
317         } else count = x->recvhdr[i].count;
318         for (j = 0, recvint = frame[i].ints, recvscalar = frame[i].scalars; j < count; j++, recvint++) {
319           PetscInt loc = *recvint - X->map->rstart;
320           PetscCheck(*recvint >= X->map->rstart && X->map->rend > *recvint, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Received vector entry %" PetscInt_FMT " out of local range [%" PetscInt_FMT ",%" PetscInt_FMT ")", *recvint, X->map->rstart, X->map->rend);
321           switch (imode) {
322           case ADD_VALUES:
323             xarray[loc] += *recvscalar++;
324             break;
325           case INSERT_VALUES:
326             xarray[loc] = *recvscalar++;
327             break;
328           default:
329             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", imode);
330           }
331         }
332       } else { /* Block stash */
333         PetscMPIInt count;
334         if (--frame[i].pendingb > 0) continue;
335         if (x->use_status) {
336           PetscCallMPI(MPI_Get_count(&some_statuses[ii], intmsg ? MPIU_INT : MPIU_SCALAR, &count));
337           if (!intmsg) count /= bs; /* Convert from number of scalars to number of blocks */
338         } else count = x->recvhdr[i].bcount;
339         for (j = 0, recvint = frame[i].intb, recvscalar = frame[i].scalarb; j < count; j++, recvint++) {
340           PetscInt loc = (*recvint) * bs - X->map->rstart;
341           switch (imode) {
342           case ADD_VALUES:
343             for (k = loc; k < loc + bs; k++) xarray[k] += *recvscalar++;
344             break;
345           case INSERT_VALUES:
346             for (k = loc; k < loc + bs; k++) xarray[k] = *recvscalar++;
347             break;
348           default:
349             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", imode);
350           }
351         }
352       }
353     }
354   }
355   PetscCall(VecRestoreArray(X, &xarray));
356   PetscCallMPI(MPI_Waitall(4 * x->nsendranks, x->sendreqs, MPI_STATUSES_IGNORE));
357   PetscCall(PetscFree2(some_indices, some_statuses));
358   if (x->assembly_subset) {
359     PetscCall(PetscSegBufferExtractInPlace(x->segrecvint, NULL));
360     PetscCall(PetscSegBufferExtractInPlace(x->segrecvscalar, NULL));
361   } else {
362     PetscCall(VecAssemblyReset_MPI(X));
363   }
364 
365   X->stash.insertmode  = NOT_SET_VALUES;
366   X->bstash.insertmode = NOT_SET_VALUES;
367   PetscCall(VecStashScatterEnd_Private(&X->stash));
368   PetscCall(VecStashScatterEnd_Private(&X->bstash));
369   PetscFunctionReturn(PETSC_SUCCESS);
370 }
371 
372 PetscErrorCode VecAssemblyReset_MPI(Vec X)
373 {
374   Vec_MPI *x = (Vec_MPI *)X->data;
375 
376   PetscFunctionBegin;
377   PetscCall(PetscFree(x->sendreqs));
378   PetscCall(PetscFree(x->recvreqs));
379   PetscCall(PetscFree(x->sendranks));
380   PetscCall(PetscFree(x->recvranks));
381   PetscCall(PetscFree(x->sendhdr));
382   PetscCall(PetscFree(x->recvhdr));
383   PetscCall(PetscFree(x->sendptrs));
384   PetscCall(PetscSegBufferDestroy(&x->segrecvint));
385   PetscCall(PetscSegBufferDestroy(&x->segrecvscalar));
386   PetscCall(PetscSegBufferDestroy(&x->segrecvframe));
387   PetscFunctionReturn(PETSC_SUCCESS);
388 }
389 
390 static PetscErrorCode VecSetFromOptions_MPI(Vec X, PetscOptionItems *PetscOptionsObject)
391 {
392 #if !defined(PETSC_HAVE_MPIUNI)
393   PetscBool flg = PETSC_FALSE, set;
394 
395   PetscFunctionBegin;
396   PetscOptionsHeadBegin(PetscOptionsObject, "VecMPI Options");
397   PetscCall(PetscOptionsBool("-vec_assembly_legacy", "Use MPI 1 version of assembly", "", flg, &flg, &set));
398   if (set) {
399     X->ops->assemblybegin = flg ? VecAssemblyBegin_MPI : VecAssemblyBegin_MPI_BTS;
400     X->ops->assemblyend   = flg ? VecAssemblyEnd_MPI : VecAssemblyEnd_MPI_BTS;
401   }
402   PetscOptionsHeadEnd();
403 #else
404   PetscFunctionBegin;
405   X->ops->assemblybegin = VecAssemblyBegin_MPI;
406   X->ops->assemblyend   = VecAssemblyEnd_MPI;
407 #endif
408   PetscFunctionReturn(PETSC_SUCCESS);
409 }
410 
411 static PetscErrorCode VecGetLocalToGlobalMapping_MPI_VecGhost(Vec X, ISLocalToGlobalMapping *ltg)
412 {
413   PetscInt       *indices, n, nghost, rstart, i;
414   IS              ghostis;
415   const PetscInt *ghostidx;
416   MPI_Comm        comm;
417 
418   PetscFunctionBegin;
419   if (X->map->mapping) {
420     *ltg = X->map->mapping;
421     PetscFunctionReturn(PETSC_SUCCESS);
422   }
423   PetscCall(VecGhostGetGhostIS(X, &ghostis));
424   PetscCall(ISGetLocalSize(ghostis, &nghost));
425   PetscCall(VecGetLocalSize(X, &n));
426   PetscCall(ISGetIndices(ghostis, &ghostidx));
427   /* set local to global mapping for ghosted vector */
428   PetscCall(PetscMalloc1(n + nghost, &indices));
429   PetscCall(VecGetOwnershipRange(X, &rstart, NULL));
430   for (i = 0; i < n; i++) { indices[i] = rstart + i; }
431   for (i = 0; i < nghost; i++) { indices[n + i] = ghostidx[i]; }
432   PetscCall(ISRestoreIndices(ghostis, &ghostidx));
433   PetscCall(PetscObjectGetComm((PetscObject)X, &comm));
434   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, n + nghost, indices, PETSC_OWN_POINTER, &X->map->mapping));
435   *ltg = X->map->mapping;
436   PetscFunctionReturn(PETSC_SUCCESS);
437 }
438 
439 static struct _VecOps DvOps = {PetscDesignatedInitializer(duplicate, VecDuplicate_MPI), /* 1 */
440                                PetscDesignatedInitializer(duplicatevecs, VecDuplicateVecs_Default),
441                                PetscDesignatedInitializer(destroyvecs, VecDestroyVecs_Default),
442                                PetscDesignatedInitializer(dot, VecDot_MPI),
443                                PetscDesignatedInitializer(mdot, VecMDot_MPI),
444                                PetscDesignatedInitializer(norm, VecNorm_MPI),
445                                PetscDesignatedInitializer(tdot, VecTDot_MPI),
446                                PetscDesignatedInitializer(mtdot, VecMTDot_MPI),
447                                PetscDesignatedInitializer(scale, VecScale_Seq),
448                                PetscDesignatedInitializer(copy, VecCopy_Seq), /* 10 */
449                                PetscDesignatedInitializer(set, VecSet_Seq),
450                                PetscDesignatedInitializer(swap, VecSwap_Seq),
451                                PetscDesignatedInitializer(axpy, VecAXPY_Seq),
452                                PetscDesignatedInitializer(axpby, VecAXPBY_Seq),
453                                PetscDesignatedInitializer(maxpy, VecMAXPY_Seq),
454                                PetscDesignatedInitializer(aypx, VecAYPX_Seq),
455                                PetscDesignatedInitializer(waxpy, VecWAXPY_Seq),
456                                PetscDesignatedInitializer(axpbypcz, VecAXPBYPCZ_Seq),
457                                PetscDesignatedInitializer(pointwisemult, VecPointwiseMult_Seq),
458                                PetscDesignatedInitializer(pointwisedivide, VecPointwiseDivide_Seq),
459                                PetscDesignatedInitializer(setvalues, VecSetValues_MPI), /* 20 */
460                                PetscDesignatedInitializer(assemblybegin, VecAssemblyBegin_MPI_BTS),
461                                PetscDesignatedInitializer(assemblyend, VecAssemblyEnd_MPI_BTS),
462                                PetscDesignatedInitializer(getarray, NULL),
463                                PetscDesignatedInitializer(getsize, VecGetSize_MPI),
464                                PetscDesignatedInitializer(getlocalsize, VecGetSize_Seq),
465                                PetscDesignatedInitializer(restorearray, NULL),
466                                PetscDesignatedInitializer(max, VecMax_MPI),
467                                PetscDesignatedInitializer(min, VecMin_MPI),
468                                PetscDesignatedInitializer(setrandom, VecSetRandom_Seq),
469                                PetscDesignatedInitializer(setoption, VecSetOption_MPI),
470                                PetscDesignatedInitializer(setvaluesblocked, VecSetValuesBlocked_MPI),
471                                PetscDesignatedInitializer(destroy, VecDestroy_MPI),
472                                PetscDesignatedInitializer(view, VecView_MPI),
473                                PetscDesignatedInitializer(placearray, VecPlaceArray_MPI),
474                                PetscDesignatedInitializer(replacearray, VecReplaceArray_Seq),
475                                PetscDesignatedInitializer(dot_local, VecDot_Seq),
476                                PetscDesignatedInitializer(tdot_local, VecTDot_Seq),
477                                PetscDesignatedInitializer(norm_local, VecNorm_Seq),
478                                PetscDesignatedInitializer(mdot_local, VecMDot_Seq),
479                                PetscDesignatedInitializer(mtdot_local, VecMTDot_Seq),
480                                PetscDesignatedInitializer(load, VecLoad_Default),
481                                PetscDesignatedInitializer(reciprocal, VecReciprocal_Default),
482                                PetscDesignatedInitializer(conjugate, VecConjugate_Seq),
483                                PetscDesignatedInitializer(setlocaltoglobalmapping, NULL),
484                                PetscDesignatedInitializer(getlocaltoglobalmapping, VecGetLocalToGlobalMapping_MPI_VecGhost),
485                                PetscDesignatedInitializer(setvalueslocal, NULL),
486                                PetscDesignatedInitializer(resetarray, VecResetArray_MPI),
487                                PetscDesignatedInitializer(setfromoptions, VecSetFromOptions_MPI), /* set from options */
488                                PetscDesignatedInitializer(maxpointwisedivide, VecMaxPointwiseDivide_Seq),
489                                PetscDesignatedInitializer(pointwisemax, VecPointwiseMax_Seq),
490                                PetscDesignatedInitializer(pointwisemaxabs, VecPointwiseMaxAbs_Seq),
491                                PetscDesignatedInitializer(pointwisemin, VecPointwiseMin_Seq),
492                                PetscDesignatedInitializer(getvalues, VecGetValues_MPI),
493                                PetscDesignatedInitializer(sqrt, NULL),
494                                PetscDesignatedInitializer(abs, NULL),
495                                PetscDesignatedInitializer(exp, NULL),
496                                PetscDesignatedInitializer(log, NULL),
497                                PetscDesignatedInitializer(shift, NULL),
498                                PetscDesignatedInitializer(create, NULL), /* really? */
499                                PetscDesignatedInitializer(stridegather, VecStrideGather_Default),
500                                PetscDesignatedInitializer(stridescatter, VecStrideScatter_Default),
501                                PetscDesignatedInitializer(dotnorm2, NULL),
502                                PetscDesignatedInitializer(getsubvector, NULL),
503                                PetscDesignatedInitializer(restoresubvector, NULL),
504                                PetscDesignatedInitializer(getarrayread, NULL),
505                                PetscDesignatedInitializer(restorearrayread, NULL),
506                                PetscDesignatedInitializer(stridesubsetgather, VecStrideSubSetGather_Default),
507                                PetscDesignatedInitializer(stridesubsetscatter, VecStrideSubSetScatter_Default),
508                                PetscDesignatedInitializer(viewnative, VecView_MPI),
509                                PetscDesignatedInitializer(loadnative, NULL),
510                                PetscDesignatedInitializer(createlocalvector, NULL),
511                                PetscDesignatedInitializer(getlocalvector, NULL),
512                                PetscDesignatedInitializer(restorelocalvector, NULL),
513                                PetscDesignatedInitializer(getlocalvectorread, NULL),
514                                PetscDesignatedInitializer(restorelocalvectorread, NULL),
515                                PetscDesignatedInitializer(bindtocpu, NULL),
516                                PetscDesignatedInitializer(getarraywrite, NULL),
517                                PetscDesignatedInitializer(restorearraywrite, NULL),
518                                PetscDesignatedInitializer(getarrayandmemtype, NULL),
519                                PetscDesignatedInitializer(restorearrayandmemtype, NULL),
520                                PetscDesignatedInitializer(getarrayreadandmemtype, NULL),
521                                PetscDesignatedInitializer(restorearrayreadandmemtype, NULL),
522                                PetscDesignatedInitializer(getarraywriteandmemtype, NULL),
523                                PetscDesignatedInitializer(restorearraywriteandmemtype, NULL),
524                                PetscDesignatedInitializer(concatenate, NULL),
525                                PetscDesignatedInitializer(sum, NULL),
526                                PetscDesignatedInitializer(setpreallocationcoo, VecSetPreallocationCOO_MPI),
527                                PetscDesignatedInitializer(setvaluescoo, VecSetValuesCOO_MPI),
528                                PetscDesignatedInitializer(errorwnorm, NULL)};
529 
530 /*
531     VecCreate_MPI_Private - Basic create routine called by VecCreate_MPI() (i.e. VecCreateMPI()),
532     VecCreateMPIWithArray(), VecCreate_Shared() (i.e. VecCreateShared()), VecCreateGhost(),
533     VecDuplicate_MPI(), VecCreateGhostWithArray(), and VecDuplicate_Shared()
534 
535     If alloc is true and array is NULL then this routine allocates the space, otherwise
536     no space is allocated.
537 */
538 PetscErrorCode VecCreate_MPI_Private(Vec v, PetscBool alloc, PetscInt nghost, const PetscScalar array[])
539 {
540   Vec_MPI  *s;
541   PetscBool mdot_use_gemv  = PETSC_TRUE;
542   PetscBool maxpy_use_gemv = PETSC_FALSE; // default is false since we have seen poor performance from vendor GEMV implementations on tall-skinny matrices.
543 
544   PetscFunctionBegin;
545   PetscCall(PetscNew(&s));
546   v->data   = (void *)s;
547   v->ops[0] = DvOps;
548 
549   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_mdot_use_gemv", &mdot_use_gemv, NULL));
550   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_maxpy_use_gemv", &maxpy_use_gemv, NULL));
551 
552   // allocate multiple vectors together
553   if (mdot_use_gemv || maxpy_use_gemv) v->ops[0].duplicatevecs = VecDuplicateVecs_MPI_GEMV;
554 
555   if (mdot_use_gemv) {
556     v->ops[0].duplicatevecs = VecDuplicateVecs_MPI_GEMV;
557     v->ops[0].mdot          = VecMDot_MPI_GEMV;
558     v->ops[0].mdot_local    = VecMDot_Seq_GEMV;
559     v->ops[0].mtdot         = VecMTDot_MPI_GEMV;
560     v->ops[0].mtdot_local   = VecMTDot_Seq_GEMV;
561   }
562   if (maxpy_use_gemv) v->ops[0].maxpy = VecMAXPY_Seq_GEMV;
563 
564   s->nghost      = nghost;
565   v->petscnative = PETSC_TRUE;
566   if (array) v->offloadmask = PETSC_OFFLOAD_CPU;
567 
568   PetscCall(PetscLayoutSetUp(v->map));
569 
570   s->array           = (PetscScalar *)array;
571   s->array_allocated = NULL;
572   if (alloc && !array) {
573     PetscInt n = v->map->n + nghost;
574     PetscCall(PetscCalloc1(n, &s->array));
575     s->array_allocated = s->array;
576     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_2], 0));
577     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_1], 0));
578     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_INFINITY], 0));
579   }
580 
581   /* By default parallel vectors do not have local representation */
582   s->localrep    = NULL;
583   s->localupdate = NULL;
584   s->ghost       = NULL;
585 
586   v->stash.insertmode  = NOT_SET_VALUES;
587   v->bstash.insertmode = NOT_SET_VALUES;
588   /* create the stashes. The block-size for bstash is set later when
589      VecSetValuesBlocked is called.
590   */
591   PetscCall(VecStashCreate_Private(PetscObjectComm((PetscObject)v), 1, &v->stash));
592   PetscCall(VecStashCreate_Private(PetscObjectComm((PetscObject)v), PetscAbs(v->map->bs), &v->bstash));
593 
594 #if defined(PETSC_HAVE_MATLAB)
595   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEnginePut_C", VecMatlabEnginePut_Default));
596   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEngineGet_C", VecMatlabEngineGet_Default));
597 #endif
598   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPI));
599   PetscFunctionReturn(PETSC_SUCCESS);
600 }
601 
602 /*
603   Create a VECMPI with the given layout and array
604 
605   Collective
606 
607   Input Parameters:
608 + map   - the layout
609 - array - the array on host
610 
611   Output Parameter:
612 . V  - The vector object
613 */
614 PetscErrorCode VecCreateMPIWithLayoutAndArray_Private(PetscLayout map, const PetscScalar array[], Vec *V)
615 {
616   PetscFunctionBegin;
617   PetscCall(VecCreateWithLayout_Private(map, V));
618   PetscCall(VecCreate_MPI_Private(*V, PETSC_FALSE, 0, array));
619   PetscFunctionReturn(PETSC_SUCCESS);
620 }
621 
622 /*MC
623    VECMPI - VECMPI = "mpi" - The basic parallel vector
624 
625    Options Database Key:
626 . -vec_type mpi - sets the vector type to `VECMPI` during a call to `VecSetFromOptions()`
627 
628   Level: beginner
629 
630 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VecCreateMPI()`
631 M*/
632 
633 PetscErrorCode VecCreate_MPI(Vec vv)
634 {
635   PetscFunctionBegin;
636   PetscCall(VecCreate_MPI_Private(vv, PETSC_TRUE, 0, NULL));
637   PetscFunctionReturn(PETSC_SUCCESS);
638 }
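
/*
   A hedged sketch (an editorial addition, not part of the original source) of creating a
   VECMPI explicitly rather than through VecSetFromOptions(); the global size 100 is
   illustrative only.

     Vec x;

     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
     PetscCall(VecSetSizes(x, PETSC_DECIDE, 100));
     PetscCall(VecSetType(x, VECMPI));
     PetscCall(VecSet(x, 0.0));
     PetscCall(VecDestroy(&x));
*/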
639 
640 /*MC
641    VECSTANDARD = "standard" - A `VECSEQ` on one process and `VECMPI` on more than one process
642 
643    Options Database Key:
644 . -vec_type standard - sets the vector type to `VECSTANDARD` during a call to `VecSetFromOptions()`
645 
646   Level: beginner
647 
648 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreateMPI()`
649 M*/
650 
651 PETSC_EXTERN PetscErrorCode VecCreate_Standard(Vec v)
652 {
653   PetscMPIInt size;
654 
655   PetscFunctionBegin;
656   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
657   if (size == 1) {
658     PetscCall(VecSetType(v, VECSEQ));
659   } else {
660     PetscCall(VecSetType(v, VECMPI));
661   }
662   PetscFunctionReturn(PETSC_SUCCESS);
663 }
664 
665 /*@C
666   VecCreateMPIWithArray - Creates a parallel, array-style vector,
667   where the user provides the array space to store the vector values.
668 
669   Collective
670 
671   Input Parameters:
672 + comm  - the MPI communicator to use
673 . bs    - block size, same meaning as `VecSetBlockSize()`
674 . n     - local vector length, cannot be `PETSC_DECIDE`
675 . N     - global vector length (or `PETSC_DETERMINE` to have it calculated)
676 - array - the user provided array to store the vector values
677 
678   Output Parameter:
679 . vv - the vector
680 
681   Level: intermediate
682 
683   Notes:
684   Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the
685   same type as an existing vector.
686 
687   If the user-provided array is `NULL`, then `VecPlaceArray()` can be used
688   at a later stage to SET the array for storing the vector values.
689 
690   PETSc does NOT free `array` when the vector is destroyed via `VecDestroy()`.
691 
692   The user should not free `array` until the vector is destroyed.
693 
694 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
695           `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
696 @*/
697 PetscErrorCode VecCreateMPIWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar array[], Vec *vv)
698 {
699   PetscFunctionBegin;
700   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
701   PetscCall(PetscSplitOwnership(comm, &n, &N));
702   PetscCall(VecCreate(comm, vv));
703   PetscCall(VecSetSizes(*vv, n, N));
704   PetscCall(VecSetBlockSize(*vv, bs));
705   PetscCall(VecCreate_MPI_Private(*vv, PETSC_FALSE, 0, array));
706   PetscFunctionReturn(PETSC_SUCCESS);
707 }
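
/*
   A hedged usage sketch (an editorial addition, not part of the original source), assuming
   each rank owns n local entries backed by a user-managed array that outlives the vector.

     PetscScalar *a;
     Vec          x;
     PetscInt     n = 10; // illustrative local size

     PetscCall(PetscMalloc1(n, &a));
     PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DETERMINE, a, &x));
     // ... use x ...
     PetscCall(VecDestroy(&x)); // does not free a
     PetscCall(PetscFree(a));
*/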
708 
709 /*@C
710   VecCreateGhostWithArray - Creates a parallel vector with ghost padding on each processor;
711   the caller allocates the array space.
712 
713   Collective
714 
715   Input Parameters:
716 + comm   - the MPI communicator to use
717 . n      - local vector length
718 . N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
719 . nghost - number of local ghost points
720 . ghosts - global indices of ghost points (or `NULL` if not needed), these do not need to be in increasing order (sorted)
721 - array  - the space to store the vector values (of length at least `n + nghost`)
722 
723   Output Parameter:
724 . vv - the global vector representation (without ghost points as part of vector)
725 
726   Level: advanced
727 
728   Notes:
729   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
730   of the vector.
731 
732   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
733 
734 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
735           `VecCreateGhost()`, `VecCreateSeqWithArray()`, `VecCreateMPIWithArray()`,
736           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`, `VecMPISetGhost()`
737 @*/
738 PetscErrorCode VecCreateGhostWithArray(MPI_Comm comm, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], const PetscScalar array[], Vec *vv)
739 {
740   Vec_MPI     *w;
741   PetscScalar *larray;
742   IS           from, to;
743 
744   PetscFunctionBegin;
745   *vv = NULL;
746 
747   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size");
748   PetscCheck(nghost != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local ghost size");
749   PetscCheck(nghost >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ghost length must be >= 0");
750   PetscCall(PetscSplitOwnership(comm, &n, &N));
751   /* Create global representation */
752   PetscCall(VecCreate(comm, vv));
753   PetscCall(VecSetSizes(*vv, n, N));
754   PetscCall(VecCreate_MPI_Private(*vv, PETSC_TRUE, nghost, array));
755   w = (Vec_MPI *)(*vv)->data;
756   /* Create local representation */
757   PetscCall(VecGetArray(*vv, &larray));
758   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n + nghost, larray, &w->localrep));
759   PetscCall(VecRestoreArray(*vv, &larray));
760 
761   /*
762        Create scatter context for scattering (updating) ghost values
763   */
764   PetscCall(ISCreateGeneral(comm, nghost, ghosts, PETSC_COPY_VALUES, &from));
765   PetscCall(ISCreateStride(PETSC_COMM_SELF, nghost, n, 1, &to));
766   PetscCall(VecScatterCreate(*vv, from, w->localrep, to, &w->localupdate));
767   PetscCall(ISDestroy(&to));
768 
769   w->ghost                            = from;
770   (*vv)->ops->getlocaltoglobalmapping = VecGetLocalToGlobalMapping_MPI_VecGhost;
771   PetscFunctionReturn(PETSC_SUCCESS);
772 }
773 
774 /*@
775   VecGhostGetGhostIS - Return ghosting indices of a ghost vector
776 
777   Input Parameters:
778 . X - ghost vector
779 
780   Output Parameter:
781 . ghost - ghosting indices
782 
783   Level: beginner
784 
785 .seealso: `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`
786 @*/
787 PetscErrorCode VecGhostGetGhostIS(Vec X, IS *ghost)
788 {
789   Vec_MPI  *w;
790   PetscBool flg;
791 
792   PetscFunctionBegin;
793   PetscValidType(X, 1);
794   PetscAssertPointer(ghost, 2);
795   PetscCall(PetscObjectTypeCompare((PetscObject)X, VECMPI, &flg));
796   PetscCheck(flg, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "VecGhostGetGhostIS() is not supported for vector type %s", ((PetscObject)X)->type_name);
797   w      = (Vec_MPI *)(X)->data;
798   *ghost = w->ghost;
799   PetscFunctionReturn(PETSC_SUCCESS);
800 }
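
/*
   A hedged sketch (an editorial addition, not part of the original source) of reading back
   the ghosting indices of a ghosted vector x created with VecCreateGhost() or similar.

     IS              gis;
     const PetscInt *gidx;
     PetscInt        ng;

     PetscCall(VecGhostGetGhostIS(x, &gis));
     PetscCall(ISGetLocalSize(gis, &ng));
     PetscCall(ISGetIndices(gis, &gidx));
     // gidx[0..ng-1] are the global indices being ghosted on this rank
     PetscCall(ISRestoreIndices(gis, &gidx));
*/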
801 
802 /*@
803   VecCreateGhost - Creates a parallel vector with ghost padding on each processor.
804 
805   Collective
806 
807   Input Parameters:
808 + comm   - the MPI communicator to use
809 . n      - local vector length
810 . N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
811 . nghost - number of local ghost points
812 - ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)
813 
814   Output Parameter:
815 . vv - the global vector representation (without ghost points as part of vector)
816 
817   Level: advanced
818 
819   Notes:
820   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
821   of the vector.
822 
823   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
824 
825 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
826           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`, `VecGhostUpdateBegin()`,
827           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecGhostUpdateEnd()`,
828           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`, `VecMPISetGhost()`
829 
830 @*/
831 PetscErrorCode VecCreateGhost(MPI_Comm comm, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], Vec *vv)
832 {
833   PetscFunctionBegin;
834   PetscCall(VecCreateGhostWithArray(comm, n, N, nghost, ghosts, NULL, vv));
835   PetscFunctionReturn(PETSC_SUCCESS);
836 }
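
/*
   A hedged usage sketch (an editorial addition, not part of the original source): create a
   ghosted vector, fill the owned part, scatter owned values into the ghost slots, and access
   them through the local form. The local length n and the ghost indices are illustrative.

     Vec      x, xlocal;
     PetscInt n = 4, nghost = 2;
     PetscInt ghosts[2] = {0, 1}; // global indices assumed owned by another rank

     PetscCall(VecCreateGhost(PETSC_COMM_WORLD, n, PETSC_DETERMINE, nghost, ghosts, &x));
     PetscCall(VecSet(x, 1.0));
     PetscCall(VecGhostUpdateBegin(x, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall(VecGhostUpdateEnd(x, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall(VecGhostGetLocalForm(x, &xlocal)); // sequential vector of length n + nghost
     PetscCall(VecGhostRestoreLocalForm(x, &xlocal));
     PetscCall(VecDestroy(&x));
*/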
837 
838 /*@
839   VecMPISetGhost - Sets the ghost points for an MPI ghost vector
840 
841   Collective
842 
843   Input Parameters:
844 + vv     - the MPI vector
845 . nghost - number of local ghost points
846 - ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)
847 
848   Level: advanced
849 
850   Notes:
851   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
852   of the vector.
853 
854   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
855 
856   You must call this AFTER you have set the type of the vector (with` VecSetType()`) and the size (with `VecSetSizes()`).
857 
858 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
859           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`, `VecGhostUpdateBegin()`,
860           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecGhostUpdateEnd()`,
861           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`
862 @*/
863 PetscErrorCode VecMPISetGhost(Vec vv, PetscInt nghost, const PetscInt ghosts[])
864 {
865   PetscBool flg;
866 
867   PetscFunctionBegin;
868   PetscCall(PetscObjectTypeCompare((PetscObject)vv, VECMPI, &flg));
869   /* if this is already a fully built VECMPI, destroy it and rebuild it with ghosting */
870   if (flg) {
871     PetscInt     n, N;
872     Vec_MPI     *w;
873     PetscScalar *larray;
874     IS           from, to;
875     MPI_Comm     comm;
876 
877     PetscCall(PetscObjectGetComm((PetscObject)vv, &comm));
878     n = vv->map->n;
879     N = vv->map->N;
880     PetscUseTypeMethod(vv, destroy);
881     PetscCall(VecSetSizes(vv, n, N));
882     PetscCall(VecCreate_MPI_Private(vv, PETSC_TRUE, nghost, NULL));
883     w = (Vec_MPI *)(vv)->data;
884     /* Create local representation */
885     PetscCall(VecGetArray(vv, &larray));
886     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n + nghost, larray, &w->localrep));
887     PetscCall(VecRestoreArray(vv, &larray));
888 
889     /*
890      Create scatter context for scattering (updating) ghost values
891      */
892     PetscCall(ISCreateGeneral(comm, nghost, ghosts, PETSC_COPY_VALUES, &from));
893     PetscCall(ISCreateStride(PETSC_COMM_SELF, nghost, n, 1, &to));
894     PetscCall(VecScatterCreate(vv, from, w->localrep, to, &w->localupdate));
895     PetscCall(ISDestroy(&to));
896 
897     w->ghost                         = from;
898     vv->ops->getlocaltoglobalmapping = VecGetLocalToGlobalMapping_MPI_VecGhost;
899   } else {
900     PetscCheck(vv->ops->create != VecCreate_MPI, PetscObjectComm((PetscObject)vv), PETSC_ERR_ARG_WRONGSTATE, "Must set local or global size before setting ghosting");
901     PetscCheck(((PetscObject)vv)->type_name, PetscObjectComm((PetscObject)vv), PETSC_ERR_ARG_WRONGSTATE, "Must set type to VECMPI before ghosting");
902   }
903   PetscFunctionReturn(PETSC_SUCCESS);
904 }
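
/*
   A hedged sketch (an editorial addition, not part of the original source): VecMPISetGhost()
   must come after both the sizes and the type have been set; n, nghost, and ghosts are as in
   the VecCreateGhost() example above.

     Vec x;

     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
     PetscCall(VecSetSizes(x, n, PETSC_DETERMINE));
     PetscCall(VecSetType(x, VECMPI));
     PetscCall(VecMPISetGhost(x, nghost, ghosts));
*/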
905 
906 /* ------------------------------------------------------------------------------------------*/
907 /*@C
908   VecCreateGhostBlockWithArray - Creates a parallel vector with ghost padding on each processor;
909   the caller allocates the array space. Indices in the ghost region are based on blocks.
910 
911   Collective
912 
913   Input Parameters:
914 + comm   - the MPI communicator to use
915 . bs     - block size
916 . n      - local vector length
917 . N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
918 . nghost - number of local ghost blocks
919 . ghosts - global indices of ghost blocks (or `NULL` if not needed), counts are by block not by index, these do not need to be in increasing order (sorted)
920 - array  - the space to store the vector values (as long as `n + nghost*bs`)
921 
922   Output Parameter:
923 . vv - the global vector representation (without ghost points as part of vector)
924 
925   Level: advanced
926 
927   Notes:
928   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
929   of the vector.
930 
931   `n` is the local vector size (total local size, not the number of blocks) while `nghost`
932   is the number of blocks in the ghost portion, i.e. the number of elements in the ghost
933   portion is `bs*nghost`
934 
935 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
936           `VecCreateGhost()`, `VecCreateSeqWithArray()`, `VecCreateMPIWithArray()`,
937           `VecCreateGhostWithArray()`, `VecCreateGhostBlock()`
938 @*/
939 PetscErrorCode VecCreateGhostBlockWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], const PetscScalar array[], Vec *vv)
940 {
941   Vec_MPI               *w;
942   PetscScalar           *larray;
943   IS                     from, to;
944   ISLocalToGlobalMapping ltog;
945   PetscInt               rstart, i, nb, *indices;
946 
947   PetscFunctionBegin;
948   *vv = NULL;
949 
950   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size");
951   PetscCheck(nghost != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local ghost size");
952   PetscCheck(nghost >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ghost length must be >= 0");
953   PetscCheck(n % bs == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local size must be a multiple of block size");
954   PetscCall(PetscSplitOwnership(comm, &n, &N));
955   /* Create global representation */
956   PetscCall(VecCreate(comm, vv));
957   PetscCall(VecSetSizes(*vv, n, N));
958   PetscCall(VecSetBlockSize(*vv, bs));
959   PetscCall(VecCreate_MPI_Private(*vv, PETSC_TRUE, nghost * bs, array));
960   w = (Vec_MPI *)(*vv)->data;
961   /* Create local representation */
962   PetscCall(VecGetArray(*vv, &larray));
963   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, bs, n + bs * nghost, larray, &w->localrep));
964   PetscCall(VecRestoreArray(*vv, &larray));
965 
966   /*
967        Create scatter context for scattering (updating) ghost values
968   */
969   PetscCall(ISCreateBlock(comm, bs, nghost, ghosts, PETSC_COPY_VALUES, &from));
970   PetscCall(ISCreateStride(PETSC_COMM_SELF, bs * nghost, n, 1, &to));
971   PetscCall(VecScatterCreate(*vv, from, w->localrep, to, &w->localupdate));
972   PetscCall(ISDestroy(&to));
973   PetscCall(ISDestroy(&from));
974 
975   /* set local to global mapping for ghosted vector */
976   nb = n / bs;
977   PetscCall(PetscMalloc1(nb + nghost, &indices));
978   PetscCall(VecGetOwnershipRange(*vv, &rstart, NULL));
979   rstart = rstart / bs;
980 
981   for (i = 0; i < nb; i++) indices[i] = rstart + i;
982   for (i = 0; i < nghost; i++) indices[nb + i] = ghosts[i];
983 
984   PetscCall(ISLocalToGlobalMappingCreate(comm, bs, nb + nghost, indices, PETSC_OWN_POINTER, &ltog));
985   PetscCall(VecSetLocalToGlobalMapping(*vv, ltog));
986   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
987   PetscFunctionReturn(PETSC_SUCCESS);
988 }
989 
990 /*@
991   VecCreateGhostBlock - Creates a parallel vector with ghost padding on each processor.
992   The indexing of the ghost points is done with blocks.
993 
994   Collective
995 
996   Input Parameters:
997 + comm   - the MPI communicator to use
998 . bs     - the block size
999 . n      - local vector length
1000 . N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
1001 . nghost - number of local ghost blocks
1002 - ghosts - global indices of ghost blocks, counts are by block, not by individual index, these do not need to be in increasing order (sorted)
1003 
1004   Output Parameter:
1005 . vv - the global vector representation (without ghost points as part of vector)
1006 
1007   Level: advanced
1008 
1009   Notes:
1010   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
1011   of the vector.
1012 
1013   `n` is the local vector size (total local size not the number of blocks) while `nghost`
1014   is the number of blocks in the ghost portion, i.e. the number of elements in the ghost
1015   portion is `bs*nghost`
1016 
1017 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
1018           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
1019           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecCreateGhostBlockWithArray()`
1020 @*/
1021 PetscErrorCode VecCreateGhostBlock(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], Vec *vv)
1022 {
1023   PetscFunctionBegin;
1024   PetscCall(VecCreateGhostBlockWithArray(comm, bs, n, N, nghost, ghosts, NULL, vv));
1025   PetscFunctionReturn(PETSC_SUCCESS);
1026 }
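
/*
   A hedged usage sketch (an editorial addition, not part of the original source): with bs = 2
   each ghost index below names a block of two entries, so the local form has length
   n + bs*nghost. All sizes and indices are illustrative.

     Vec      x;
     PetscInt bs = 2, n = 6, nghost = 1;
     PetscInt ghosts[1] = {0}; // block index assumed owned by another rank

     PetscCall(VecCreateGhostBlock(PETSC_COMM_WORLD, bs, n, PETSC_DETERMINE, nghost, ghosts, &x));
     PetscCall(VecGhostUpdateBegin(x, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall(VecGhostUpdateEnd(x, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall(VecDestroy(&x));
*/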
1027