1 /*
2    This file contains routines for Parallel vector operations.
3  */
4 #include <petscsys.h>
5 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
6 
7 PETSC_INTERN PetscErrorCode VecView_MPI_Draw(Vec, PetscViewer);
8 
9 PetscErrorCode VecPlaceArray_MPI(Vec vin, const PetscScalar *a)
10 {
11   Vec_MPI *v = (Vec_MPI *)vin->data;
12 
13   PetscFunctionBegin;
14   PetscCheck(!v->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
15   v->unplacedarray = v->array; /* save previous array so reset can bring it back */
16   v->array         = (PetscScalar *)a;
17   if (v->localrep) PetscCall(VecPlaceArray(v->localrep, a));
18   PetscFunctionReturn(PETSC_SUCCESS);
19 }
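
/*
   Illustrative usage sketch (not part of this file): VecPlaceArray_MPI() is reached through the
   public VecPlaceArray()/VecResetArray() pair. The buffer name "work" below is a hypothetical
   caller-owned array that must hold at least the local length of the vector.

     PetscInt     nlocal;
     PetscScalar *work;
     PetscCall(VecGetLocalSize(x, &nlocal));
     PetscCall(PetscCalloc1(nlocal, &work));
     PetscCall(VecPlaceArray(x, work));  // x now reads/writes the caller's buffer
     PetscCall(VecSet(x, 1.0));          // fills work[] with 1.0
     PetscCall(VecResetArray(x));        // restores the original storage saved in unplacedarray
     PetscCall(PetscFree(work));
*/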
20 
21 PetscErrorCode VecDuplicate_MPI(Vec win, Vec *v)
22 {
23   Vec_MPI     *vw, *w = (Vec_MPI *)win->data;
24   PetscScalar *array;
25 
26   PetscFunctionBegin;
27   PetscCall(VecCreateWithLayout_Private(win->map, v));
28 
29   PetscCall(VecCreate_MPI_Private(*v, PETSC_TRUE, w->nghost, NULL));
30   vw           = (Vec_MPI *)(*v)->data;
31   (*v)->ops[0] = win->ops[0];
32 
33   /* save local representation of the parallel vector (and scatter) if it exists */
34   if (w->localrep) {
35     PetscCall(VecGetArray(*v, &array));
36     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, PetscAbs(win->map->bs), win->map->n + w->nghost, array, &vw->localrep));
37     vw->localrep->ops[0] = w->localrep->ops[0];
38     PetscCall(VecRestoreArray(*v, &array));
39 
40     vw->localupdate = w->localupdate;
41     if (vw->localupdate) PetscCall(PetscObjectReference((PetscObject)vw->localupdate));
42 
43     vw->ghost = w->ghost;
44     if (vw->ghost) PetscCall(PetscObjectReference((PetscObject)vw->ghost));
45   }
46 
47   /* New vector should inherit stashing property of parent */
48   (*v)->stash.donotstash   = win->stash.donotstash;
49   (*v)->stash.ignorenegidx = win->stash.ignorenegidx;
50 
51   PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*v)->olist));
52   PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*v)->qlist));
53 
54   (*v)->bstash.bs = win->bstash.bs;
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
58 static PetscErrorCode VecDuplicateVecs_MPI_GEMV(Vec w, PetscInt m, Vec *V[])
59 {
60   Vec_MPI *wmpi = (Vec_MPI *)w->data;
61 
62   PetscFunctionBegin;
63   // Currently only do GEMV for vectors without ghosts. Note w might be a VECMPI subclass object.
64   // This routine relies on the duplicate operation being VecDuplicate_MPI. If not, bail out to the default.
65   if (wmpi->nghost || w->ops->duplicate != VecDuplicate_MPI) {
66     w->ops->duplicatevecs = VecDuplicateVecs_Default;
67     PetscCall(VecDuplicateVecs(w, m, V));
68   } else {
69     PetscInt     nlocal;
70     PetscScalar *array;
71     PetscInt64   lda; // use 64-bit as we will do "m * lda"
72 
73     PetscCall(PetscMalloc1(m, V));
74     PetscCall(VecGetLocalSize(w, &nlocal));
75     lda = nlocal;
76     lda = ((lda + 31) / 32) * 32; // round lda up so each vector's data starts on a 32-element boundary
77 
78     PetscCall(PetscCalloc1(m * lda, &array));
79     for (PetscInt i = 0; i < m; i++) {
80       Vec v;
81       PetscCall(VecCreateMPIWithLayoutAndArray_Private(w->map, PetscSafePointerPlusOffset(array, i * lda), &v));
82       PetscCall(PetscObjectListDuplicate(((PetscObject)w)->olist, &((PetscObject)v)->olist));
83       PetscCall(PetscFunctionListDuplicate(((PetscObject)w)->qlist, &((PetscObject)v)->qlist));
84       v->ops->view          = w->ops->view;
85       v->stash.donotstash   = w->stash.donotstash;
86       v->stash.ignorenegidx = w->stash.ignorenegidx;
87       v->stash.bs           = w->stash.bs;
88       (*V)[i]               = v;
89     }
90     // So when the first vector is destroyed it will destroy the array
91     if (m) ((Vec_MPI *)(*V)[0]->data)->array_allocated = array;
92     // Disable replacearray on the first vector: freeing its memory would also free the storage backing the others in the group.
93     // Replacing the array of the other vectors is fine, since they do not own their arrays.
94     if (m > 1) (*V)[0]->ops->replacearray = VecReplaceArray_Default_GEMV_Error;
95   }
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
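
/*
   Worked example of the packed layout above (illustrative numbers, not from this file): with
   nlocal = 1000 local entries, lda is rounded up to 1024, so m = 4 duplicated vectors share one
   PetscCalloc1'd buffer of 4 * 1024 scalars and vector i wraps the slice starting at
   array + i * 1024. Only vector 0 records array_allocated, so destroying vector 0 releases the
   whole buffer, and the GEMV-based MDot/MAXPY kernels can treat the group as a dense
   column-major matrix with leading dimension lda.
*/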
98 
99 static PetscErrorCode VecSetOption_MPI(Vec V, VecOption op, PetscBool flag)
100 {
101   Vec_MPI *v = (Vec_MPI *)V->data;
102 
103   PetscFunctionBegin;
104   switch (op) {
105   case VEC_IGNORE_OFF_PROC_ENTRIES:
106     V->stash.donotstash = flag;
107     break;
108   case VEC_IGNORE_NEGATIVE_INDICES:
109     V->stash.ignorenegidx = flag;
110     break;
111   case VEC_SUBSET_OFF_PROC_ENTRIES:
112     v->assembly_subset = flag;              /* See the same logic in MatAssembly wrt MAT_SUBSET_OFF_PROC_ENTRIES */
113     if (!v->assembly_subset) {              /* User indicates "do not reuse the communication pattern" */
114       PetscCall(VecAssemblyReset_MPI(V));   /* Reset existing pattern to free memory */
115       v->first_assembly_done = PETSC_FALSE; /* Mark the first assembly is not done */
116     }
117     break;
118   }
119   PetscFunctionReturn(PETSC_SUCCESS);
120 }
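
/*
   Usage sketch (illustrative only): when the same vector is assembled repeatedly with a subset of
   the same off-process entries, VEC_SUBSET_OFF_PROC_ENTRIES lets the BTS assembly below reuse its
   communication pattern instead of rebuilding it each time. Here n, idx, vals, and nsteps are
   hypothetical placeholders for the caller's data.

     PetscCall(VecSetOption(x, VEC_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE));
     for (PetscInt it = 0; it < nsteps; it++) {
       PetscCall(VecSetValues(x, n, idx, vals, ADD_VALUES));
       PetscCall(VecAssemblyBegin(x));
       PetscCall(VecAssemblyEnd(x));
     }
*/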
121 
122 PetscErrorCode VecResetArray_MPI(Vec vin)
123 {
124   Vec_MPI *v = (Vec_MPI *)vin->data;
125 
126   PetscFunctionBegin;
127   v->array         = v->unplacedarray;
128   v->unplacedarray = NULL;
129   if (v->localrep) PetscCall(VecResetArray(v->localrep));
130   PetscFunctionReturn(PETSC_SUCCESS);
131 }
132 
133 static PetscErrorCode VecAssemblySend_MPI_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
134 {
135   Vec                X   = (Vec)ctx;
136   Vec_MPI           *x   = (Vec_MPI *)X->data;
137   VecAssemblyHeader *hdr = (VecAssemblyHeader *)sdata;
138   PetscInt           bs  = X->map->bs;
139 
140   PetscFunctionBegin;
141   /* x->first_assembly_done indicates we are reusing a communication pattern. In that case, some
142      messages can be empty, but we still have to send them this time if we sent them before, because the
143      receiver is expecting them.
144    */
145   if (hdr->count || (x->first_assembly_done && x->sendptrs[rankid].ints)) {
146     PetscCallMPI(MPI_Isend(x->sendptrs[rankid].ints, hdr->count, MPIU_INT, rank, tag[0], comm, &req[0]));
147     PetscCallMPI(MPI_Isend(x->sendptrs[rankid].scalars, hdr->count, MPIU_SCALAR, rank, tag[1], comm, &req[1]));
148   }
149   if (hdr->bcount || (x->first_assembly_done && x->sendptrs[rankid].intb)) {
150     PetscCallMPI(MPI_Isend(x->sendptrs[rankid].intb, hdr->bcount, MPIU_INT, rank, tag[2], comm, &req[2]));
151     PetscCallMPI(MPI_Isend(x->sendptrs[rankid].scalarb, hdr->bcount * bs, MPIU_SCALAR, rank, tag[3], comm, &req[3]));
152   }
153   PetscFunctionReturn(PETSC_SUCCESS);
154 }
155 
156 static PetscErrorCode VecAssemblyRecv_MPI_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
157 {
158   Vec                X   = (Vec)ctx;
159   Vec_MPI           *x   = (Vec_MPI *)X->data;
160   VecAssemblyHeader *hdr = (VecAssemblyHeader *)rdata;
161   PetscInt           bs  = X->map->bs;
162   VecAssemblyFrame  *frame;
163 
164   PetscFunctionBegin;
165   PetscCall(PetscSegBufferGet(x->segrecvframe, 1, &frame));
166 
167   if (hdr->count) {
168     PetscCall(PetscSegBufferGet(x->segrecvint, hdr->count, &frame->ints));
169     PetscCallMPI(MPI_Irecv(frame->ints, hdr->count, MPIU_INT, rank, tag[0], comm, &req[0]));
170     PetscCall(PetscSegBufferGet(x->segrecvscalar, hdr->count, &frame->scalars));
171     PetscCallMPI(MPI_Irecv(frame->scalars, hdr->count, MPIU_SCALAR, rank, tag[1], comm, &req[1]));
172     frame->pendings = 2;
173   } else {
174     frame->ints     = NULL;
175     frame->scalars  = NULL;
176     frame->pendings = 0;
177   }
178 
179   if (hdr->bcount) {
180     PetscCall(PetscSegBufferGet(x->segrecvint, hdr->bcount, &frame->intb));
181     PetscCallMPI(MPI_Irecv(frame->intb, hdr->bcount, MPIU_INT, rank, tag[2], comm, &req[2]));
182     PetscCall(PetscSegBufferGet(x->segrecvscalar, hdr->bcount * bs, &frame->scalarb));
183     PetscCallMPI(MPI_Irecv(frame->scalarb, hdr->bcount * bs, MPIU_SCALAR, rank, tag[3], comm, &req[3]));
184     frame->pendingb = 2;
185   } else {
186     frame->intb     = NULL;
187     frame->scalarb  = NULL;
188     frame->pendingb = 0;
189   }
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 static PetscErrorCode VecAssemblyBegin_MPI_BTS(Vec X)
194 {
195   Vec_MPI *x = (Vec_MPI *)X->data;
196   MPI_Comm comm;
197   PetscInt i, j, jb, bs;
198 
199   PetscFunctionBegin;
200   if (X->stash.donotstash) PetscFunctionReturn(PETSC_SUCCESS);
201 
202   PetscCall(PetscObjectGetComm((PetscObject)X, &comm));
203   PetscCall(VecGetBlockSize(X, &bs));
204   if (PetscDefined(USE_DEBUG)) {
205     InsertMode addv;
206     PetscCall(MPIU_Allreduce((PetscEnum *)&X->stash.insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, comm));
207     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), comm, PETSC_ERR_ARG_NOTSAMETYPE, "Some processors inserted values while others added");
208   }
209   X->bstash.insertmode = X->stash.insertmode; /* Block stash implicitly tracks InsertMode of scalar stash */
210 
211   PetscCall(VecStashSortCompress_Private(&X->stash));
212   PetscCall(VecStashSortCompress_Private(&X->bstash));
213 
214   if (!x->sendranks) {
215     PetscMPIInt nowners, bnowners, *owners, *bowners;
216     PetscInt    ntmp;
217     PetscCall(VecStashGetOwnerList_Private(&X->stash, X->map, &nowners, &owners));
218     PetscCall(VecStashGetOwnerList_Private(&X->bstash, X->map, &bnowners, &bowners));
219     PetscCall(PetscMergeMPIIntArray(nowners, owners, bnowners, bowners, &ntmp, &x->sendranks));
220     x->nsendranks = ntmp;
221     PetscCall(PetscFree(owners));
222     PetscCall(PetscFree(bowners));
223     PetscCall(PetscMalloc1(x->nsendranks, &x->sendhdr));
224     PetscCall(PetscCalloc1(x->nsendranks, &x->sendptrs));
225   }
226   for (i = 0, j = 0, jb = 0; i < x->nsendranks; i++) {
227     PetscMPIInt rank         = x->sendranks[i];
228     x->sendhdr[i].insertmode = X->stash.insertmode;
229     /* Initialize pointers for non-empty stashes the first time around.  Subsequent assemblies with
230      * VEC_SUBSET_OFF_PROC_ENTRIES will leave the old pointers (dangling because the stash has been collected) when
231      * there is nothing new to send, so that size-zero messages get sent instead. */
232     x->sendhdr[i].count = 0;
233     if (X->stash.n) {
234       x->sendptrs[i].ints    = &X->stash.idx[j];
235       x->sendptrs[i].scalars = &X->stash.array[j];
236       for (; j < X->stash.n && X->stash.idx[j] < X->map->range[rank + 1]; j++) x->sendhdr[i].count++;
237     }
238     x->sendhdr[i].bcount = 0;
239     if (X->bstash.n) {
240       x->sendptrs[i].intb    = &X->bstash.idx[jb];
241       x->sendptrs[i].scalarb = &X->bstash.array[jb * bs];
242       for (; jb < X->bstash.n && X->bstash.idx[jb] * bs < X->map->range[rank + 1]; jb++) x->sendhdr[i].bcount++;
243     }
244   }
245 
246   if (!x->segrecvint) PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &x->segrecvint));
247   if (!x->segrecvscalar) PetscCall(PetscSegBufferCreate(sizeof(PetscScalar), 1000, &x->segrecvscalar));
248   if (!x->segrecvframe) PetscCall(PetscSegBufferCreate(sizeof(VecAssemblyFrame), 50, &x->segrecvframe));
249   if (x->first_assembly_done) { /* this is not the first assembly */
250     PetscMPIInt tag[4];
251     for (i = 0; i < 4; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i]));
252     for (i = 0; i < x->nsendranks; i++) PetscCall(VecAssemblySend_MPI_Private(comm, tag, i, x->sendranks[i], x->sendhdr + i, x->sendreqs + 4 * i, X));
253     for (i = 0; i < x->nrecvranks; i++) PetscCall(VecAssemblyRecv_MPI_Private(comm, tag, x->recvranks[i], x->recvhdr + i, x->recvreqs + 4 * i, X));
254     x->use_status = PETSC_TRUE;
255   } else { /* First time assembly */
256     PetscCall(PetscCommBuildTwoSidedFReq(comm, 3, MPIU_INT, x->nsendranks, x->sendranks, (PetscInt *)x->sendhdr, &x->nrecvranks, &x->recvranks, &x->recvhdr, 4, &x->sendreqs, &x->recvreqs, VecAssemblySend_MPI_Private, VecAssemblyRecv_MPI_Private, X));
257     x->use_status = PETSC_FALSE;
258   }
259 
260   /* The first_assembly_done flag is only meaningful when x->assembly_subset is set:
261      in that case, record that the first assembly has been done so later assemblies can reuse the communication pattern.
262    */
263   x->first_assembly_done = x->assembly_subset;
264 
265   {
266     PetscInt nstash, reallocs;
267     PetscCall(VecStashGetInfo_Private(&X->stash, &nstash, &reallocs));
268     PetscCall(PetscInfo(X, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
269     PetscCall(VecStashGetInfo_Private(&X->bstash, &nstash, &reallocs));
270     PetscCall(PetscInfo(X, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
271   }
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode VecAssemblyEnd_MPI_BTS(Vec X)
276 {
277   Vec_MPI          *x  = (Vec_MPI *)X->data;
278   PetscInt          bs = X->map->bs;
279   PetscMPIInt       npending, *some_indices, r;
280   MPI_Status       *some_statuses;
281   PetscScalar      *xarray;
282   VecAssemblyFrame *frame;
283 
284   PetscFunctionBegin;
285   if (X->stash.donotstash) {
286     X->stash.insertmode  = NOT_SET_VALUES;
287     X->bstash.insertmode = NOT_SET_VALUES;
288     PetscFunctionReturn(PETSC_SUCCESS);
289   }
290 
291   PetscCheck(x->segrecvframe, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing segrecvframe! Probably you forgot to call VecAssemblyBegin first");
292   PetscCall(VecGetArray(X, &xarray));
293   PetscCall(PetscSegBufferExtractInPlace(x->segrecvframe, &frame));
294   PetscCall(PetscMalloc2(4 * x->nrecvranks, &some_indices, x->use_status ? 4 * x->nrecvranks : 0, &some_statuses));
295   for (r = 0, npending = 0; r < x->nrecvranks; r++) npending += frame[r].pendings + frame[r].pendingb;
296   while (npending > 0) {
297     PetscMPIInt ndone = 0, ii;
298     /* Filling MPI_Status fields requires some resources from the MPI library.  We skip it on the first assembly, or
299      * when VEC_SUBSET_OFF_PROC_ENTRIES has not been set, because we could exchange exact sizes in the initial
300      * rendezvous.  When the rendezvous is elided, however, we use MPI_Status to get actual message lengths, so that
301      * subsequent assembly can set a proper subset of the values. */
302     PetscCallMPI(MPI_Waitsome(4 * x->nrecvranks, x->recvreqs, &ndone, some_indices, x->use_status ? some_statuses : MPI_STATUSES_IGNORE));
303     for (ii = 0; ii < ndone; ii++) {
304       PetscInt     i     = some_indices[ii] / 4, j, k;
305       InsertMode   imode = (InsertMode)x->recvhdr[i].insertmode;
306       PetscInt    *recvint;
307       PetscScalar *recvscalar;
308       PetscBool    intmsg   = (PetscBool)(some_indices[ii] % 2 == 0);
309       PetscBool    blockmsg = (PetscBool)((some_indices[ii] % 4) / 2 == 1);
310       npending--;
311       if (!blockmsg) { /* Scalar stash */
312         PetscMPIInt count;
313         if (--frame[i].pendings > 0) continue;
314         if (x->use_status) {
315           PetscCallMPI(MPI_Get_count(&some_statuses[ii], intmsg ? MPIU_INT : MPIU_SCALAR, &count));
316         } else count = x->recvhdr[i].count;
317         for (j = 0, recvint = frame[i].ints, recvscalar = frame[i].scalars; j < count; j++, recvint++) {
318           PetscInt loc = *recvint - X->map->rstart;
319           PetscCheck(*recvint >= X->map->rstart && X->map->rend > *recvint, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Received vector entry %" PetscInt_FMT " out of local range [%" PetscInt_FMT ",%" PetscInt_FMT ")", *recvint, X->map->rstart, X->map->rend);
320           switch (imode) {
321           case ADD_VALUES:
322             xarray[loc] += *recvscalar++;
323             break;
324           case INSERT_VALUES:
325             xarray[loc] = *recvscalar++;
326             break;
327           default:
328             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", imode);
329           }
330         }
331       } else { /* Block stash */
332         PetscMPIInt count;
333         if (--frame[i].pendingb > 0) continue;
334         if (x->use_status) {
335           PetscCallMPI(MPI_Get_count(&some_statuses[ii], intmsg ? MPIU_INT : MPIU_SCALAR, &count));
336           if (!intmsg) count /= bs; /* Convert from number of scalars to number of blocks */
337         } else count = x->recvhdr[i].bcount;
338         for (j = 0, recvint = frame[i].intb, recvscalar = frame[i].scalarb; j < count; j++, recvint++) {
339           PetscInt loc = (*recvint) * bs - X->map->rstart;
340           switch (imode) {
341           case ADD_VALUES:
342             for (k = loc; k < loc + bs; k++) xarray[k] += *recvscalar++;
343             break;
344           case INSERT_VALUES:
345             for (k = loc; k < loc + bs; k++) xarray[k] = *recvscalar++;
346             break;
347           default:
348             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", imode);
349           }
350         }
351       }
352     }
353   }
354   PetscCall(VecRestoreArray(X, &xarray));
355   PetscCallMPI(MPI_Waitall(4 * x->nsendranks, x->sendreqs, MPI_STATUSES_IGNORE));
356   PetscCall(PetscFree2(some_indices, some_statuses));
357   if (x->assembly_subset) {
358     PetscCall(PetscSegBufferExtractInPlace(x->segrecvint, NULL));
359     PetscCall(PetscSegBufferExtractInPlace(x->segrecvscalar, NULL));
360   } else {
361     PetscCall(VecAssemblyReset_MPI(X));
362   }
363 
364   X->stash.insertmode  = NOT_SET_VALUES;
365   X->bstash.insertmode = NOT_SET_VALUES;
366   PetscCall(VecStashScatterEnd_Private(&X->stash));
367   PetscCall(VecStashScatterEnd_Private(&X->bstash));
368   PetscFunctionReturn(PETSC_SUCCESS);
369 }
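
/*
   Sketch of the call sequence that exercises the BTS assembly pair above (illustrative only):
   off-process contributions added with VecSetValues() are stashed locally, VecAssemblyBegin()
   posts the point-to-point traffic set up in VecAssemblyBegin_MPI_BTS(), and VecAssemblyEnd()
   drains the received entries into the owner's local array.

     PetscInt    N, rstart, rend;
     PetscScalar one = 1.0;
     PetscCall(VecGetSize(x, &N));
     PetscCall(VecGetOwnershipRange(x, &rstart, &rend));
     if (rend < N) PetscCall(VecSetValues(x, 1, &rend, &one, ADD_VALUES)); // entry owned by the next rank
     PetscCall(VecAssemblyBegin(x));
     PetscCall(VecAssemblyEnd(x));
*/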
370 
371 PetscErrorCode VecAssemblyReset_MPI(Vec X)
372 {
373   Vec_MPI *x = (Vec_MPI *)X->data;
374 
375   PetscFunctionBegin;
376   PetscCall(PetscFree(x->sendreqs));
377   PetscCall(PetscFree(x->recvreqs));
378   PetscCall(PetscFree(x->sendranks));
379   PetscCall(PetscFree(x->recvranks));
380   PetscCall(PetscFree(x->sendhdr));
381   PetscCall(PetscFree(x->recvhdr));
382   PetscCall(PetscFree(x->sendptrs));
383   PetscCall(PetscSegBufferDestroy(&x->segrecvint));
384   PetscCall(PetscSegBufferDestroy(&x->segrecvscalar));
385   PetscCall(PetscSegBufferDestroy(&x->segrecvframe));
386   PetscFunctionReturn(PETSC_SUCCESS);
387 }
388 
389 static PetscErrorCode VecSetFromOptions_MPI(Vec X, PetscOptionItems *PetscOptionsObject)
390 {
391 #if !defined(PETSC_HAVE_MPIUNI)
392   PetscBool flg = PETSC_FALSE, set;
393 
394   PetscFunctionBegin;
395   PetscOptionsHeadBegin(PetscOptionsObject, "VecMPI Options");
396   PetscCall(PetscOptionsBool("-vec_assembly_legacy", "Use MPI 1 version of assembly", "", flg, &flg, &set));
397   if (set) {
398     X->ops->assemblybegin = flg ? VecAssemblyBegin_MPI : VecAssemblyBegin_MPI_BTS;
399     X->ops->assemblyend   = flg ? VecAssemblyEnd_MPI : VecAssemblyEnd_MPI_BTS;
400   }
401   PetscOptionsHeadEnd();
402 #else
403   PetscFunctionBegin;
404   X->ops->assemblybegin = VecAssemblyBegin_MPI;
405   X->ops->assemblyend   = VecAssemblyEnd_MPI;
406 #endif
407   PetscFunctionReturn(PETSC_SUCCESS);
408 }
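
/*
   Runtime selection sketch (illustrative only; "./app" is a placeholder for the application
   binary): when VecSetFromOptions() is called on the vector, the legacy (non-BTS) assembly can
   be requested from the command line, e.g.

     ./app -vec_assembly_legacy

   which switches assemblybegin/assemblyend to VecAssemblyBegin_MPI/VecAssemblyEnd_MPI.
*/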
409 
410 static PetscErrorCode VecGetLocalToGlobalMapping_MPI_VecGhost(Vec X, ISLocalToGlobalMapping *ltg)
411 {
412   PetscInt       *indices, n, nghost, rstart, i;
413   IS              ghostis;
414   const PetscInt *ghostidx;
415   MPI_Comm        comm;
416 
417   PetscFunctionBegin;
418   if (X->map->mapping) {
419     *ltg = X->map->mapping;
420     PetscFunctionReturn(PETSC_SUCCESS);
421   }
422   PetscCall(VecGhostGetGhostIS(X, &ghostis));
423   PetscCall(ISGetLocalSize(ghostis, &nghost));
424   PetscCall(VecGetLocalSize(X, &n));
425   PetscCall(ISGetIndices(ghostis, &ghostidx));
426   /* set local to global mapping for ghosted vector */
427   PetscCall(PetscMalloc1(n + nghost, &indices));
428   PetscCall(VecGetOwnershipRange(X, &rstart, NULL));
429   for (i = 0; i < n; i++) { indices[i] = rstart + i; }
430   for (i = 0; i < nghost; i++) { indices[n + i] = ghostidx[i]; }
431   PetscCall(ISRestoreIndices(ghostis, &ghostidx));
432   PetscCall(PetscObjectGetComm((PetscObject)X, &comm));
433   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, n + nghost, indices, PETSC_OWN_POINTER, &X->map->mapping));
434   *ltg = X->map->mapping;
435   PetscFunctionReturn(PETSC_SUCCESS);
436 }
437 
438 static struct _VecOps DvOps = {PetscDesignatedInitializer(duplicate, VecDuplicate_MPI), /* 1 */
439                                PetscDesignatedInitializer(duplicatevecs, VecDuplicateVecs_Default),
440                                PetscDesignatedInitializer(destroyvecs, VecDestroyVecs_Default),
441                                PetscDesignatedInitializer(dot, VecDot_MPI),
442                                PetscDesignatedInitializer(mdot, VecMDot_MPI),
443                                PetscDesignatedInitializer(norm, VecNorm_MPI),
444                                PetscDesignatedInitializer(tdot, VecTDot_MPI),
445                                PetscDesignatedInitializer(mtdot, VecMTDot_MPI),
446                                PetscDesignatedInitializer(scale, VecScale_Seq),
447                                PetscDesignatedInitializer(copy, VecCopy_Seq), /* 10 */
448                                PetscDesignatedInitializer(set, VecSet_Seq),
449                                PetscDesignatedInitializer(swap, VecSwap_Seq),
450                                PetscDesignatedInitializer(axpy, VecAXPY_Seq),
451                                PetscDesignatedInitializer(axpby, VecAXPBY_Seq),
452                                PetscDesignatedInitializer(maxpy, VecMAXPY_Seq),
453                                PetscDesignatedInitializer(aypx, VecAYPX_Seq),
454                                PetscDesignatedInitializer(waxpy, VecWAXPY_Seq),
455                                PetscDesignatedInitializer(axpbypcz, VecAXPBYPCZ_Seq),
456                                PetscDesignatedInitializer(pointwisemult, VecPointwiseMult_Seq),
457                                PetscDesignatedInitializer(pointwisedivide, VecPointwiseDivide_Seq),
458                                PetscDesignatedInitializer(setvalues, VecSetValues_MPI), /* 20 */
459                                PetscDesignatedInitializer(assemblybegin, VecAssemblyBegin_MPI_BTS),
460                                PetscDesignatedInitializer(assemblyend, VecAssemblyEnd_MPI_BTS),
461                                PetscDesignatedInitializer(getarray, NULL),
462                                PetscDesignatedInitializer(getsize, VecGetSize_MPI),
463                                PetscDesignatedInitializer(getlocalsize, VecGetSize_Seq),
464                                PetscDesignatedInitializer(restorearray, NULL),
465                                PetscDesignatedInitializer(max, VecMax_MPI),
466                                PetscDesignatedInitializer(min, VecMin_MPI),
467                                PetscDesignatedInitializer(setrandom, VecSetRandom_Seq),
468                                PetscDesignatedInitializer(setoption, VecSetOption_MPI),
469                                PetscDesignatedInitializer(setvaluesblocked, VecSetValuesBlocked_MPI),
470                                PetscDesignatedInitializer(destroy, VecDestroy_MPI),
471                                PetscDesignatedInitializer(view, VecView_MPI),
472                                PetscDesignatedInitializer(placearray, VecPlaceArray_MPI),
473                                PetscDesignatedInitializer(replacearray, VecReplaceArray_Seq),
474                                PetscDesignatedInitializer(dot_local, VecDot_Seq),
475                                PetscDesignatedInitializer(tdot_local, VecTDot_Seq),
476                                PetscDesignatedInitializer(norm_local, VecNorm_Seq),
477                                PetscDesignatedInitializer(mdot_local, VecMDot_Seq),
478                                PetscDesignatedInitializer(mtdot_local, VecMTDot_Seq),
479                                PetscDesignatedInitializer(load, VecLoad_Default),
480                                PetscDesignatedInitializer(reciprocal, VecReciprocal_Default),
481                                PetscDesignatedInitializer(conjugate, VecConjugate_Seq),
482                                PetscDesignatedInitializer(setlocaltoglobalmapping, NULL),
483                                PetscDesignatedInitializer(getlocaltoglobalmapping, VecGetLocalToGlobalMapping_MPI_VecGhost),
484                                PetscDesignatedInitializer(setvalueslocal, NULL),
485                                PetscDesignatedInitializer(resetarray, VecResetArray_MPI),
486                                PetscDesignatedInitializer(setfromoptions, VecSetFromOptions_MPI), /*set from options */
487                                PetscDesignatedInitializer(maxpointwisedivide, VecMaxPointwiseDivide_Seq),
488                                PetscDesignatedInitializer(pointwisemax, VecPointwiseMax_Seq),
489                                PetscDesignatedInitializer(pointwisemaxabs, VecPointwiseMaxAbs_Seq),
490                                PetscDesignatedInitializer(pointwisemin, VecPointwiseMin_Seq),
491                                PetscDesignatedInitializer(getvalues, VecGetValues_MPI),
492                                PetscDesignatedInitializer(sqrt, NULL),
493                                PetscDesignatedInitializer(abs, NULL),
494                                PetscDesignatedInitializer(exp, NULL),
495                                PetscDesignatedInitializer(log, NULL),
496                                PetscDesignatedInitializer(shift, NULL),
497                                PetscDesignatedInitializer(create, NULL), /* really? */
498                                PetscDesignatedInitializer(stridegather, VecStrideGather_Default),
499                                PetscDesignatedInitializer(stridescatter, VecStrideScatter_Default),
500                                PetscDesignatedInitializer(dotnorm2, NULL),
501                                PetscDesignatedInitializer(getsubvector, NULL),
502                                PetscDesignatedInitializer(restoresubvector, NULL),
503                                PetscDesignatedInitializer(getarrayread, NULL),
504                                PetscDesignatedInitializer(restorearrayread, NULL),
505                                PetscDesignatedInitializer(stridesubsetgather, VecStrideSubSetGather_Default),
506                                PetscDesignatedInitializer(stridesubsetscatter, VecStrideSubSetScatter_Default),
507                                PetscDesignatedInitializer(viewnative, VecView_MPI),
508                                PetscDesignatedInitializer(loadnative, NULL),
509                                PetscDesignatedInitializer(createlocalvector, NULL),
510                                PetscDesignatedInitializer(getlocalvector, NULL),
511                                PetscDesignatedInitializer(restorelocalvector, NULL),
512                                PetscDesignatedInitializer(getlocalvectorread, NULL),
513                                PetscDesignatedInitializer(restorelocalvectorread, NULL),
514                                PetscDesignatedInitializer(bindtocpu, NULL),
515                                PetscDesignatedInitializer(getarraywrite, NULL),
516                                PetscDesignatedInitializer(restorearraywrite, NULL),
517                                PetscDesignatedInitializer(getarrayandmemtype, NULL),
518                                PetscDesignatedInitializer(restorearrayandmemtype, NULL),
519                                PetscDesignatedInitializer(getarrayreadandmemtype, NULL),
520                                PetscDesignatedInitializer(restorearrayreadandmemtype, NULL),
521                                PetscDesignatedInitializer(getarraywriteandmemtype, NULL),
522                                PetscDesignatedInitializer(restorearraywriteandmemtype, NULL),
523                                PetscDesignatedInitializer(concatenate, NULL),
524                                PetscDesignatedInitializer(sum, NULL),
525                                PetscDesignatedInitializer(setpreallocationcoo, VecSetPreallocationCOO_MPI),
526                                PetscDesignatedInitializer(setvaluescoo, VecSetValuesCOO_MPI),
527                                PetscDesignatedInitializer(errorwnorm, NULL)};
528 
529 /*
530     VecCreate_MPI_Private - Basic create routine called by VecCreate_MPI() (i.e. VecCreateMPI()),
531     VecCreateMPIWithArray(), VecCreate_Shared() (i.e. VecCreateShared()), VecCreateGhost(),
532     VecDuplicate_MPI(), VecCreateGhostWithArray(), and VecDuplicate_Shared()
533 
534     If alloc is true and array is NULL then this routine allocates the space, otherwise
535     no space is allocated.
536 */
537 PetscErrorCode VecCreate_MPI_Private(Vec v, PetscBool alloc, PetscInt nghost, const PetscScalar array[])
538 {
539   Vec_MPI  *s;
540   PetscBool mdot_use_gemv  = PETSC_TRUE;
541   PetscBool maxpy_use_gemv = PETSC_FALSE; // default is false as we saw bad performance with vendors' GEMV with tall skinny matrices.
542 
543   PetscFunctionBegin;
544   PetscCall(PetscNew(&s));
545   v->data   = (void *)s;
546   v->ops[0] = DvOps;
547 
548   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_mdot_use_gemv", &mdot_use_gemv, NULL));
549   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_maxpy_use_gemv", &maxpy_use_gemv, NULL));
550 
551   // allocate multiple vectors together
552   if (mdot_use_gemv || maxpy_use_gemv) v->ops[0].duplicatevecs = VecDuplicateVecs_MPI_GEMV;
553 
554   if (mdot_use_gemv) {
555     v->ops[0].duplicatevecs = VecDuplicateVecs_MPI_GEMV;
556     v->ops[0].mdot          = VecMDot_MPI_GEMV;
557     v->ops[0].mdot_local    = VecMDot_Seq_GEMV;
558     v->ops[0].mtdot         = VecMTDot_MPI_GEMV;
559     v->ops[0].mtdot_local   = VecMTDot_Seq_GEMV;
560   }
561   if (maxpy_use_gemv) v->ops[0].maxpy = VecMAXPY_Seq_GEMV;
562 
563   s->nghost      = nghost;
564   v->petscnative = PETSC_TRUE;
565   if (array) v->offloadmask = PETSC_OFFLOAD_CPU;
566 
567   PetscCall(PetscLayoutSetUp(v->map));
568 
569   s->array           = (PetscScalar *)array;
570   s->array_allocated = NULL;
571   if (alloc && !array) {
572     PetscInt n = v->map->n + nghost;
573     PetscCall(PetscCalloc1(n, &s->array));
574     s->array_allocated = s->array;
575     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_2], 0));
576     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_1], 0));
577     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_INFINITY], 0));
578   }
579 
580   /* By default parallel vectors do not have local representation */
581   s->localrep    = NULL;
582   s->localupdate = NULL;
583   s->ghost       = NULL;
584 
585   v->stash.insertmode  = NOT_SET_VALUES;
586   v->bstash.insertmode = NOT_SET_VALUES;
587   /* create the stashes. The block-size for bstash is set later when
588      VecSetValuesBlocked is called.
589   */
590   PetscCall(VecStashCreate_Private(PetscObjectComm((PetscObject)v), 1, &v->stash));
591   PetscCall(VecStashCreate_Private(PetscObjectComm((PetscObject)v), PetscAbs(v->map->bs), &v->bstash));
592 
593 #if defined(PETSC_HAVE_MATLAB)
594   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEnginePut_C", VecMatlabEnginePut_Default));
595   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEngineGet_C", VecMatlabEngineGet_Default));
596 #endif
597   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPI));
598   PetscFunctionReturn(PETSC_SUCCESS);
599 }
600 
601 /*
602   Create a VECMPI with the given layout and array
603 
604   Collective
605 
606   Input Parameters:
607 + map   - the layout
608 - array - the array on host
609 
610   Output Parameter:
611 . V  - The vector object
612 */
613 PetscErrorCode VecCreateMPIWithLayoutAndArray_Private(PetscLayout map, const PetscScalar array[], Vec *V)
614 {
615   PetscFunctionBegin;
616   PetscCall(VecCreateWithLayout_Private(map, V));
617   PetscCall(VecCreate_MPI_Private(*V, PETSC_FALSE, 0, array));
618   PetscFunctionReturn(PETSC_SUCCESS);
619 }
620 
621 /*MC
622    VECMPI - VECMPI = "mpi" - The basic parallel vector
623 
624    Options Database Key:
625 . -vec_type mpi - sets the vector type to `VECMPI` during a call to `VecSetFromOptions()`
626 
627   Level: beginner
628 
629 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VecCreateMPI()`
630 M*/
631 
632 PetscErrorCode VecCreate_MPI(Vec vv)
633 {
634   PetscFunctionBegin;
635   PetscCall(VecCreate_MPI_Private(vv, PETSC_TRUE, 0, NULL));
636   PetscFunctionReturn(PETSC_SUCCESS);
637 }
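
/*
   Minimal creation sketch for a VECMPI (illustrative only; the global length 100 is arbitrary):
   VecCreate_MPI() is normally reached through VecSetType() or VecCreateMPI().

     Vec x;
     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
     PetscCall(VecSetSizes(x, PETSC_DECIDE, 100));
     PetscCall(VecSetType(x, VECMPI));   // or VecSetFromOptions(x) together with -vec_type mpi
     PetscCall(VecDestroy(&x));
*/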
638 
639 /*MC
640    VECSTANDARD = "standard" - A `VECSEQ` on one process and `VECMPI` on more than one process
641 
642    Options Database Key:
643 . -vec_type standard - sets a vector type to standard on calls to `VecSetFromOptions()`
644 
645   Level: beginner
646 
647 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreateMPI()`
648 M*/
649 
650 PETSC_EXTERN PetscErrorCode VecCreate_Standard(Vec v)
651 {
652   PetscMPIInt size;
653 
654   PetscFunctionBegin;
655   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
656   if (size == 1) {
657     PetscCall(VecSetType(v, VECSEQ));
658   } else {
659     PetscCall(VecSetType(v, VECMPI));
660   }
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 /*@C
665   VecCreateMPIWithArray - Creates a parallel, array-style vector,
666   where the user provides the array space to store the vector values.
667 
668   Collective
669 
670   Input Parameters:
671 + comm  - the MPI communicator to use
672 . bs    - block size, same meaning as `VecSetBlockSize()`
673 . n     - local vector length, cannot be `PETSC_DECIDE`
674 . N     - global vector length (or `PETSC_DETERMINE` to have calculated)
675 - array - the user provided array to store the vector values
676 
677   Output Parameter:
678 . vv - the vector
679 
680   Level: intermediate
681 
682   Notes:
683   Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the
684   same type as an existing vector.
685 
686   If the user-provided array is `NULL`, then `VecPlaceArray()` can be used
687   at a later stage to SET the array for storing the vector values.
688 
689   PETSc does NOT free `array` when the vector is destroyed via `VecDestroy()`.
690 
691   The user should not free `array` until the vector is destroyed.
692 
693 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
694           `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
695 @*/
696 PetscErrorCode VecCreateMPIWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar array[], Vec *vv)
697 {
698   PetscFunctionBegin;
699   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
700   PetscCall(PetscSplitOwnership(comm, &n, &N));
701   PetscCall(VecCreate(comm, vv));
702   PetscCall(VecSetSizes(*vv, n, N));
703   PetscCall(VecSetBlockSize(*vv, bs));
704   PetscCall(VecCreate_MPI_Private(*vv, PETSC_FALSE, 0, array));
705   PetscFunctionReturn(PETSC_SUCCESS);
706 }
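
/*
   Usage sketch (illustrative only; nlocal is an arbitrary local length): wrap caller-owned
   storage as a parallel vector. The array must stay valid for the life of the vector and is not
   freed by VecDestroy().

     PetscInt     nlocal = 10;
     PetscScalar *vals;
     Vec          x;
     PetscCall(PetscCalloc1(nlocal, &vals));
     PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, nlocal, PETSC_DETERMINE, vals, &x));
     PetscCall(VecDestroy(&x));
     PetscCall(PetscFree(vals)); // the caller still owns vals
*/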
707 
708 /*@C
709   VecCreateGhostWithArray - Creates a parallel vector with ghost padding on each processor;
710   the caller allocates the array space.
711 
712   Collective
713 
714   Input Parameters:
715 + comm   - the MPI communicator to use
716 . n      - local vector length
717 . N      - global vector length (or `PETSC_DETERMINE` to have calculated if `n` is given)
718 . nghost - number of local ghost points
719 . ghosts - global indices of ghost points (or `NULL` if not needed), these do not need to be in increasing order (sorted)
720 - array  - the space to store the vector values (as long as n + nghost)
721 
722   Output Parameter:
723 . vv - the global vector representation (without ghost points as part of vector)
724 
725   Level: advanced
726 
727   Notes:
728   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
729   of the vector.
730 
731   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
732 
733 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
734           `VecCreateGhost()`, `VecCreateSeqWithArray()`, `VecCreateMPIWithArray()`,
735           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`, `VecMPISetGhost()`
736 @*/
737 PetscErrorCode VecCreateGhostWithArray(MPI_Comm comm, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], const PetscScalar array[], Vec *vv)
738 {
739   Vec_MPI     *w;
740   PetscScalar *larray;
741   IS           from, to;
742 
743   PetscFunctionBegin;
744   *vv = NULL;
745 
746   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size");
747   PetscCheck(nghost != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local ghost size");
748   PetscCheck(nghost >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ghost length must be >= 0");
749   PetscCall(PetscSplitOwnership(comm, &n, &N));
750   /* Create global representation */
751   PetscCall(VecCreate(comm, vv));
752   PetscCall(VecSetSizes(*vv, n, N));
753   PetscCall(VecCreate_MPI_Private(*vv, PETSC_TRUE, nghost, array));
754   w = (Vec_MPI *)(*vv)->data;
755   /* Create local representation */
756   PetscCall(VecGetArray(*vv, &larray));
757   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n + nghost, larray, &w->localrep));
758   PetscCall(VecRestoreArray(*vv, &larray));
759 
760   /*
761        Create scatter context for scattering (updating) ghost values
762   */
763   PetscCall(ISCreateGeneral(comm, nghost, ghosts, PETSC_COPY_VALUES, &from));
764   PetscCall(ISCreateStride(PETSC_COMM_SELF, nghost, n, 1, &to));
765   PetscCall(VecScatterCreate(*vv, from, w->localrep, to, &w->localupdate));
766   PetscCall(ISDestroy(&to));
767 
768   w->ghost                            = from;
769   (*vv)->ops->getlocaltoglobalmapping = VecGetLocalToGlobalMapping_MPI_VecGhost;
770   PetscFunctionReturn(PETSC_SUCCESS);
771 }
772 
773 /*@
774   VecGhostGetGhostIS - Return ghosting indices of a ghost vector
775 
776   Input Parameter:
777 . X - ghost vector
778 
779   Output Parameter:
780 . ghost - ghosting indices
781 
782   Level: beginner
783 
784 .seealso: `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`
785 @*/
786 PetscErrorCode VecGhostGetGhostIS(Vec X, IS *ghost)
787 {
788   Vec_MPI  *w;
789   PetscBool flg;
790 
791   PetscFunctionBegin;
792   PetscValidType(X, 1);
793   PetscAssertPointer(ghost, 2);
794   PetscCall(PetscObjectTypeCompare((PetscObject)X, VECMPI, &flg));
795   PetscCheck(flg, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "VecGhostGetGhostIS is not supported for vec type %s", ((PetscObject)X)->type_name);
796   w      = (Vec_MPI *)(X)->data;
797   *ghost = w->ghost;
798   PetscFunctionReturn(PETSC_SUCCESS);
799 }
800 
801 /*@
802   VecCreateGhost - Creates a parallel vector with ghost padding on each processor.
803 
804   Collective
805 
806   Input Parameters:
807 + comm   - the MPI communicator to use
808 . n      - local vector length
809 . N      - global vector length (or `PETSC_DETERMINE` to have calculated if `n` is given)
810 . nghost - number of local ghost points
811 - ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)
812 
813   Output Parameter:
814 . vv - the global vector representation (without ghost points as part of vector)
815 
816   Level: advanced
817 
818   Notes:
819   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
820   of the vector.
821 
822   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
823 
824 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
825           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`, `VecGhostUpdateBegin()`,
826           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecGhostUpdateEnd()`,
827           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`, `VecMPISetGhost()`
828 
829 @*/
830 PetscErrorCode VecCreateGhost(MPI_Comm comm, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], Vec *vv)
831 {
832   PetscFunctionBegin;
833   PetscCall(VecCreateGhostWithArray(comm, n, N, nghost, ghosts, NULL, vv));
834   PetscFunctionReturn(PETSC_SUCCESS);
835 }
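
/*
   Usage sketch (illustrative only): each rank owns 5 entries and ghosts two global indices,
   updated with a forward scatter and read through the local (ghosted) form. The ghost indices
   below are placeholders; a real code would pick indices owned by neighboring ranks.

     Vec                x, xlocal;
     PetscInt           ghosts[2] = {0, 1};
     const PetscScalar *a;
     PetscCall(VecCreateGhost(PETSC_COMM_WORLD, 5, PETSC_DETERMINE, 2, ghosts, &x));
     PetscCall(VecSet(x, 2.0));
     PetscCall(VecGhostUpdateBegin(x, INSERT_VALUES, SCATTER_FORWARD)); // owner -> ghost copies
     PetscCall(VecGhostUpdateEnd(x, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall(VecGhostGetLocalForm(x, &xlocal)); // length 5 + 2 on each rank
     PetscCall(VecGetArrayRead(xlocal, &a));
     PetscCall(VecRestoreArrayRead(xlocal, &a));
     PetscCall(VecGhostRestoreLocalForm(x, &xlocal));
     PetscCall(VecDestroy(&x));
*/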
836 
837 /*@
838   VecMPISetGhost - Sets the ghost points for an MPI ghost vector
839 
840   Collective
841 
842   Input Parameters:
843 + vv     - the MPI vector
844 . nghost - number of local ghost points
845 - ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)
846 
847   Level: advanced
848 
849   Notes:
850   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
851   of the vector.
852 
853   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
854 
855   You must call this AFTER you have set the type of the vector (with` VecSetType()`) and the size (with `VecSetSizes()`).
856 
857 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
858           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`, `VecGhostUpdateBegin()`,
859           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecGhostUpdateEnd()`,
860           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`
861 @*/
862 PetscErrorCode VecMPISetGhost(Vec vv, PetscInt nghost, const PetscInt ghosts[])
863 {
864   PetscBool flg;
865 
866   PetscFunctionBegin;
867   PetscCall(PetscObjectTypeCompare((PetscObject)vv, VECMPI, &flg));
868   /* if the vector is already a fully built VECMPI, destroy its data and rebuild it with ghosting */
869   if (flg) {
870     PetscInt     n, N;
871     Vec_MPI     *w;
872     PetscScalar *larray;
873     IS           from, to;
874     MPI_Comm     comm;
875 
876     PetscCall(PetscObjectGetComm((PetscObject)vv, &comm));
877     n = vv->map->n;
878     N = vv->map->N;
879     PetscUseTypeMethod(vv, destroy);
880     PetscCall(VecSetSizes(vv, n, N));
881     PetscCall(VecCreate_MPI_Private(vv, PETSC_TRUE, nghost, NULL));
882     w = (Vec_MPI *)(vv)->data;
883     /* Create local representation */
884     PetscCall(VecGetArray(vv, &larray));
885     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n + nghost, larray, &w->localrep));
886     PetscCall(VecRestoreArray(vv, &larray));
887 
888     /*
889      Create scatter context for scattering (updating) ghost values
890      */
891     PetscCall(ISCreateGeneral(comm, nghost, ghosts, PETSC_COPY_VALUES, &from));
892     PetscCall(ISCreateStride(PETSC_COMM_SELF, nghost, n, 1, &to));
893     PetscCall(VecScatterCreate(vv, from, w->localrep, to, &w->localupdate));
894     PetscCall(ISDestroy(&to));
895 
896     w->ghost                         = from;
897     vv->ops->getlocaltoglobalmapping = VecGetLocalToGlobalMapping_MPI_VecGhost;
898   } else {
899     PetscCheck(vv->ops->create != VecCreate_MPI, PetscObjectComm((PetscObject)vv), PETSC_ERR_ARG_WRONGSTATE, "Must set local or global size before setting ghosting");
900     PetscCheck(((PetscObject)vv)->type_name, PetscObjectComm((PetscObject)vv), PETSC_ERR_ARG_WRONGSTATE, "Must set type to VECMPI before ghosting");
901   }
902   PetscFunctionReturn(PETSC_SUCCESS);
903 }
904 
905 /* ------------------------------------------------------------------------------------------*/
906 /*@C
907   VecCreateGhostBlockWithArray - Creates a parallel vector with ghost padding on each processor;
908   the caller allocates the array space. Indices in the ghost region are based on blocks.
909 
910   Collective
911 
912   Input Parameters:
913 + comm   - the MPI communicator to use
914 . bs     - block size
915 . n      - local vector length
916 . N      - global vector length (or `PETSC_DETERMINE` to have calculated if `n` is given)
917 . nghost - number of local ghost blocks
918 . ghosts - global indices of ghost blocks (or `NULL` if not needed), counts are by block not by index, these do not need to be in increasing order (sorted)
919 - array  - the space to store the vector values (as long as `n + nghost*bs`)
920 
921   Output Parameter:
922 . vv - the global vector representation (without ghost points as part of vector)
923 
924   Level: advanced
925 
926   Notes:
927   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
928   of the vector.
929 
930   `n` is the local vector size (total local size not the number of blocks) while `nghost`
931   is the number of blocks in the ghost portion, i.e. the number of elements in the ghost
932   portion is `bs*nghost`
933 
934 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
935           `VecCreateGhost()`, `VecCreateSeqWithArray()`, `VecCreateMPIWithArray()`,
936           `VecCreateGhostWithArray()`, `VecCreateGhostBlock()`
937 @*/
938 PetscErrorCode VecCreateGhostBlockWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], const PetscScalar array[], Vec *vv)
939 {
940   Vec_MPI               *w;
941   PetscScalar           *larray;
942   IS                     from, to;
943   ISLocalToGlobalMapping ltog;
944   PetscInt               rstart, i, nb, *indices;
945 
946   PetscFunctionBegin;
947   *vv = NULL;
948 
949   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size");
950   PetscCheck(nghost != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local ghost size");
951   PetscCheck(nghost >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ghost length must be >= 0");
952   PetscCheck(n % bs == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local size must be a multiple of block size");
953   PetscCall(PetscSplitOwnership(comm, &n, &N));
954   /* Create global representation */
955   PetscCall(VecCreate(comm, vv));
956   PetscCall(VecSetSizes(*vv, n, N));
957   PetscCall(VecSetBlockSize(*vv, bs));
958   PetscCall(VecCreate_MPI_Private(*vv, PETSC_TRUE, nghost * bs, array));
959   w = (Vec_MPI *)(*vv)->data;
960   /* Create local representation */
961   PetscCall(VecGetArray(*vv, &larray));
962   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, bs, n + bs * nghost, larray, &w->localrep));
963   PetscCall(VecRestoreArray(*vv, &larray));
964 
965   /*
966        Create scatter context for scattering (updating) ghost values
967   */
968   PetscCall(ISCreateBlock(comm, bs, nghost, ghosts, PETSC_COPY_VALUES, &from));
969   PetscCall(ISCreateStride(PETSC_COMM_SELF, bs * nghost, n, 1, &to));
970   PetscCall(VecScatterCreate(*vv, from, w->localrep, to, &w->localupdate));
971   PetscCall(ISDestroy(&to));
972   PetscCall(ISDestroy(&from));
973 
974   /* set local to global mapping for ghosted vector */
975   nb = n / bs;
976   PetscCall(PetscMalloc1(nb + nghost, &indices));
977   PetscCall(VecGetOwnershipRange(*vv, &rstart, NULL));
978   rstart = rstart / bs;
979 
980   for (i = 0; i < nb; i++) indices[i] = rstart + i;
981   for (i = 0; i < nghost; i++) indices[nb + i] = ghosts[i];
982 
983   PetscCall(ISLocalToGlobalMappingCreate(comm, bs, nb + nghost, indices, PETSC_OWN_POINTER, &ltog));
984   PetscCall(VecSetLocalToGlobalMapping(*vv, ltog));
985   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
986   PetscFunctionReturn(PETSC_SUCCESS);
987 }
988 
989 /*@
990   VecCreateGhostBlock - Creates a parallel vector with ghost padding on each processor.
991   The indexing of the ghost points is done by blocks.
992 
993   Collective
994 
995   Input Parameters:
996 + comm   - the MPI communicator to use
997 . bs     - the block size
998 . n      - local vector length
999 . N      - global vector length (or `PETSC_DETERMINE` to have calculated if `n` is given)
1000 . nghost - number of local ghost blocks
1001 - ghosts - global indices of ghost blocks, counts are by block, not by individual index, these do not need to be in increasing order (sorted)
1002 
1003   Output Parameter:
1004 . vv - the global vector representation (without ghost points as part of vector)
1005 
1006   Level: advanced
1007 
1008   Notes:
1009   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
1010   of the vector.
1011 
1012   `n` is the local vector size (total local size not the number of blocks) while `nghost`
1013   is the number of blocks in the ghost portion, i.e. the number of elements in the ghost
1014   portion is `bs*nghost`
1015 
1016 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
1017           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
1018           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecCreateGhostBlockWithArray()`
1019 @*/
1020 PetscErrorCode VecCreateGhostBlock(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], Vec *vv)
1021 {
1022   PetscFunctionBegin;
1023   PetscCall(VecCreateGhostBlockWithArray(comm, bs, n, N, nghost, ghosts, NULL, vv));
1024   PetscFunctionReturn(PETSC_SUCCESS);
1025 }
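
/*
   Usage sketch (illustrative only; the ghost block index is a placeholder): with block size 2,
   each ghost index names a block, so a local length of 6 plus 1 ghost block gives a local
   (ghosted) form of length 8 on each rank.

     Vec      x;
     PetscInt ghostblock = 0;
     PetscCall(VecCreateGhostBlock(PETSC_COMM_WORLD, 2, 6, PETSC_DETERMINE, 1, &ghostblock, &x));
     PetscCall(VecGhostUpdateBegin(x, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall(VecGhostUpdateEnd(x, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall(VecDestroy(&x));
*/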
1026