xref: /petsc/src/vec/vec/impls/mpi/pbvec.c (revision 0d5ef98addb54ced821e7e5fade9ccac0ec8ccde)
1 /*
2    This file contains routines for Parallel vector operations.
3  */
4 #include <petscsys.h>
5 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
6 
7 PETSC_INTERN PetscErrorCode VecView_MPI_Draw(Vec, PetscViewer);
8 
9 PetscErrorCode VecPlaceArray_MPI(Vec vin, const PetscScalar *a)
10 {
11   Vec_MPI *v = (Vec_MPI *)vin->data;
12 
13   PetscFunctionBegin;
14   PetscCheck(!v->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
15   v->unplacedarray = v->array; /* save previous array so reset can bring it back */
16   v->array         = (PetscScalar *)a;
17   if (v->localrep) PetscCall(VecPlaceArray(v->localrep, a));
18   PetscFunctionReturn(PETSC_SUCCESS);
19 }
20 
21 PetscErrorCode VecDuplicate_MPI(Vec win, Vec *v)
22 {
23   Vec_MPI     *vw, *w = (Vec_MPI *)win->data;
24   PetscScalar *array;
25 
26   PetscFunctionBegin;
27   PetscCall(VecCreateWithLayout_Private(win->map, v));
28 
29   PetscCall(VecCreate_MPI_Private(*v, PETSC_TRUE, w->nghost, NULL));
30   vw           = (Vec_MPI *)(*v)->data;
31   (*v)->ops[0] = win->ops[0];
32 
33   /* save local representation of the parallel vector (and scatter) if it exists */
34   if (w->localrep) {
35     PetscCall(VecGetArray(*v, &array));
36     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, PetscAbs(win->map->bs), win->map->n + w->nghost, array, &vw->localrep));
37     vw->localrep->ops[0] = w->localrep->ops[0];
38     PetscCall(VecRestoreArray(*v, &array));
39 
40     vw->localupdate = w->localupdate;
41     if (vw->localupdate) PetscCall(PetscObjectReference((PetscObject)vw->localupdate));
42 
43     vw->ghost = w->ghost;
44     if (vw->ghost) PetscCall(PetscObjectReference((PetscObject)vw->ghost));
45   }
46 
47   /* New vector should inherit stashing property of parent */
48   (*v)->stash.donotstash   = win->stash.donotstash;
49   (*v)->stash.ignorenegidx = win->stash.ignorenegidx;
50 
51   PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*v)->olist));
52   PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*v)->qlist));
53 
54   (*v)->stash.bs  = win->stash.bs;
55   (*v)->bstash.bs = win->bstash.bs;
56   PetscFunctionReturn(PETSC_SUCCESS);
57 }
58 
59 static PetscErrorCode VecDuplicateVecs_MPI_GEMV(Vec w, PetscInt m, Vec *V[])
60 {
61   Vec_MPI *wmpi = (Vec_MPI *)w->data;
62 
63   PetscFunctionBegin;
64   // Currently only do GEMV for vectors without ghosts. Note w might be a VECMPI subclass object.
65   // This routine relies on the duplicate operation being VecDuplicate_MPI. If not, bail out to the default.
66   if (wmpi->localrep || w->ops->duplicate != VecDuplicate_MPI) {
67     w->ops->duplicatevecs = VecDuplicateVecs_Default;
68     PetscCall(VecDuplicateVecs(w, m, V));
69   } else {
70     PetscScalar *array;
71     PetscInt64   lda; // use 64-bit as we will do "m * lda"
72 
73     PetscCall(PetscMalloc1(m, V));
74     VecGetLocalSizeAligned(w, 64, &lda); // get in lda the 64-bytes aligned local size
75 
76     PetscCall(PetscCalloc1(m * lda, &array));
77     for (PetscInt i = 0; i < m; i++) {
78       Vec v;
79       PetscCall(VecCreateMPIWithLayoutAndArray_Private(w->map, PetscSafePointerPlusOffset(array, i * lda), &v));
80       PetscCall(VecSetType(v, ((PetscObject)w)->type_name));
81       PetscCall(PetscObjectListDuplicate(((PetscObject)w)->olist, &((PetscObject)v)->olist));
82       PetscCall(PetscFunctionListDuplicate(((PetscObject)w)->qlist, &((PetscObject)v)->qlist));
83       v->ops[0]             = w->ops[0];
84       v->stash.donotstash   = w->stash.donotstash;
85       v->stash.ignorenegidx = w->stash.ignorenegidx;
86       v->stash.bs           = w->stash.bs;
87       v->bstash.bs          = w->bstash.bs;
88       (*V)[i]               = v;
89     }
90     // So when the first vector is destroyed it will destroy the array
91     if (m) ((Vec_MPI *)(*V)[0]->data)->array_allocated = array;
92     // disable replacearray of the first vector, as freeing its memory also frees others in the group.
93     // But replacearray of others is ok, as they don't own their array.
94     if (m > 1) (*V)[0]->ops->replacearray = VecReplaceArray_Default_GEMV_Error;
95   }
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
99 static PetscErrorCode VecSetOption_MPI(Vec V, VecOption op, PetscBool flag)
100 {
101   Vec_MPI *v = (Vec_MPI *)V->data;
102 
103   PetscFunctionBegin;
104   switch (op) {
105   case VEC_IGNORE_OFF_PROC_ENTRIES:
106     V->stash.donotstash = flag;
107     break;
108   case VEC_IGNORE_NEGATIVE_INDICES:
109     V->stash.ignorenegidx = flag;
110     break;
111   case VEC_SUBSET_OFF_PROC_ENTRIES:
112     v->assembly_subset = flag;              /* See the same logic in MatAssembly wrt MAT_SUBSET_OFF_PROC_ENTRIES */
113     if (!v->assembly_subset) {              /* User indicates "do not reuse the communication pattern" */
114       PetscCall(VecAssemblyReset_MPI(V));   /* Reset existing pattern to free memory */
115       v->first_assembly_done = PETSC_FALSE; /* Mark the first assembly is not done */
116     }
117     break;
118   }
119   PetscFunctionReturn(PETSC_SUCCESS);
120 }
121 
122 PetscErrorCode VecResetArray_MPI(Vec vin)
123 {
124   Vec_MPI *v = (Vec_MPI *)vin->data;
125 
126   PetscFunctionBegin;
127   v->array         = v->unplacedarray;
128   v->unplacedarray = NULL;
129   if (v->localrep) PetscCall(VecResetArray(v->localrep));
130   PetscFunctionReturn(PETSC_SUCCESS);
131 }
132 
133 static PetscErrorCode VecAssemblySend_MPI_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
134 {
135   Vec                X   = (Vec)ctx;
136   Vec_MPI           *x   = (Vec_MPI *)X->data;
137   VecAssemblyHeader *hdr = (VecAssemblyHeader *)sdata;
138   PetscInt           bs  = X->map->bs;
139 
140   PetscFunctionBegin;
141   /* x->first_assembly_done indicates we are reusing a communication network. In that case, some
142      messages can be empty, but we have to send them this time if we sent them before because the
143      receiver is expecting them.
144    */
145   if (hdr->count || (x->first_assembly_done && x->sendptrs[rankid].ints)) {
146     PetscCallMPI(MPIU_Isend(x->sendptrs[rankid].ints, hdr->count, MPIU_INT, rank, tag[0], comm, &req[0]));
147     PetscCallMPI(MPIU_Isend(x->sendptrs[rankid].scalars, hdr->count, MPIU_SCALAR, rank, tag[1], comm, &req[1]));
148   }
149   if (hdr->bcount || (x->first_assembly_done && x->sendptrs[rankid].intb)) {
150     PetscCallMPI(MPIU_Isend(x->sendptrs[rankid].intb, hdr->bcount, MPIU_INT, rank, tag[2], comm, &req[2]));
151     PetscCallMPI(MPIU_Isend(x->sendptrs[rankid].scalarb, hdr->bcount * bs, MPIU_SCALAR, rank, tag[3], comm, &req[3]));
152   }
153   PetscFunctionReturn(PETSC_SUCCESS);
154 }
155 
156 static PetscErrorCode VecAssemblyRecv_MPI_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
157 {
158   Vec                X   = (Vec)ctx;
159   Vec_MPI           *x   = (Vec_MPI *)X->data;
160   VecAssemblyHeader *hdr = (VecAssemblyHeader *)rdata;
161   PetscInt           bs  = X->map->bs;
162   VecAssemblyFrame  *frame;
163 
164   PetscFunctionBegin;
165   PetscCall(PetscSegBufferGet(x->segrecvframe, 1, &frame));
166 
167   if (hdr->count) {
168     PetscCall(PetscSegBufferGet(x->segrecvint, hdr->count, &frame->ints));
169     PetscCallMPI(MPIU_Irecv(frame->ints, hdr->count, MPIU_INT, rank, tag[0], comm, &req[0]));
170     PetscCall(PetscSegBufferGet(x->segrecvscalar, hdr->count, &frame->scalars));
171     PetscCallMPI(MPIU_Irecv(frame->scalars, hdr->count, MPIU_SCALAR, rank, tag[1], comm, &req[1]));
172     frame->pendings = 2;
173   } else {
174     frame->ints     = NULL;
175     frame->scalars  = NULL;
176     frame->pendings = 0;
177   }
178 
179   if (hdr->bcount) {
180     PetscCall(PetscSegBufferGet(x->segrecvint, hdr->bcount, &frame->intb));
181     PetscCallMPI(MPIU_Irecv(frame->intb, hdr->bcount, MPIU_INT, rank, tag[2], comm, &req[2]));
182     PetscCall(PetscSegBufferGet(x->segrecvscalar, hdr->bcount * bs, &frame->scalarb));
183     PetscCallMPI(MPIU_Irecv(frame->scalarb, hdr->bcount * bs, MPIU_SCALAR, rank, tag[3], comm, &req[3]));
184     frame->pendingb = 2;
185   } else {
186     frame->intb     = NULL;
187     frame->scalarb  = NULL;
188     frame->pendingb = 0;
189   }
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 static PetscErrorCode VecAssemblyBegin_MPI_BTS(Vec X)
194 {
195   Vec_MPI    *x = (Vec_MPI *)X->data;
196   MPI_Comm    comm;
197   PetscMPIInt i;
198   PetscInt    j, jb, bs;
199 
200   PetscFunctionBegin;
201   if (X->stash.donotstash) PetscFunctionReturn(PETSC_SUCCESS);
202 
203   PetscCall(PetscObjectGetComm((PetscObject)X, &comm));
204   PetscCall(VecGetBlockSize(X, &bs));
205   if (PetscDefined(USE_DEBUG)) {
206     InsertMode addv;
207 
208     PetscCallMPI(MPIU_Allreduce((PetscEnum *)&X->stash.insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, comm));
209     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), comm, PETSC_ERR_ARG_NOTSAMETYPE, "Some processors inserted values while others added");
210   }
211   X->bstash.insertmode = X->stash.insertmode; /* Block stash implicitly tracks InsertMode of scalar stash */
212 
213   PetscCall(VecStashSortCompress_Private(&X->stash));
214   PetscCall(VecStashSortCompress_Private(&X->bstash));
215 
216   if (!x->sendranks) {
217     PetscMPIInt nowners, bnowners, *owners, *bowners;
218     PetscInt    ntmp;
219 
220     PetscCall(VecStashGetOwnerList_Private(&X->stash, X->map, &nowners, &owners));
221     PetscCall(VecStashGetOwnerList_Private(&X->bstash, X->map, &bnowners, &bowners));
222     PetscCall(PetscMergeMPIIntArray(nowners, owners, bnowners, bowners, &ntmp, &x->sendranks));
223     PetscCall(PetscMPIIntCast(ntmp, &x->nsendranks));
224     PetscCall(PetscFree(owners));
225     PetscCall(PetscFree(bowners));
226     PetscCall(PetscMalloc1(x->nsendranks, &x->sendhdr));
227     PetscCall(PetscCalloc1(x->nsendranks, &x->sendptrs));
228   }
229   for (i = 0, j = 0, jb = 0; i < x->nsendranks; i++) {
230     PetscMPIInt rank = x->sendranks[i];
231 
232     x->sendhdr[i].insertmode = X->stash.insertmode;
233     /* Initialize pointers for non-empty stashes the first time around.  Subsequent assemblies with
234      * VEC_SUBSET_OFF_PROC_ENTRIES will leave the old pointers (dangling because the stash has been collected) when
235      * there is nothing new to send, so that size-zero messages get sent instead. */
236     x->sendhdr[i].count = 0;
237     if (X->stash.n) {
238       x->sendptrs[i].ints    = &X->stash.idx[j];
239       x->sendptrs[i].scalars = &X->stash.array[j];
240       for (; j < X->stash.n && X->stash.idx[j] < X->map->range[rank + 1]; j++) x->sendhdr[i].count++;
241     }
242     x->sendhdr[i].bcount = 0;
243     if (X->bstash.n) {
244       x->sendptrs[i].intb    = &X->bstash.idx[jb];
245       x->sendptrs[i].scalarb = &X->bstash.array[jb * bs];
246       for (; jb < X->bstash.n && X->bstash.idx[jb] * bs < X->map->range[rank + 1]; jb++) x->sendhdr[i].bcount++;
247     }
248   }
249 
250   if (!x->segrecvint) PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &x->segrecvint));
251   if (!x->segrecvscalar) PetscCall(PetscSegBufferCreate(sizeof(PetscScalar), 1000, &x->segrecvscalar));
252   if (!x->segrecvframe) PetscCall(PetscSegBufferCreate(sizeof(VecAssemblyFrame), 50, &x->segrecvframe));
253   if (x->first_assembly_done) { /* this is not the first assembly */
254     PetscMPIInt tag[4];
255     for (i = 0; i < 4; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i]));
256     for (i = 0; i < x->nsendranks; i++) PetscCall(VecAssemblySend_MPI_Private(comm, tag, i, x->sendranks[i], x->sendhdr + i, x->sendreqs + 4 * i, X));
257     for (i = 0; i < x->nrecvranks; i++) PetscCall(VecAssemblyRecv_MPI_Private(comm, tag, x->recvranks[i], x->recvhdr + i, x->recvreqs + 4 * i, X));
258     x->use_status = PETSC_TRUE;
259   } else { /* First time assembly */
260     PetscCall(PetscCommBuildTwoSidedFReq(comm, 3, MPIU_INT, x->nsendranks, x->sendranks, (PetscInt *)x->sendhdr, &x->nrecvranks, &x->recvranks, &x->recvhdr, 4, &x->sendreqs, &x->recvreqs, VecAssemblySend_MPI_Private, VecAssemblyRecv_MPI_Private, X));
261     x->use_status = PETSC_FALSE;
262   }
263 
264   /* The first_assembly_done flag is only meaningful when x->assembly_subset is set.
265      This line says when assembly_subset is set, then we mark that the first assembly is done.
266    */
267   x->first_assembly_done = x->assembly_subset;
268 
269   {
270     PetscInt nstash, reallocs;
271     PetscCall(VecStashGetInfo_Private(&X->stash, &nstash, &reallocs));
272     PetscCall(PetscInfo(X, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
273     PetscCall(VecStashGetInfo_Private(&X->bstash, &nstash, &reallocs));
274     PetscCall(PetscInfo(X, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
275   }
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode VecAssemblyEnd_MPI_BTS(Vec X)
280 {
281   Vec_MPI          *x  = (Vec_MPI *)X->data;
282   PetscInt          bs = X->map->bs;
283   PetscMPIInt       npending, *some_indices, r;
284   MPI_Status       *some_statuses;
285   PetscScalar      *xarray;
286   VecAssemblyFrame *frame;
287 
288   PetscFunctionBegin;
289   if (X->stash.donotstash) {
290     X->stash.insertmode  = NOT_SET_VALUES;
291     X->bstash.insertmode = NOT_SET_VALUES;
292     PetscFunctionReturn(PETSC_SUCCESS);
293   }
294 
295   PetscCheck(x->segrecvframe, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing segrecvframe! Probably you forgot to call VecAssemblyBegin() first");
296   PetscCall(VecGetArray(X, &xarray));
297   PetscCall(PetscSegBufferExtractInPlace(x->segrecvframe, &frame));
298   PetscCall(PetscMalloc2(4 * x->nrecvranks, &some_indices, x->use_status ? 4 * x->nrecvranks : 0, &some_statuses));
299   for (r = 0, npending = 0; r < x->nrecvranks; r++) npending += frame[r].pendings + frame[r].pendingb;
300   while (npending > 0) {
301     PetscMPIInt ndone = 0, ii;
302     /* Filling MPI_Status fields requires some resources from the MPI library.  We skip it on the first assembly, or
303      * when VEC_SUBSET_OFF_PROC_ENTRIES has not been set, because we could exchange exact sizes in the initial
304      * rendezvous.  When the rendezvous is elided, however, we use MPI_Status to get actual message lengths, so that
305      * subsequent assembly can set a proper subset of the values. */
306     PetscCallMPI(MPI_Waitsome(4 * x->nrecvranks, x->recvreqs, &ndone, some_indices, x->use_status ? some_statuses : MPI_STATUSES_IGNORE));
307     for (ii = 0; ii < ndone; ii++) {
308       PetscInt     i     = some_indices[ii] / 4, j, k;
309       InsertMode   imode = (InsertMode)x->recvhdr[i].insertmode;
310       PetscInt    *recvint;
311       PetscScalar *recvscalar;
312       PetscBool    intmsg   = (PetscBool)(some_indices[ii] % 2 == 0);
313       PetscBool    blockmsg = (PetscBool)((some_indices[ii] % 4) / 2 == 1);
314 
315       npending--;
316       if (!blockmsg) { /* Scalar stash */
317         PetscMPIInt count;
318 
319         if (--frame[i].pendings > 0) continue;
320         if (x->use_status) {
321           PetscCallMPI(MPI_Get_count(&some_statuses[ii], intmsg ? MPIU_INT : MPIU_SCALAR, &count));
322         } else {
323           PetscCall(PetscMPIIntCast(x->recvhdr[i].count, &count));
324         }
325         for (j = 0, recvint = frame[i].ints, recvscalar = frame[i].scalars; j < count; j++, recvint++) {
326           PetscInt loc = *recvint - X->map->rstart;
327 
328           PetscCheck(*recvint >= X->map->rstart && X->map->rend > *recvint, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Received vector entry %" PetscInt_FMT " out of local range [%" PetscInt_FMT ",%" PetscInt_FMT ")]", *recvint, X->map->rstart, X->map->rend);
329           switch (imode) {
330           case ADD_VALUES:
331             xarray[loc] += *recvscalar++;
332             break;
333           case INSERT_VALUES:
334             xarray[loc] = *recvscalar++;
335             break;
336           default:
337             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", imode);
338           }
339         }
340       } else { /* Block stash */
341         PetscMPIInt count;
342         if (--frame[i].pendingb > 0) continue;
343         if (x->use_status) {
344           PetscCallMPI(MPI_Get_count(&some_statuses[ii], intmsg ? MPIU_INT : MPIU_SCALAR, &count));
345           if (!intmsg) count /= bs; /* Convert from number of scalars to number of blocks */
346         } else {
347           PetscCall(PetscMPIIntCast(x->recvhdr[i].bcount, &count));
348         }
349         for (j = 0, recvint = frame[i].intb, recvscalar = frame[i].scalarb; j < count; j++, recvint++) {
350           PetscInt loc = (*recvint) * bs - X->map->rstart;
351           switch (imode) {
352           case ADD_VALUES:
353             for (k = loc; k < loc + bs; k++) xarray[k] += *recvscalar++;
354             break;
355           case INSERT_VALUES:
356             for (k = loc; k < loc + bs; k++) xarray[k] = *recvscalar++;
357             break;
358           default:
359             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", imode);
360           }
361         }
362       }
363     }
364   }
365   PetscCall(VecRestoreArray(X, &xarray));
366   PetscCallMPI(MPI_Waitall(4 * x->nsendranks, x->sendreqs, MPI_STATUSES_IGNORE));
367   PetscCall(PetscFree2(some_indices, some_statuses));
368   if (x->assembly_subset) {
369     PetscCall(PetscSegBufferExtractInPlace(x->segrecvint, NULL));
370     PetscCall(PetscSegBufferExtractInPlace(x->segrecvscalar, NULL));
371   } else {
372     PetscCall(VecAssemblyReset_MPI(X));
373   }
374 
375   X->stash.insertmode  = NOT_SET_VALUES;
376   X->bstash.insertmode = NOT_SET_VALUES;
377   PetscCall(VecStashScatterEnd_Private(&X->stash));
378   PetscCall(VecStashScatterEnd_Private(&X->bstash));
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
382 PetscErrorCode VecAssemblyReset_MPI(Vec X)
383 {
384   Vec_MPI *x = (Vec_MPI *)X->data;
385 
386   PetscFunctionBegin;
387   PetscCall(PetscFree(x->sendreqs));
388   PetscCall(PetscFree(x->recvreqs));
389   PetscCall(PetscFree(x->sendranks));
390   PetscCall(PetscFree(x->recvranks));
391   PetscCall(PetscFree(x->sendhdr));
392   PetscCall(PetscFree(x->recvhdr));
393   PetscCall(PetscFree(x->sendptrs));
394   PetscCall(PetscSegBufferDestroy(&x->segrecvint));
395   PetscCall(PetscSegBufferDestroy(&x->segrecvscalar));
396   PetscCall(PetscSegBufferDestroy(&x->segrecvframe));
397   PetscFunctionReturn(PETSC_SUCCESS);
398 }
399 
400 static PetscErrorCode VecSetFromOptions_MPI(Vec X, PetscOptionItems *PetscOptionsObject)
401 {
402 #if !defined(PETSC_HAVE_MPIUNI)
403   PetscBool flg = PETSC_FALSE, set;
404 
405   PetscFunctionBegin;
406   PetscOptionsHeadBegin(PetscOptionsObject, "VecMPI Options");
407   PetscCall(PetscOptionsBool("-vec_assembly_legacy", "Use MPI 1 version of assembly", "", flg, &flg, &set));
408   if (set) {
409     X->ops->assemblybegin = flg ? VecAssemblyBegin_MPI : VecAssemblyBegin_MPI_BTS;
410     X->ops->assemblyend   = flg ? VecAssemblyEnd_MPI : VecAssemblyEnd_MPI_BTS;
411   }
412   PetscOptionsHeadEnd();
413 #else
414   PetscFunctionBegin;
415   X->ops->assemblybegin = VecAssemblyBegin_MPI;
416   X->ops->assemblyend   = VecAssemblyEnd_MPI;
417 #endif
418   PetscFunctionReturn(PETSC_SUCCESS);
419 }
420 
421 static PetscErrorCode VecGetLocalToGlobalMapping_MPI_VecGhost(Vec X, ISLocalToGlobalMapping *ltg)
422 {
423   PetscInt       *indices, n, nghost, rstart, i;
424   IS              ghostis;
425   const PetscInt *ghostidx;
426 
427   PetscFunctionBegin;
428   if (X->map->mapping) {
429     *ltg = X->map->mapping;
430     PetscFunctionReturn(PETSC_SUCCESS);
431   }
432   PetscCall(VecGhostGetGhostIS(X, &ghostis));
433   PetscCall(ISGetLocalSize(ghostis, &nghost));
434   PetscCall(VecGetLocalSize(X, &n));
435   PetscCall(ISGetIndices(ghostis, &ghostidx));
436   /* set local to global mapping for ghosted vector */
437   PetscCall(PetscMalloc1(n + nghost, &indices));
438   PetscCall(VecGetOwnershipRange(X, &rstart, NULL));
439   for (i = 0; i < n; i++) indices[i] = rstart + i;
440   PetscCall(PetscArraycpy(indices + n, ghostidx, nghost));
441   PetscCall(ISRestoreIndices(ghostis, &ghostidx));
442   PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, n + nghost, indices, PETSC_OWN_POINTER, &X->map->mapping));
443   *ltg = X->map->mapping;
444   PetscFunctionReturn(PETSC_SUCCESS);
445 }
446 
447 static struct _VecOps DvOps = {PetscDesignatedInitializer(duplicate, VecDuplicate_MPI), /* 1 */
448                                PetscDesignatedInitializer(duplicatevecs, VecDuplicateVecs_Default),
449                                PetscDesignatedInitializer(destroyvecs, VecDestroyVecs_Default),
450                                PetscDesignatedInitializer(dot, VecDot_MPI),
451                                PetscDesignatedInitializer(mdot, VecMDot_MPI),
452                                PetscDesignatedInitializer(norm, VecNorm_MPI),
453                                PetscDesignatedInitializer(tdot, VecTDot_MPI),
454                                PetscDesignatedInitializer(mtdot, VecMTDot_MPI),
455                                PetscDesignatedInitializer(scale, VecScale_Seq),
456                                PetscDesignatedInitializer(copy, VecCopy_Seq), /* 10 */
457                                PetscDesignatedInitializer(set, VecSet_Seq),
458                                PetscDesignatedInitializer(swap, VecSwap_Seq),
459                                PetscDesignatedInitializer(axpy, VecAXPY_Seq),
460                                PetscDesignatedInitializer(axpby, VecAXPBY_Seq),
461                                PetscDesignatedInitializer(maxpy, VecMAXPY_Seq),
462                                PetscDesignatedInitializer(aypx, VecAYPX_Seq),
463                                PetscDesignatedInitializer(waxpy, VecWAXPY_Seq),
464                                PetscDesignatedInitializer(axpbypcz, VecAXPBYPCZ_Seq),
465                                PetscDesignatedInitializer(pointwisemult, VecPointwiseMult_Seq),
466                                PetscDesignatedInitializer(pointwisedivide, VecPointwiseDivide_Seq),
467                                PetscDesignatedInitializer(setvalues, VecSetValues_MPI), /* 20 */
468                                PetscDesignatedInitializer(assemblybegin, VecAssemblyBegin_MPI_BTS),
469                                PetscDesignatedInitializer(assemblyend, VecAssemblyEnd_MPI_BTS),
470                                PetscDesignatedInitializer(getarray, NULL),
471                                PetscDesignatedInitializer(getsize, VecGetSize_MPI),
472                                PetscDesignatedInitializer(getlocalsize, VecGetSize_Seq),
473                                PetscDesignatedInitializer(restorearray, NULL),
474                                PetscDesignatedInitializer(max, VecMax_MPI),
475                                PetscDesignatedInitializer(min, VecMin_MPI),
476                                PetscDesignatedInitializer(setrandom, VecSetRandom_Seq),
477                                PetscDesignatedInitializer(setoption, VecSetOption_MPI),
478                                PetscDesignatedInitializer(setvaluesblocked, VecSetValuesBlocked_MPI),
479                                PetscDesignatedInitializer(destroy, VecDestroy_MPI),
480                                PetscDesignatedInitializer(view, VecView_MPI),
481                                PetscDesignatedInitializer(placearray, VecPlaceArray_MPI),
482                                PetscDesignatedInitializer(replacearray, VecReplaceArray_Seq),
483                                PetscDesignatedInitializer(dot_local, VecDot_Seq),
484                                PetscDesignatedInitializer(tdot_local, VecTDot_Seq),
485                                PetscDesignatedInitializer(norm_local, VecNorm_Seq),
486                                PetscDesignatedInitializer(mdot_local, VecMDot_Seq),
487                                PetscDesignatedInitializer(mtdot_local, VecMTDot_Seq),
488                                PetscDesignatedInitializer(load, VecLoad_Default),
489                                PetscDesignatedInitializer(reciprocal, VecReciprocal_Default),
490                                PetscDesignatedInitializer(conjugate, VecConjugate_Seq),
491                                PetscDesignatedInitializer(setlocaltoglobalmapping, NULL),
492                                PetscDesignatedInitializer(getlocaltoglobalmapping, VecGetLocalToGlobalMapping_MPI_VecGhost),
493                                PetscDesignatedInitializer(setvalueslocal, NULL),
494                                PetscDesignatedInitializer(resetarray, VecResetArray_MPI),
495                                PetscDesignatedInitializer(setfromoptions, VecSetFromOptions_MPI), /*set from options */
496                                PetscDesignatedInitializer(maxpointwisedivide, VecMaxPointwiseDivide_Seq),
497                                PetscDesignatedInitializer(pointwisemax, VecPointwiseMax_Seq),
498                                PetscDesignatedInitializer(pointwisemaxabs, VecPointwiseMaxAbs_Seq),
499                                PetscDesignatedInitializer(pointwisemin, VecPointwiseMin_Seq),
500                                PetscDesignatedInitializer(getvalues, VecGetValues_MPI),
501                                PetscDesignatedInitializer(sqrt, NULL),
502                                PetscDesignatedInitializer(abs, NULL),
503                                PetscDesignatedInitializer(exp, NULL),
504                                PetscDesignatedInitializer(log, NULL),
505                                PetscDesignatedInitializer(shift, NULL),
506                                PetscDesignatedInitializer(create, NULL), /* really? */
507                                PetscDesignatedInitializer(stridegather, VecStrideGather_Default),
508                                PetscDesignatedInitializer(stridescatter, VecStrideScatter_Default),
509                                PetscDesignatedInitializer(dotnorm2, NULL),
510                                PetscDesignatedInitializer(getsubvector, NULL),
511                                PetscDesignatedInitializer(restoresubvector, NULL),
512                                PetscDesignatedInitializer(getarrayread, NULL),
513                                PetscDesignatedInitializer(restorearrayread, NULL),
514                                PetscDesignatedInitializer(stridesubsetgather, VecStrideSubSetGather_Default),
515                                PetscDesignatedInitializer(stridesubsetscatter, VecStrideSubSetScatter_Default),
516                                PetscDesignatedInitializer(viewnative, VecView_MPI),
517                                PetscDesignatedInitializer(loadnative, NULL),
518                                PetscDesignatedInitializer(createlocalvector, NULL),
519                                PetscDesignatedInitializer(getlocalvector, NULL),
520                                PetscDesignatedInitializer(restorelocalvector, NULL),
521                                PetscDesignatedInitializer(getlocalvectorread, NULL),
522                                PetscDesignatedInitializer(restorelocalvectorread, NULL),
523                                PetscDesignatedInitializer(bindtocpu, NULL),
524                                PetscDesignatedInitializer(getarraywrite, NULL),
525                                PetscDesignatedInitializer(restorearraywrite, NULL),
526                                PetscDesignatedInitializer(getarrayandmemtype, NULL),
527                                PetscDesignatedInitializer(restorearrayandmemtype, NULL),
528                                PetscDesignatedInitializer(getarrayreadandmemtype, NULL),
529                                PetscDesignatedInitializer(restorearrayreadandmemtype, NULL),
530                                PetscDesignatedInitializer(getarraywriteandmemtype, NULL),
531                                PetscDesignatedInitializer(restorearraywriteandmemtype, NULL),
532                                PetscDesignatedInitializer(concatenate, NULL),
533                                PetscDesignatedInitializer(sum, NULL),
534                                PetscDesignatedInitializer(setpreallocationcoo, VecSetPreallocationCOO_MPI),
535                                PetscDesignatedInitializer(setvaluescoo, VecSetValuesCOO_MPI),
536                                PetscDesignatedInitializer(errorwnorm, NULL)};
537 
538 /*
539     VecCreate_MPI_Private - Basic create routine called by VecCreate_MPI() (i.e. VecCreateMPI()),
540     VecCreateMPIWithArray(), VecCreate_Shared() (i.e. VecCreateShared()), VecCreateGhost(),
541     VecDuplicate_MPI(), VecCreateGhostWithArray(), VecDuplicate_MPI(), and VecDuplicate_Shared()
542 
543     If alloc is true and array is NULL then this routine allocates the space, otherwise
544     no space is allocated.
545 */
546 PetscErrorCode VecCreate_MPI_Private(Vec v, PetscBool alloc, PetscInt nghost, const PetscScalar array[])
547 {
548   Vec_MPI  *s;
549   PetscBool mdot_use_gemv  = PETSC_TRUE;
550   PetscBool maxpy_use_gemv = PETSC_FALSE; // default is false as we saw bad performance with vendors' GEMV with tall skinny matrices.
551 
552   PetscFunctionBegin;
553   PetscCall(PetscNew(&s));
554   v->data   = (void *)s;
555   v->ops[0] = DvOps;
556 
557   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_mdot_use_gemv", &mdot_use_gemv, NULL));
558   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_maxpy_use_gemv", &maxpy_use_gemv, NULL));
559 
560   // allocate multiple vectors together
561   if (mdot_use_gemv || maxpy_use_gemv) v->ops[0].duplicatevecs = VecDuplicateVecs_MPI_GEMV;
562 
563   if (mdot_use_gemv) {
564     v->ops[0].mdot        = VecMDot_MPI_GEMV;
565     v->ops[0].mdot_local  = VecMDot_Seq_GEMV;
566     v->ops[0].mtdot       = VecMTDot_MPI_GEMV;
567     v->ops[0].mtdot_local = VecMTDot_Seq_GEMV;
568   }
569   if (maxpy_use_gemv) v->ops[0].maxpy = VecMAXPY_Seq_GEMV;
570 
571   s->nghost      = nghost;
572   v->petscnative = PETSC_TRUE;
573   if (array) v->offloadmask = PETSC_OFFLOAD_CPU;
574 
575   PetscCall(PetscLayoutSetUp(v->map));
576 
577   s->array           = (PetscScalar *)array;
578   s->array_allocated = NULL;
579   if (alloc && !array) {
580     PetscInt n = v->map->n + nghost;
581     PetscCall(PetscCalloc1(n, &s->array));
582     s->array_allocated = s->array;
583     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_2], 0));
584     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_1], 0));
585     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_INFINITY], 0));
586   }
587 
588   /* By default parallel vectors do not have local representation */
589   s->localrep    = NULL;
590   s->localupdate = NULL;
591   s->ghost       = NULL;
592 
593   v->stash.insertmode  = NOT_SET_VALUES;
594   v->bstash.insertmode = NOT_SET_VALUES;
595   /* create the stashes. The block-size for bstash is set later when
596      VecSetValuesBlocked is called.
597   */
598   PetscCall(VecStashCreate_Private(PetscObjectComm((PetscObject)v), 1, &v->stash));
599   PetscCall(VecStashCreate_Private(PetscObjectComm((PetscObject)v), PetscAbs(v->map->bs), &v->bstash));
600 
601 #if defined(PETSC_HAVE_MATLAB)
602   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEnginePut_C", VecMatlabEnginePut_Default));
603   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEngineGet_C", VecMatlabEngineGet_Default));
604 #endif
605   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPI));
606   PetscFunctionReturn(PETSC_SUCCESS);
607 }
608 
609 /*
610   Create a VECMPI with the given layout and array
611 
612   Collective
613 
614   Input Parameter:
615 + map   - the layout
616 - array - the array on host
617 
618   Output Parameter:
619 . V  - The vector object
620 */
621 PetscErrorCode VecCreateMPIWithLayoutAndArray_Private(PetscLayout map, const PetscScalar array[], Vec *V)
622 {
623   PetscFunctionBegin;
624   PetscCall(VecCreateWithLayout_Private(map, V));
625   PetscCall(VecCreate_MPI_Private(*V, PETSC_FALSE, 0, array));
626   PetscFunctionReturn(PETSC_SUCCESS);
627 }
628 
629 /*MC
630    VECMPI - VECMPI = "mpi" - The basic parallel vector
631 
632    Options Database Key:
633 . -vec_type mpi - sets the vector type to `VECMPI` during a call to `VecSetFromOptions()`
634 
635   Level: beginner
636 
637 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateMPI()`
638 M*/
639 
640 PetscErrorCode VecCreate_MPI(Vec vv)
641 {
642   PetscFunctionBegin;
643   PetscCall(VecCreate_MPI_Private(vv, PETSC_TRUE, 0, NULL));
644   PetscFunctionReturn(PETSC_SUCCESS);
645 }
646 
647 /*MC
648    VECSTANDARD = "standard" - A `VECSEQ` on one process and `VECMPI` on more than one process
649 
650    Options Database Key:
651 . -vec_type standard - sets a vector type to standard on calls to `VecSetFromOptions()`
652 
653   Level: beginner
654 
655 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreateMPI()`
656 M*/
657 
658 PETSC_EXTERN PetscErrorCode VecCreate_Standard(Vec v)
659 {
660   PetscMPIInt size;
661 
662   PetscFunctionBegin;
663   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
664   if (size == 1) {
665     PetscCall(VecSetType(v, VECSEQ));
666   } else {
667     PetscCall(VecSetType(v, VECMPI));
668   }
669   PetscFunctionReturn(PETSC_SUCCESS);
670 }
671 
672 /*@
673   VecCreateMPIWithArray - Creates a parallel, array-style vector,
674   where the user provides the array space to store the vector values.
675 
676   Collective
677 
678   Input Parameters:
679 + comm  - the MPI communicator to use
680 . bs    - block size, same meaning as `VecSetBlockSize()`
681 . n     - local vector length, cannot be `PETSC_DECIDE`
682 . N     - global vector length (or `PETSC_DETERMINE` to have calculated)
683 - array - the user provided array to store the vector values
684 
685   Output Parameter:
686 . vv - the vector
687 
688   Level: intermediate
689 
690   Notes:
691   Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the
692   same type as an existing vector.
693 
694   If the user-provided array is `NULL`, then `VecPlaceArray()` can be used
695   at a later stage to SET the array for storing the vector values.
696 
697   PETSc does NOT free `array` when the vector is destroyed via `VecDestroy()`.
698 
699   The user should not free `array` until the vector is destroyed.
700 
701 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
702           `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
703 @*/
704 PetscErrorCode VecCreateMPIWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar array[], Vec *vv)
705 {
706   PetscFunctionBegin;
707   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
708   PetscCall(PetscSplitOwnership(comm, &n, &N));
709   PetscCall(VecCreate(comm, vv));
710   PetscCall(VecSetSizes(*vv, n, N));
711   PetscCall(VecSetBlockSize(*vv, bs));
712   PetscCall(VecCreate_MPI_Private(*vv, PETSC_FALSE, 0, array));
713   PetscFunctionReturn(PETSC_SUCCESS);
714 }
715 
716 /*@
717   VecCreateGhostWithArray - Creates a parallel vector with ghost padding on each processor;
718   the caller allocates the array space.
719 
720   Collective
721 
722   Input Parameters:
723 + comm   - the MPI communicator to use
724 . n      - local vector length
725 . N      - global vector length (or `PETSC_DETERMINE` to have calculated if `n` is given)
726 . nghost - number of local ghost points
727 . ghosts - global indices of ghost points (or `NULL` if not needed), these do not need to be in increasing order (sorted)
728 - array  - the space to store the vector values (as long as n + nghost)
729 
730   Output Parameter:
731 . vv - the global vector representation (without ghost points as part of vector)
732 
733   Level: advanced
734 
735   Notes:
736   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
737   of the vector.
738 
739   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
740 
741 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
742           `VecCreateGhost()`, `VecCreateSeqWithArray()`, `VecCreateMPIWithArray()`,
743           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`, `VecMPISetGhost()`
744 @*/
745 PetscErrorCode VecCreateGhostWithArray(MPI_Comm comm, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], const PetscScalar array[], Vec *vv)
746 {
747   Vec_MPI     *w;
748   PetscScalar *larray;
749   IS           from, to;
750 
751   PetscFunctionBegin;
752   *vv = NULL;
753   PetscCheck(nghost != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local ghost size");
754   PetscCheck(nghost >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ghost length must be >= 0");
755   PetscCall(PetscSplitOwnership(comm, &n, &N));
756   /* Create global representation */
757   PetscCall(VecCreate(comm, vv));
758   PetscCall(VecSetSizes(*vv, n, N));
759   PetscCall(VecCreate_MPI_Private(*vv, PETSC_TRUE, nghost, array));
760   w = (Vec_MPI *)(*vv)->data;
761   /* Create local representation */
762   PetscCall(VecGetArray(*vv, &larray));
763   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n + nghost, larray, &w->localrep));
764   PetscCall(VecRestoreArray(*vv, &larray));
765 
766   /*
767        Create scatter context for scattering (updating) ghost values
768   */
769   PetscCall(ISCreateGeneral(comm, nghost, ghosts, PETSC_COPY_VALUES, &from));
770   PetscCall(ISCreateStride(PETSC_COMM_SELF, nghost, n, 1, &to));
771   PetscCall(VecScatterCreate(*vv, from, w->localrep, to, &w->localupdate));
772   PetscCall(ISDestroy(&to));
773 
774   w->ghost                            = from;
775   (*vv)->ops->getlocaltoglobalmapping = VecGetLocalToGlobalMapping_MPI_VecGhost;
776   PetscFunctionReturn(PETSC_SUCCESS);
777 }
778 
779 /*@
780   VecGhostGetGhostIS - Return ghosting indices of a ghost vector
781 
782   Input Parameters:
783 . X - ghost vector
784 
785   Output Parameter:
786 . ghost - ghosting indices
787 
788   Level: beginner
789 
790 .seealso: `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`
791 @*/
792 PetscErrorCode VecGhostGetGhostIS(Vec X, IS *ghost)
793 {
794   Vec_MPI  *w;
795   PetscBool flg;
796 
797   PetscFunctionBegin;
798   PetscValidType(X, 1);
799   PetscAssertPointer(ghost, 2);
800   PetscCall(PetscObjectTypeCompare((PetscObject)X, VECMPI, &flg));
801   PetscCheck(flg, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "VecGhostGetGhostIS was not supported for vec type %s", ((PetscObject)X)->type_name);
802   w      = (Vec_MPI *)X->data;
803   *ghost = w->ghost;
804   PetscFunctionReturn(PETSC_SUCCESS);
805 }
806 
807 /*@
808   VecCreateGhost - Creates a parallel vector with ghost padding on each processor.
809 
810   Collective
811 
812   Input Parameters:
813 + comm   - the MPI communicator to use
814 . n      - local vector length
815 . N      - global vector length (or `PETSC_DETERMINE` to have calculated if `n` is given)
816 . nghost - number of local ghost points
817 - ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)
818 
819   Output Parameter:
820 . vv - the global vector representation (without ghost points as part of vector)
821 
822   Level: advanced
823 
824   Notes:
825   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
826   of the vector.
827 
828   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
829 
830 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
831           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`, `VecGhostUpdateBegin()`,
832           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecGhostUpdateEnd()`,
833           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`, `VecMPISetGhost()`
834 
835 @*/
836 PetscErrorCode VecCreateGhost(MPI_Comm comm, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], Vec *vv)
837 {
838   PetscFunctionBegin;
839   PetscCall(VecCreateGhostWithArray(comm, n, N, nghost, ghosts, NULL, vv));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 /*@
844   VecMPISetGhost - Sets the ghost points for an MPI ghost vector
845 
846   Collective
847 
848   Input Parameters:
849 + vv     - the MPI vector
850 . nghost - number of local ghost points
851 - ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)
852 
853   Level: advanced
854 
855   Notes:
856   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
857   of the vector.
858 
859   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
860 
861   You must call this AFTER you have set the type of the vector (with` VecSetType()`) and the size (with `VecSetSizes()`).
862 
863 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
864           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`, `VecGhostUpdateBegin()`,
865           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecGhostUpdateEnd()`,
866           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`
867 @*/
868 PetscErrorCode VecMPISetGhost(Vec vv, PetscInt nghost, const PetscInt ghosts[])
869 {
870   PetscBool flg;
871 
872   PetscFunctionBegin;
873   PetscCall(PetscObjectTypeCompare((PetscObject)vv, VECMPI, &flg));
874   /* if already fully existent VECMPI then basically destroy it and rebuild with ghosting */
875   if (flg) {
876     PetscInt     n, N;
877     Vec_MPI     *w;
878     PetscScalar *larray;
879     IS           from, to;
880     MPI_Comm     comm;
881 
882     PetscCall(PetscObjectGetComm((PetscObject)vv, &comm));
883     n = vv->map->n;
884     N = vv->map->N;
885     PetscUseTypeMethod(vv, destroy);
886     PetscCall(VecSetSizes(vv, n, N));
887     PetscCall(VecCreate_MPI_Private(vv, PETSC_TRUE, nghost, NULL));
888     w = (Vec_MPI *)vv->data;
889     /* Create local representation */
890     PetscCall(VecGetArray(vv, &larray));
891     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n + nghost, larray, &w->localrep));
892     PetscCall(VecRestoreArray(vv, &larray));
893 
894     /*
895      Create scatter context for scattering (updating) ghost values
896      */
897     PetscCall(ISCreateGeneral(comm, nghost, ghosts, PETSC_COPY_VALUES, &from));
898     PetscCall(ISCreateStride(PETSC_COMM_SELF, nghost, n, 1, &to));
899     PetscCall(VecScatterCreate(vv, from, w->localrep, to, &w->localupdate));
900     PetscCall(ISDestroy(&to));
901 
902     w->ghost                         = from;
903     vv->ops->getlocaltoglobalmapping = VecGetLocalToGlobalMapping_MPI_VecGhost;
904   } else {
905     PetscCheck(vv->ops->create != VecCreate_MPI, PetscObjectComm((PetscObject)vv), PETSC_ERR_ARG_WRONGSTATE, "Must set local or global size before setting ghosting");
906     PetscCheck(((PetscObject)vv)->type_name, PetscObjectComm((PetscObject)vv), PETSC_ERR_ARG_WRONGSTATE, "Must set type to VECMPI before ghosting");
907   }
908   PetscFunctionReturn(PETSC_SUCCESS);
909 }
910 
911 /*@
912   VecCreateGhostBlockWithArray - Creates a parallel vector with ghost padding on each processor;
913   the caller allocates the array space. Indices in the ghost region are based on blocks.
914 
915   Collective
916 
917   Input Parameters:
918 + comm   - the MPI communicator to use
919 . bs     - block size
920 . n      - local vector length
921 . N      - global vector length (or `PETSC_DETERMINE` to have calculated if `n` is given)
922 . nghost - number of local ghost blocks
923 . ghosts - global indices of ghost blocks (or `NULL` if not needed), counts are by block not by index, these do not need to be in increasing order (sorted)
924 - array  - the space to store the vector values (as long as `n + nghost*bs`)
925 
926   Output Parameter:
927 . vv - the global vector representation (without ghost points as part of vector)
928 
929   Level: advanced
930 
931   Notes:
932   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
933   of the vector.
934 
935   n is the local vector size (total local size not the number of blocks) while nghost
936   is the number of blocks in the ghost portion, i.e. the number of elements in the ghost
937   portion is bs*nghost
938 
939 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
940           `VecCreateGhost()`, `VecCreateSeqWithArray()`, `VecCreateMPIWithArray()`,
941           `VecCreateGhostWithArray()`, `VecCreateGhostBlock()`
942 @*/
943 PetscErrorCode VecCreateGhostBlockWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], const PetscScalar array[], Vec *vv)
944 {
945   Vec_MPI               *w;
946   PetscScalar           *larray;
947   IS                     from, to;
948   ISLocalToGlobalMapping ltog;
949   PetscInt               rstart, i, nb, *indices;
950 
951   PetscFunctionBegin;
952   *vv = NULL;
953 
954   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size");
955   PetscCheck(nghost != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local ghost size");
956   PetscCheck(nghost >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ghost length must be >= 0");
957   PetscCheck(n % bs == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local size must be a multiple of block size");
958   PetscCall(PetscSplitOwnership(comm, &n, &N));
959   /* Create global representation */
960   PetscCall(VecCreate(comm, vv));
961   PetscCall(VecSetSizes(*vv, n, N));
962   PetscCall(VecSetBlockSize(*vv, bs));
963   PetscCall(VecCreate_MPI_Private(*vv, PETSC_TRUE, nghost * bs, array));
964   w = (Vec_MPI *)(*vv)->data;
965   /* Create local representation */
966   PetscCall(VecGetArray(*vv, &larray));
967   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, bs, n + bs * nghost, larray, &w->localrep));
968   PetscCall(VecRestoreArray(*vv, &larray));
969 
970   /*
971        Create scatter context for scattering (updating) ghost values
972   */
973   PetscCall(ISCreateBlock(comm, bs, nghost, ghosts, PETSC_COPY_VALUES, &from));
974   PetscCall(ISCreateStride(PETSC_COMM_SELF, bs * nghost, n, 1, &to));
975   PetscCall(VecScatterCreate(*vv, from, w->localrep, to, &w->localupdate));
976   PetscCall(ISDestroy(&to));
977   PetscCall(ISDestroy(&from));
978 
979   /* set local to global mapping for ghosted vector */
980   nb = n / bs;
981   PetscCall(PetscMalloc1(nb + nghost, &indices));
982   PetscCall(VecGetOwnershipRange(*vv, &rstart, NULL));
983   rstart = rstart / bs;
984 
985   for (i = 0; i < nb; i++) indices[i] = rstart + i;
986   for (i = 0; i < nghost; i++) indices[nb + i] = ghosts[i];
987 
988   PetscCall(ISLocalToGlobalMappingCreate(comm, bs, nb + nghost, indices, PETSC_OWN_POINTER, &ltog));
989   PetscCall(VecSetLocalToGlobalMapping(*vv, ltog));
990   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
991   PetscFunctionReturn(PETSC_SUCCESS);
992 }
993 
994 /*@
995   VecCreateGhostBlock - Creates a parallel vector with ghost padding on each processor.
996   The indicing of the ghost points is done with blocks.
997 
998   Collective
999 
1000   Input Parameters:
1001 + comm   - the MPI communicator to use
1002 . bs     - the block size
1003 . n      - local vector length
1004 . N      - global vector length (or `PETSC_DETERMINE` to have calculated if `n` is given)
1005 . nghost - number of local ghost blocks
1006 - ghosts - global indices of ghost blocks, counts are by block, not by individual index, these do not need to be in increasing order (sorted)
1007 
1008   Output Parameter:
1009 . vv - the global vector representation (without ghost points as part of vector)
1010 
1011   Level: advanced
1012 
1013   Notes:
1014   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
1015   of the vector.
1016 
1017   `n` is the local vector size (total local size not the number of blocks) while `nghost`
1018   is the number of blocks in the ghost portion, i.e. the number of elements in the ghost
1019   portion is `bs*nghost`
1020 
1021 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
1022           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
1023           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecCreateGhostBlockWithArray()`
1024 @*/
1025 PetscErrorCode VecCreateGhostBlock(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], Vec *vv)
1026 {
1027   PetscFunctionBegin;
1028   PetscCall(VecCreateGhostBlockWithArray(comm, bs, n, N, nghost, ghosts, NULL, vv));
1029   PetscFunctionReturn(PETSC_SUCCESS);
1030 }
1031