xref: /petsc/src/vec/vec/impls/mpi/pbvec.c (revision b89a13d22df4882bcbf047fcff9c1fc5e2981141)
1 /*
2    This file contains routines for Parallel vector operations.
3  */
4 #include <petscsys.h>
5 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
6 
7 PETSC_INTERN PetscErrorCode VecView_MPI_Draw(Vec, PetscViewer);
8 
9 PetscErrorCode VecPlaceArray_MPI(Vec vin, const PetscScalar *a)
10 {
11   Vec_MPI *v = (Vec_MPI *)vin->data;
12 
13   PetscFunctionBegin;
14   PetscCheck(!v->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
15   v->unplacedarray = v->array; /* save previous array so reset can bring it back */
16   v->array         = (PetscScalar *)a;
17   if (v->localrep) PetscCall(VecPlaceArray(v->localrep, a));
18   PetscFunctionReturn(PETSC_SUCCESS);
19 }
20 
21 // Duplicate a vector, using the provided host array (if not NULL) as storage for the new vector.
22 // In that case, the array must be long enough to also hold any ghost points.
23 // If array is NULL, the routine allocates the memory itself.
24 PetscErrorCode VecDuplicateWithArray_MPI(Vec win, const PetscScalar *array, Vec *v)
25 {
26   Vec_MPI *vw, *w = (Vec_MPI *)win->data;
27 
28   PetscFunctionBegin;
29   PetscCall(VecCreateWithLayout_Private(win->map, v));
30 
31   PetscCall(VecCreate_MPI_Private(*v, PETSC_TRUE, w->nghost, array)); // array could be NULL
32   vw           = (Vec_MPI *)(*v)->data;
33   (*v)->ops[0] = win->ops[0];
34 
35   /* save local representation of the parallel vector (and scatter) if it exists */
36   if (w->localrep) {
37     PetscScalar *arr = ((Vec_MPI *)(*v)->data)->array;
38 
39     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, win->map->bs, win->map->n + w->nghost, arr, &vw->localrep));
40     vw->localrep->ops[0] = w->localrep->ops[0];
41 
42     vw->localupdate = w->localupdate;
43     if (vw->localupdate) PetscCall(PetscObjectReference((PetscObject)vw->localupdate));
44 
45     vw->ghost = w->ghost;
46     if (vw->ghost) PetscCall(PetscObjectReference((PetscObject)vw->ghost));
47   }
48 
49   /* New vector should inherit stashing property of parent */
50   (*v)->stash.donotstash   = win->stash.donotstash;
51   (*v)->stash.ignorenegidx = win->stash.ignorenegidx;
52 
53   PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*v)->olist));
54   PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*v)->qlist));
55 
56   (*v)->stash.bs  = win->stash.bs;
57   (*v)->bstash.bs = win->bstash.bs;
58   PetscFunctionReturn(PETSC_SUCCESS);
59 }
60 
61 PetscErrorCode VecDuplicate_MPI(Vec win, Vec *v)
62 {
63   PetscFunctionBegin;
64   PetscCall(VecDuplicateWithArray_MPI(win, NULL, v));
65   PetscFunctionReturn(PETSC_SUCCESS);
66 }
67 
68 static PetscErrorCode VecDuplicateVecs_MPI_GEMV(Vec w, PetscInt m, Vec *V[])
69 {
70   Vec_MPI *wmpi = (Vec_MPI *)w->data;
71 
72   PetscFunctionBegin;
73   // Currently only do GEMV for vectors without ghosts. Note w might be a VECMPI subclass object.
74   // This routine relies on the duplicate operation being VecDuplicate_MPI. If not, bail out to the default.
75   if (wmpi->localrep || w->ops->duplicate != VecDuplicate_MPI) {
76     w->ops->duplicatevecs = VecDuplicateVecs_Default;
77     PetscCall(VecDuplicateVecs(w, m, V));
78   } else {
79     PetscScalar *array;
80     PetscInt64   lda; // use 64-bit as we will do "m * lda"
81 
82     PetscCall(PetscMalloc1(m, V));
83     VecGetLocalSizeAligned(w, 64, &lda); // get the 64-byte aligned local size in lda
84 
85     PetscCall(PetscCalloc1(m * lda, &array));
86     for (PetscInt i = 0; i < m; i++) {
87       Vec v;
88       PetscCall(VecCreateMPIWithLayoutAndArray_Private(w->map, PetscSafePointerPlusOffset(array, i * lda), &v));
89       PetscCall(VecSetType(v, ((PetscObject)w)->type_name));
90       PetscCall(PetscObjectListDuplicate(((PetscObject)w)->olist, &((PetscObject)v)->olist));
91       PetscCall(PetscFunctionListDuplicate(((PetscObject)w)->qlist, &((PetscObject)v)->qlist));
92       v->ops[0]             = w->ops[0];
93       v->stash.donotstash   = w->stash.donotstash;
94       v->stash.ignorenegidx = w->stash.ignorenegidx;
95       v->stash.bs           = w->stash.bs;
96       v->bstash.bs          = w->bstash.bs;
97       (*V)[i]               = v;
98     }
99     // Attach the shared array to the first vector, so destroying that vector frees the array
100     if (m) ((Vec_MPI *)(*V)[0]->data)->array_allocated = array;
101     // Disable replacearray on the first vector, since freeing its memory would also free the others in the group.
102     // replacearray on the other vectors is fine, as they do not own their array.
103     if (m > 1) (*V)[0]->ops->replacearray = VecReplaceArray_Default_GEMV_Error;
104   }
105   PetscFunctionReturn(PETSC_SUCCESS);
106 }
107 
108 static PetscErrorCode VecSetOption_MPI(Vec V, VecOption op, PetscBool flag)
109 {
110   Vec_MPI *v = (Vec_MPI *)V->data;
111 
112   PetscFunctionBegin;
113   switch (op) {
114   case VEC_IGNORE_OFF_PROC_ENTRIES:
115     V->stash.donotstash = flag;
116     break;
117   case VEC_IGNORE_NEGATIVE_INDICES:
118     V->stash.ignorenegidx = flag;
119     break;
120   case VEC_SUBSET_OFF_PROC_ENTRIES:
121     v->assembly_subset = flag;              /* See the same logic in MatAssembly wrt MAT_SUBSET_OFF_PROC_ENTRIES */
122     if (!v->assembly_subset) {              /* User indicates "do not reuse the communication pattern" */
123       PetscCall(VecAssemblyReset_MPI(V));   /* Reset existing pattern to free memory */
124       v->first_assembly_done = PETSC_FALSE; /* Mark the first assembly is not done */
125     }
126     break;
127   }
128   PetscFunctionReturn(PETSC_SUCCESS);
129 }
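
/*
   Usage sketch (not part of this file): when the set of off-process entries written with
   VecSetValues() stays the same (or shrinks) across repeated assemblies, a user can set
   VEC_SUBSET_OFF_PROC_ENTRIES so the communication pattern built during the first assembly
   is reused. The functions below are the public PETSc API; the loop bounds and the index and
   value arrays are illustrative only.

     PetscCall(VecSetOption(x, VEC_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE));
     for (PetscInt step = 0; step < nsteps; step++) {
       PetscCall(VecSetValues(x, m, idx, vals, ADD_VALUES));
       PetscCall(VecAssemblyBegin(x));
       PetscCall(VecAssemblyEnd(x));
     }
*/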
130 
131 PetscErrorCode VecResetArray_MPI(Vec vin)
132 {
133   Vec_MPI *v = (Vec_MPI *)vin->data;
134 
135   PetscFunctionBegin;
136   v->array         = v->unplacedarray;
137   v->unplacedarray = NULL;
138   if (v->localrep) PetscCall(VecResetArray(v->localrep));
139   PetscFunctionReturn(PETSC_SUCCESS);
140 }
141 
142 static PetscErrorCode VecAssemblySend_MPI_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
143 {
144   Vec                X   = (Vec)ctx;
145   Vec_MPI           *x   = (Vec_MPI *)X->data;
146   VecAssemblyHeader *hdr = (VecAssemblyHeader *)sdata;
147   PetscInt           bs  = X->map->bs;
148 
149   PetscFunctionBegin;
150   /* x->first_assembly_done indicates we are reusing a communication network. In that case, some
151      messages can be empty, but if we sent them before we must send them again this time, because the
152      receiver is expecting them.
153    */
154   if (hdr->count || (x->first_assembly_done && x->sendptrs[rankid].ints)) {
155     PetscCallMPI(MPIU_Isend(x->sendptrs[rankid].ints, hdr->count, MPIU_INT, rank, tag[0], comm, &req[0]));
156     PetscCallMPI(MPIU_Isend(x->sendptrs[rankid].scalars, hdr->count, MPIU_SCALAR, rank, tag[1], comm, &req[1]));
157   }
158   if (hdr->bcount || (x->first_assembly_done && x->sendptrs[rankid].intb)) {
159     PetscCallMPI(MPIU_Isend(x->sendptrs[rankid].intb, hdr->bcount, MPIU_INT, rank, tag[2], comm, &req[2]));
160     PetscCallMPI(MPIU_Isend(x->sendptrs[rankid].scalarb, hdr->bcount * bs, MPIU_SCALAR, rank, tag[3], comm, &req[3]));
161   }
162   PetscFunctionReturn(PETSC_SUCCESS);
163 }
164 
165 static PetscErrorCode VecAssemblyRecv_MPI_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
166 {
167   Vec                X   = (Vec)ctx;
168   Vec_MPI           *x   = (Vec_MPI *)X->data;
169   VecAssemblyHeader *hdr = (VecAssemblyHeader *)rdata;
170   PetscInt           bs  = X->map->bs;
171   VecAssemblyFrame  *frame;
172 
173   PetscFunctionBegin;
174   PetscCall(PetscSegBufferGet(x->segrecvframe, 1, &frame));
175 
176   if (hdr->count) {
177     PetscCall(PetscSegBufferGet(x->segrecvint, hdr->count, &frame->ints));
178     PetscCallMPI(MPIU_Irecv(frame->ints, hdr->count, MPIU_INT, rank, tag[0], comm, &req[0]));
179     PetscCall(PetscSegBufferGet(x->segrecvscalar, hdr->count, &frame->scalars));
180     PetscCallMPI(MPIU_Irecv(frame->scalars, hdr->count, MPIU_SCALAR, rank, tag[1], comm, &req[1]));
181     frame->pendings = 2;
182   } else {
183     frame->ints     = NULL;
184     frame->scalars  = NULL;
185     frame->pendings = 0;
186   }
187 
188   if (hdr->bcount) {
189     PetscCall(PetscSegBufferGet(x->segrecvint, hdr->bcount, &frame->intb));
190     PetscCallMPI(MPIU_Irecv(frame->intb, hdr->bcount, MPIU_INT, rank, tag[2], comm, &req[2]));
191     PetscCall(PetscSegBufferGet(x->segrecvscalar, hdr->bcount * bs, &frame->scalarb));
192     PetscCallMPI(MPIU_Irecv(frame->scalarb, hdr->bcount * bs, MPIU_SCALAR, rank, tag[3], comm, &req[3]));
193     frame->pendingb = 2;
194   } else {
195     frame->intb     = NULL;
196     frame->scalarb  = NULL;
197     frame->pendingb = 0;
198   }
199   PetscFunctionReturn(PETSC_SUCCESS);
200 }
201 
202 static PetscErrorCode VecAssemblyBegin_MPI_BTS(Vec X)
203 {
204   Vec_MPI    *x = (Vec_MPI *)X->data;
205   MPI_Comm    comm;
206   PetscMPIInt i;
207   PetscInt    j, jb, bs;
208 
209   PetscFunctionBegin;
210   if (X->stash.donotstash) PetscFunctionReturn(PETSC_SUCCESS);
211 
212   PetscCall(PetscObjectGetComm((PetscObject)X, &comm));
213   PetscCall(VecGetBlockSize(X, &bs));
214   if (PetscDefined(USE_DEBUG)) {
215     InsertMode addv;
216 
217     PetscCallMPI(MPIU_Allreduce((PetscEnum *)&X->stash.insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, comm));
218     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), comm, PETSC_ERR_ARG_NOTSAMETYPE, "Some processors inserted values while others added");
219   }
220   X->bstash.insertmode = X->stash.insertmode; /* Block stash implicitly tracks InsertMode of scalar stash */
221 
222   PetscCall(VecStashSortCompress_Private(&X->stash));
223   PetscCall(VecStashSortCompress_Private(&X->bstash));
224 
225   if (!x->sendranks) {
226     PetscMPIInt nowners, bnowners, *owners, *bowners;
227     PetscInt    ntmp;
228 
229     PetscCall(VecStashGetOwnerList_Private(&X->stash, X->map, &nowners, &owners));
230     PetscCall(VecStashGetOwnerList_Private(&X->bstash, X->map, &bnowners, &bowners));
231     PetscCall(PetscMergeMPIIntArray(nowners, owners, bnowners, bowners, &ntmp, &x->sendranks));
232     PetscCall(PetscMPIIntCast(ntmp, &x->nsendranks));
233     PetscCall(PetscFree(owners));
234     PetscCall(PetscFree(bowners));
235     PetscCall(PetscMalloc1(x->nsendranks, &x->sendhdr));
236     PetscCall(PetscCalloc1(x->nsendranks, &x->sendptrs));
237   }
238   for (i = 0, j = 0, jb = 0; i < x->nsendranks; i++) {
239     PetscMPIInt rank = x->sendranks[i];
240 
241     x->sendhdr[i].insertmode = X->stash.insertmode;
242     /* Initialize pointers for non-empty stashes the first time around.  Subsequent assemblies with
243      * VEC_SUBSET_OFF_PROC_ENTRIES will leave the old pointers (dangling because the stash has been collected) when
244      * there is nothing new to send, so that size-zero messages get sent instead. */
245     x->sendhdr[i].count = 0;
246     if (X->stash.n) {
247       x->sendptrs[i].ints    = &X->stash.idx[j];
248       x->sendptrs[i].scalars = &X->stash.array[j];
249       for (; j < X->stash.n && X->stash.idx[j] < X->map->range[rank + 1]; j++) x->sendhdr[i].count++;
250     }
251     x->sendhdr[i].bcount = 0;
252     if (X->bstash.n) {
253       x->sendptrs[i].intb    = &X->bstash.idx[jb];
254       x->sendptrs[i].scalarb = &X->bstash.array[jb * bs];
255       for (; jb < X->bstash.n && X->bstash.idx[jb] * bs < X->map->range[rank + 1]; jb++) x->sendhdr[i].bcount++;
256     }
257   }
258 
259   if (!x->segrecvint) PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &x->segrecvint));
260   if (!x->segrecvscalar) PetscCall(PetscSegBufferCreate(sizeof(PetscScalar), 1000, &x->segrecvscalar));
261   if (!x->segrecvframe) PetscCall(PetscSegBufferCreate(sizeof(VecAssemblyFrame), 50, &x->segrecvframe));
262   if (x->first_assembly_done) { /* this is not the first assembly */
263     PetscMPIInt tag[4];
264     for (i = 0; i < 4; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i]));
265     for (i = 0; i < x->nsendranks; i++) PetscCall(VecAssemblySend_MPI_Private(comm, tag, i, x->sendranks[i], x->sendhdr + i, x->sendreqs + 4 * i, X));
266     for (i = 0; i < x->nrecvranks; i++) PetscCall(VecAssemblyRecv_MPI_Private(comm, tag, x->recvranks[i], x->recvhdr + i, x->recvreqs + 4 * i, X));
267     x->use_status = PETSC_TRUE;
268   } else { /* First time assembly */
269     PetscCall(PetscCommBuildTwoSidedFReq(comm, 3, MPIU_INT, x->nsendranks, x->sendranks, (PetscInt *)x->sendhdr, &x->nrecvranks, &x->recvranks, &x->recvhdr, 4, &x->sendreqs, &x->recvreqs, VecAssemblySend_MPI_Private, VecAssemblyRecv_MPI_Private, X));
270     x->use_status = PETSC_FALSE;
271   }
272 
273   /* The first_assembly_done flag is only meaningful when x->assembly_subset is set.
274      This line means: once assembly_subset is set, mark that the first assembly has been done.
275    */
276   x->first_assembly_done = x->assembly_subset;
277 
278   {
279     PetscInt nstash, reallocs;
280     PetscCall(VecStashGetInfo_Private(&X->stash, &nstash, &reallocs));
281     PetscCall(PetscInfo(X, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
282     PetscCall(VecStashGetInfo_Private(&X->bstash, &nstash, &reallocs));
283     PetscCall(PetscInfo(X, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
284   }
285   PetscFunctionReturn(PETSC_SUCCESS);
286 }
287 
288 static PetscErrorCode VecAssemblyEnd_MPI_BTS(Vec X)
289 {
290   Vec_MPI          *x  = (Vec_MPI *)X->data;
291   PetscInt          bs = X->map->bs;
292   PetscMPIInt       npending, *some_indices, r;
293   MPI_Status       *some_statuses;
294   PetscScalar      *xarray;
295   VecAssemblyFrame *frame;
296 
297   PetscFunctionBegin;
298   if (X->stash.donotstash) {
299     X->stash.insertmode  = NOT_SET_VALUES;
300     X->bstash.insertmode = NOT_SET_VALUES;
301     PetscFunctionReturn(PETSC_SUCCESS);
302   }
303 
304   PetscCheck(x->segrecvframe, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing segrecvframe! Probably you forgot to call VecAssemblyBegin() first");
305   PetscCall(VecGetArray(X, &xarray));
306   PetscCall(PetscSegBufferExtractInPlace(x->segrecvframe, &frame));
307   PetscCall(PetscMalloc2(4 * x->nrecvranks, &some_indices, (x->use_status ? (size_t)4 * x->nrecvranks : 0), &some_statuses));
308   for (r = 0, npending = 0; r < x->nrecvranks; r++) npending += frame[r].pendings + frame[r].pendingb;
309   while (npending > 0) {
310     PetscMPIInt ndone = 0, ii;
311     /* Filling MPI_Status fields requires some resources from the MPI library.  We skip it on the first assembly, or
312      * when VEC_SUBSET_OFF_PROC_ENTRIES has not been set, because we could exchange exact sizes in the initial
313      * rendezvous.  When the rendezvous is elided, however, we use MPI_Status to get actual message lengths, so that
314      * subsequent assembly can set a proper subset of the values. */
315     PetscCallMPI(MPI_Waitsome(4 * x->nrecvranks, x->recvreqs, &ndone, some_indices, x->use_status ? some_statuses : MPI_STATUSES_IGNORE));
316     for (ii = 0; ii < ndone; ii++) {
317       PetscInt     i     = some_indices[ii] / 4, j, k;
318       InsertMode   imode = (InsertMode)x->recvhdr[i].insertmode;
319       PetscInt    *recvint;
320       PetscScalar *recvscalar;
321       PetscBool    intmsg   = (PetscBool)(some_indices[ii] % 2 == 0);
322       PetscBool    blockmsg = (PetscBool)((some_indices[ii] % 4) / 2 == 1);
323 
324       npending--;
325       if (!blockmsg) { /* Scalar stash */
326         PetscMPIInt count;
327 
328         if (--frame[i].pendings > 0) continue;
329         if (x->use_status) {
330           PetscCallMPI(MPI_Get_count(&some_statuses[ii], intmsg ? MPIU_INT : MPIU_SCALAR, &count));
331         } else {
332           PetscCall(PetscMPIIntCast(x->recvhdr[i].count, &count));
333         }
334         for (j = 0, recvint = frame[i].ints, recvscalar = frame[i].scalars; j < count; j++, recvint++) {
335           PetscInt loc = *recvint - X->map->rstart;
336 
337           PetscCheck(*recvint >= X->map->rstart && X->map->rend > *recvint, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Received vector entry %" PetscInt_FMT " out of local range [%" PetscInt_FMT ",%" PetscInt_FMT ")", *recvint, X->map->rstart, X->map->rend);
338           switch (imode) {
339           case ADD_VALUES:
340             xarray[loc] += *recvscalar++;
341             break;
342           case INSERT_VALUES:
343             xarray[loc] = *recvscalar++;
344             break;
345           default:
346             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", imode);
347           }
348         }
349       } else { /* Block stash */
350         PetscMPIInt count;
351         if (--frame[i].pendingb > 0) continue;
352         if (x->use_status) {
353           PetscCallMPI(MPI_Get_count(&some_statuses[ii], intmsg ? MPIU_INT : MPIU_SCALAR, &count));
354           if (!intmsg) count /= bs; /* Convert from number of scalars to number of blocks */
355         } else {
356           PetscCall(PetscMPIIntCast(x->recvhdr[i].bcount, &count));
357         }
358         for (j = 0, recvint = frame[i].intb, recvscalar = frame[i].scalarb; j < count; j++, recvint++) {
359           PetscInt loc = (*recvint) * bs - X->map->rstart;
360           switch (imode) {
361           case ADD_VALUES:
362             for (k = loc; k < loc + bs; k++) xarray[k] += *recvscalar++;
363             break;
364           case INSERT_VALUES:
365             for (k = loc; k < loc + bs; k++) xarray[k] = *recvscalar++;
366             break;
367           default:
368             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", imode);
369           }
370         }
371       }
372     }
373   }
374   PetscCall(VecRestoreArray(X, &xarray));
375   PetscCallMPI(MPI_Waitall(4 * x->nsendranks, x->sendreqs, MPI_STATUSES_IGNORE));
376   PetscCall(PetscFree2(some_indices, some_statuses));
377   if (x->assembly_subset) {
378     PetscCall(PetscSegBufferExtractInPlace(x->segrecvint, NULL));
379     PetscCall(PetscSegBufferExtractInPlace(x->segrecvscalar, NULL));
380   } else {
381     PetscCall(VecAssemblyReset_MPI(X));
382   }
383 
384   X->stash.insertmode  = NOT_SET_VALUES;
385   X->bstash.insertmode = NOT_SET_VALUES;
386   PetscCall(VecStashScatterEnd_Private(&X->stash));
387   PetscCall(VecStashScatterEnd_Private(&X->bstash));
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
391 PetscErrorCode VecAssemblyReset_MPI(Vec X)
392 {
393   Vec_MPI *x = (Vec_MPI *)X->data;
394 
395   PetscFunctionBegin;
396   PetscCall(PetscFree(x->sendreqs));
397   PetscCall(PetscFree(x->recvreqs));
398   PetscCall(PetscFree(x->sendranks));
399   PetscCall(PetscFree(x->recvranks));
400   PetscCall(PetscFree(x->sendhdr));
401   PetscCall(PetscFree(x->recvhdr));
402   PetscCall(PetscFree(x->sendptrs));
403   PetscCall(PetscSegBufferDestroy(&x->segrecvint));
404   PetscCall(PetscSegBufferDestroy(&x->segrecvscalar));
405   PetscCall(PetscSegBufferDestroy(&x->segrecvframe));
406   PetscFunctionReturn(PETSC_SUCCESS);
407 }
408 
409 static PetscErrorCode VecSetFromOptions_MPI(Vec X, PetscOptionItems PetscOptionsObject)
410 {
411 #if !defined(PETSC_HAVE_MPIUNI)
412   PetscBool flg = PETSC_FALSE, set;
413 
414   PetscFunctionBegin;
415   PetscOptionsHeadBegin(PetscOptionsObject, "VecMPI Options");
416   PetscCall(PetscOptionsBool("-vec_assembly_legacy", "Use MPI 1 version of assembly", "", flg, &flg, &set));
417   if (set) {
418     X->ops->assemblybegin = flg ? VecAssemblyBegin_MPI : VecAssemblyBegin_MPI_BTS;
419     X->ops->assemblyend   = flg ? VecAssemblyEnd_MPI : VecAssemblyEnd_MPI_BTS;
420   }
421   PetscOptionsHeadEnd();
422 #else
423   PetscFunctionBegin;
424   X->ops->assemblybegin = VecAssemblyBegin_MPI;
425   X->ops->assemblyend   = VecAssemblyEnd_MPI;
426 #endif
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
430 static PetscErrorCode VecGetLocalToGlobalMapping_MPI_VecGhost(Vec X, ISLocalToGlobalMapping *ltg)
431 {
432   PetscInt       *indices, n, nghost, rstart, i;
433   IS              ghostis;
434   const PetscInt *ghostidx;
435 
436   PetscFunctionBegin;
437   *ltg = X->map->mapping;
438   if (X->map->mapping) PetscFunctionReturn(PETSC_SUCCESS);
439   PetscCall(VecGhostGetGhostIS(X, &ghostis));
440   if (!ghostis) PetscFunctionReturn(PETSC_SUCCESS);
441   PetscCall(ISGetLocalSize(ghostis, &nghost));
442   PetscCall(VecGetLocalSize(X, &n));
443   PetscCall(ISGetIndices(ghostis, &ghostidx));
444   /* set local to global mapping for ghosted vector */
445   PetscCall(PetscMalloc1(n + nghost, &indices));
446   PetscCall(VecGetOwnershipRange(X, &rstart, NULL));
447   for (i = 0; i < n; i++) indices[i] = rstart + i;
448   PetscCall(PetscArraycpy(indices + n, ghostidx, nghost));
449   PetscCall(ISRestoreIndices(ghostis, &ghostidx));
450   PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, n + nghost, indices, PETSC_OWN_POINTER, &X->map->mapping));
451   *ltg = X->map->mapping;
452   PetscFunctionReturn(PETSC_SUCCESS);
453 }
454 
455 static struct _VecOps DvOps = {
456   PetscDesignatedInitializer(duplicate, VecDuplicate_MPI), /* 1 */
457   PetscDesignatedInitializer(duplicatevecs, VecDuplicateVecs_Default),
458   PetscDesignatedInitializer(destroyvecs, VecDestroyVecs_Default),
459   PetscDesignatedInitializer(dot, VecDot_MPI),
460   PetscDesignatedInitializer(mdot, VecMDot_MPI),
461   PetscDesignatedInitializer(norm, VecNorm_MPI),
462   PetscDesignatedInitializer(tdot, VecTDot_MPI),
463   PetscDesignatedInitializer(mtdot, VecMTDot_MPI),
464   PetscDesignatedInitializer(scale, VecScale_Seq),
465   PetscDesignatedInitializer(copy, VecCopy_Seq), /* 10 */
466   PetscDesignatedInitializer(set, VecSet_Seq),
467   PetscDesignatedInitializer(swap, VecSwap_Seq),
468   PetscDesignatedInitializer(axpy, VecAXPY_Seq),
469   PetscDesignatedInitializer(axpby, VecAXPBY_Seq),
470   PetscDesignatedInitializer(maxpy, VecMAXPY_Seq),
471   PetscDesignatedInitializer(aypx, VecAYPX_Seq),
472   PetscDesignatedInitializer(waxpy, VecWAXPY_Seq),
473   PetscDesignatedInitializer(axpbypcz, VecAXPBYPCZ_Seq),
474   PetscDesignatedInitializer(pointwisemult, VecPointwiseMult_Seq),
475   PetscDesignatedInitializer(pointwisedivide, VecPointwiseDivide_Seq),
476   PetscDesignatedInitializer(setvalues, VecSetValues_MPI), /* 20 */
477   PetscDesignatedInitializer(assemblybegin, VecAssemblyBegin_MPI_BTS),
478   PetscDesignatedInitializer(assemblyend, VecAssemblyEnd_MPI_BTS),
479   PetscDesignatedInitializer(getarray, NULL),
480   PetscDesignatedInitializer(getsize, VecGetSize_MPI),
481   PetscDesignatedInitializer(getlocalsize, VecGetSize_Seq),
482   PetscDesignatedInitializer(restorearray, NULL),
483   PetscDesignatedInitializer(max, VecMax_MPI),
484   PetscDesignatedInitializer(min, VecMin_MPI),
485   PetscDesignatedInitializer(setrandom, VecSetRandom_Seq),
486   PetscDesignatedInitializer(setoption, VecSetOption_MPI),
487   PetscDesignatedInitializer(setvaluesblocked, VecSetValuesBlocked_MPI),
488   PetscDesignatedInitializer(destroy, VecDestroy_MPI),
489   PetscDesignatedInitializer(view, VecView_MPI),
490   PetscDesignatedInitializer(placearray, VecPlaceArray_MPI),
491   PetscDesignatedInitializer(replacearray, VecReplaceArray_Seq),
492   PetscDesignatedInitializer(dot_local, VecDot_Seq),
493   PetscDesignatedInitializer(tdot_local, VecTDot_Seq),
494   PetscDesignatedInitializer(norm_local, VecNorm_Seq),
495   PetscDesignatedInitializer(mdot_local, VecMDot_Seq),
496   PetscDesignatedInitializer(mtdot_local, VecMTDot_Seq),
497   PetscDesignatedInitializer(load, VecLoad_Default),
498   PetscDesignatedInitializer(reciprocal, VecReciprocal_Default),
499   PetscDesignatedInitializer(conjugate, VecConjugate_Seq),
500   PetscDesignatedInitializer(setlocaltoglobalmapping, NULL),
501   PetscDesignatedInitializer(getlocaltoglobalmapping, VecGetLocalToGlobalMapping_MPI_VecGhost),
502   PetscDesignatedInitializer(setvalueslocal, NULL),
503   PetscDesignatedInitializer(resetarray, VecResetArray_MPI),
504   PetscDesignatedInitializer(setfromoptions, VecSetFromOptions_MPI), /*set from options */
505   PetscDesignatedInitializer(maxpointwisedivide, VecMaxPointwiseDivide_MPI),
506   PetscDesignatedInitializer(pointwisemax, VecPointwiseMax_Seq),
507   PetscDesignatedInitializer(pointwisemaxabs, VecPointwiseMaxAbs_Seq),
508   PetscDesignatedInitializer(pointwisemin, VecPointwiseMin_Seq),
509   PetscDesignatedInitializer(getvalues, VecGetValues_MPI),
510   PetscDesignatedInitializer(sqrt, NULL),
511   PetscDesignatedInitializer(abs, NULL),
512   PetscDesignatedInitializer(exp, NULL),
513   PetscDesignatedInitializer(log, NULL),
514   PetscDesignatedInitializer(shift, NULL),
515   PetscDesignatedInitializer(create, NULL), /* really? */
516   PetscDesignatedInitializer(stridegather, VecStrideGather_Default),
517   PetscDesignatedInitializer(stridescatter, VecStrideScatter_Default),
518   PetscDesignatedInitializer(dotnorm2, NULL),
519   PetscDesignatedInitializer(getsubvector, NULL),
520   PetscDesignatedInitializer(restoresubvector, NULL),
521   PetscDesignatedInitializer(getarrayread, NULL),
522   PetscDesignatedInitializer(restorearrayread, NULL),
523   PetscDesignatedInitializer(stridesubsetgather, VecStrideSubSetGather_Default),
524   PetscDesignatedInitializer(stridesubsetscatter, VecStrideSubSetScatter_Default),
525   PetscDesignatedInitializer(viewnative, VecView_MPI),
526   PetscDesignatedInitializer(loadnative, NULL),
527   PetscDesignatedInitializer(createlocalvector, NULL),
528   PetscDesignatedInitializer(getlocalvector, NULL),
529   PetscDesignatedInitializer(restorelocalvector, NULL),
530   PetscDesignatedInitializer(getlocalvectorread, NULL),
531   PetscDesignatedInitializer(restorelocalvectorread, NULL),
532   PetscDesignatedInitializer(bindtocpu, NULL),
533   PetscDesignatedInitializer(getarraywrite, NULL),
534   PetscDesignatedInitializer(restorearraywrite, NULL),
535   PetscDesignatedInitializer(getarrayandmemtype, NULL),
536   PetscDesignatedInitializer(restorearrayandmemtype, NULL),
537   PetscDesignatedInitializer(getarrayreadandmemtype, NULL),
538   PetscDesignatedInitializer(restorearrayreadandmemtype, NULL),
539   PetscDesignatedInitializer(getarraywriteandmemtype, NULL),
540   PetscDesignatedInitializer(restorearraywriteandmemtype, NULL),
541   PetscDesignatedInitializer(concatenate, NULL),
542   PetscDesignatedInitializer(sum, NULL),
543   PetscDesignatedInitializer(setpreallocationcoo, VecSetPreallocationCOO_MPI),
544   PetscDesignatedInitializer(setvaluescoo, VecSetValuesCOO_MPI),
545   PetscDesignatedInitializer(errorwnorm, NULL),
546   PetscDesignatedInitializer(maxpby, NULL),
547 };
548 
549 /*
550     VecCreate_MPI_Private - Basic create routine called by VecCreate_MPI() (i.e. VecCreateMPI()),
551     VecCreateMPIWithArray(), VecCreate_Shared() (i.e. VecCreateShared()), VecCreateGhost(),
552     VecDuplicate_MPI(), VecCreateGhostWithArray(), and VecDuplicate_Shared()
553 
554     If alloc is true and array is NULL then this routine allocates the space, otherwise
555     no space is allocated.
556 */
557 PetscErrorCode VecCreate_MPI_Private(Vec v, PetscBool alloc, PetscInt nghost, const PetscScalar array[])
558 {
559   Vec_MPI  *s;
560   PetscBool mdot_use_gemv  = PETSC_TRUE;
561   PetscBool maxpy_use_gemv = PETSC_FALSE; // default is false as we saw bad performance with vendors' GEMV with tall skinny matrices.
562 
563   PetscFunctionBegin;
564   PetscCall(PetscNew(&s));
565   v->data   = (void *)s;
566   v->ops[0] = DvOps;
567 
568   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_mdot_use_gemv", &mdot_use_gemv, NULL));
569   PetscCall(PetscOptionsGetBool(NULL, NULL, "-vec_maxpy_use_gemv", &maxpy_use_gemv, NULL));
570 
571   // allocate multiple vectors together
572   if (mdot_use_gemv || maxpy_use_gemv) v->ops[0].duplicatevecs = VecDuplicateVecs_MPI_GEMV;
573 
574   if (mdot_use_gemv) {
575     v->ops[0].mdot        = VecMDot_MPI_GEMV;
576     v->ops[0].mdot_local  = VecMDot_Seq_GEMV;
577     v->ops[0].mtdot       = VecMTDot_MPI_GEMV;
578     v->ops[0].mtdot_local = VecMTDot_Seq_GEMV;
579   }
580   if (maxpy_use_gemv) v->ops[0].maxpy = VecMAXPY_Seq_GEMV;
581 
582   s->nghost      = nghost;
583   v->petscnative = PETSC_TRUE;
584   if (array) v->offloadmask = PETSC_OFFLOAD_CPU;
585 
586   PetscCall(PetscLayoutSetUp(v->map));
587 
588   s->array           = (PetscScalar *)array;
589   s->array_allocated = NULL;
590   if (alloc && !array) {
591     PetscInt n = v->map->n + nghost;
592     PetscCall(PetscCalloc1(n, &s->array));
593     s->array_allocated = s->array;
594     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_2], 0));
595     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_1], 0));
596     PetscCall(PetscObjectComposedDataSetReal((PetscObject)v, NormIds[NORM_INFINITY], 0));
597   }
598 
599   /* By default parallel vectors do not have local representation */
600   s->localrep    = NULL;
601   s->localupdate = NULL;
602   s->ghost       = NULL;
603 
604   v->stash.insertmode  = NOT_SET_VALUES;
605   v->bstash.insertmode = NOT_SET_VALUES;
606   /* create the stashes. The block-size for bstash is set later when
607      VecSetValuesBlocked is called.
608   */
609   PetscCall(VecStashCreate_Private(PetscObjectComm((PetscObject)v), 1, &v->stash));
610   PetscCall(VecStashCreate_Private(PetscObjectComm((PetscObject)v), v->map->bs, &v->bstash));
611 
612 #if defined(PETSC_HAVE_MATLAB)
613   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEnginePut_C", VecMatlabEnginePut_Default));
614   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEngineGet_C", VecMatlabEngineGet_Default));
615 #endif
616   PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPI));
617   PetscFunctionReturn(PETSC_SUCCESS);
618 }
619 
620 /*
621   Create a VECMPI with the given layout and array
622 
623   Collective
624 
625   Input Parameters:
626 + map   - the layout
627 - array - the array on host
628 
629   Output Parameter:
630 . V  - The vector object
631 */
632 PetscErrorCode VecCreateMPIWithLayoutAndArray_Private(PetscLayout map, const PetscScalar array[], Vec *V)
633 {
634   PetscFunctionBegin;
635   PetscCall(VecCreateWithLayout_Private(map, V));
636   PetscCall(VecCreate_MPI_Private(*V, PETSC_FALSE, 0, array));
637   PetscFunctionReturn(PETSC_SUCCESS);
638 }
639 
640 /*MC
641    VECMPI - VECMPI = "mpi" - The basic parallel vector
642 
643    Options Database Key:
644 . -vec_type mpi - sets the vector type to `VECMPI` during a call to `VecSetFromOptions()`
645 
646   Level: beginner
647 
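  Example Usage:
  A minimal sketch; the global size is illustrative.
.vb
  Vec x;

  PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
  PetscCall(VecSetSizes(x, PETSC_DECIDE, 100));
  PetscCall(VecSetFromOptions(x)); /* -vec_type mpi selects this vector type */
  PetscCall(VecDestroy(&x));
.ve
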
648 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VecCreateMPI()`
649 M*/
650 
651 PetscErrorCode VecCreate_MPI(Vec vv)
652 {
653   PetscFunctionBegin;
654   PetscCall(VecCreate_MPI_Private(vv, PETSC_TRUE, 0, NULL));
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 /*MC
659    VECSTANDARD = "standard" - A `VECSEQ` on one process and `VECMPI` on more than one process
660 
661    Options Database Key:
662 . -vec_type standard - sets the vector type to `VECSTANDARD` during a call to `VecSetFromOptions()`
663 
664   Level: beginner
665 
666 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreateMPI()`
667 M*/
668 
669 PETSC_EXTERN PetscErrorCode VecCreate_Standard(Vec v)
670 {
671   PetscMPIInt size;
672 
673   PetscFunctionBegin;
674   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
675   if (size == 1) {
676     PetscCall(VecSetType(v, VECSEQ));
677   } else {
678     PetscCall(VecSetType(v, VECMPI));
679   }
680   PetscFunctionReturn(PETSC_SUCCESS);
681 }
682 
683 /*@
684   VecCreateMPIWithArray - Creates a parallel, array-style vector,
685   where the user provides the array space to store the vector values.
686 
687   Collective
688 
689   Input Parameters:
690 + comm  - the MPI communicator to use
691 . bs    - block size, same meaning as `VecSetBlockSize()`
692 . n     - local vector length, cannot be `PETSC_DECIDE`
693 . N     - global vector length (or `PETSC_DETERMINE` to have it calculated)
694 - array - the user provided array to store the vector values
695 
696   Output Parameter:
697 . vv - the vector
698 
699   Level: intermediate
700 
701   Notes:
702   Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the
703   same type as an existing vector.
704 
705   If the user-provided array is `NULL`, then `VecPlaceArray()` can be used
706   at a later stage to SET the array for storing the vector values.
707 
708   PETSc does NOT free `array` when the vector is destroyed via `VecDestroy()`.
709 
710   The user should not free `array` until the vector is destroyed.
711 
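  Example Usage:
  A minimal sketch; the local size and the contents of `a` are illustrative, and `a` must outlive the vector.
.vb
  PetscScalar a[5] = {1.0, 2.0, 3.0, 4.0, 5.0};
  Vec         x;

  PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, 5, PETSC_DETERMINE, a, &x));
  /* ... use x ... */
  PetscCall(VecDestroy(&x)); /* does not free a */
.ve
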
712 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
713           `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
714 @*/
715 PetscErrorCode VecCreateMPIWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar array[], Vec *vv)
716 {
717   PetscFunctionBegin;
718   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
719   PetscCall(PetscSplitOwnership(comm, &n, &N));
720   PetscCall(VecCreate(comm, vv));
721   PetscCall(VecSetSizes(*vv, n, N));
722   PetscCall(VecSetBlockSize(*vv, bs));
723   PetscCall(VecCreate_MPI_Private(*vv, PETSC_FALSE, 0, array));
724   PetscFunctionReturn(PETSC_SUCCESS);
725 }
726 
727 /*@
728   VecCreateGhostWithArray - Creates a parallel vector with ghost padding on each processor;
729   the caller allocates the array space.
730 
731   Collective
732 
733   Input Parameters:
734 + comm   - the MPI communicator to use
735 . n      - local vector length
736 . N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
737 . nghost - number of local ghost points
738 . ghosts - global indices of ghost points (or `NULL` if not needed), these do not need to be in increasing order (sorted)
739 - array  - the space to store the vector values (must be at least `n + nghost` entries long)
740 
741   Output Parameter:
742 . vv - the global vector representation (without ghost points as part of vector)
743 
744   Level: advanced
745 
746   Notes:
747   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
748   of the vector.
749 
750   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
751 
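  Example Usage:
  A minimal sketch; the sizes, ghost indices, and array length are illustrative. The array holds the `n` local entries followed by the `nghost` ghost entries.
.vb
  PetscInt    ghosts[2] = {0, 1}; /* global indices of the ghost entries (illustrative) */
  PetscScalar a[6];               /* n + nghost = 4 + 2 entries */
  Vec         x, xlocal;

  PetscCall(VecCreateGhostWithArray(PETSC_COMM_WORLD, 4, PETSC_DETERMINE, 2, ghosts, a, &x));
  PetscCall(VecGhostUpdateBegin(x, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecGhostUpdateEnd(x, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecGhostGetLocalForm(x, &xlocal)); /* sequential vector of length n + nghost */
  PetscCall(VecGhostRestoreLocalForm(x, &xlocal));
  PetscCall(VecDestroy(&x));
.ve
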
752 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
753           `VecCreateGhost()`, `VecCreateSeqWithArray()`, `VecCreateMPIWithArray()`,
754           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`, `VecMPISetGhost()`
755 @*/
756 PetscErrorCode VecCreateGhostWithArray(MPI_Comm comm, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], const PetscScalar array[], Vec *vv)
757 {
758   Vec_MPI     *w;
759   PetscScalar *larray;
760   IS           from, to;
761 
762   PetscFunctionBegin;
763   *vv = NULL;
764   PetscCheck(nghost != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local ghost size");
765   PetscCheck(nghost >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ghost length must be >= 0");
766   PetscCall(PetscSplitOwnership(comm, &n, &N));
767   /* Create global representation */
768   PetscCall(VecCreate(comm, vv));
769   PetscCall(VecSetSizes(*vv, n, N));
770   PetscCall(VecCreate_MPI_Private(*vv, PETSC_TRUE, nghost, array));
771   w = (Vec_MPI *)(*vv)->data;
772   /* Create local representation */
773   PetscCall(VecGetArray(*vv, &larray));
774   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n + nghost, larray, &w->localrep));
775   PetscCall(VecRestoreArray(*vv, &larray));
776 
777   /*
778        Create scatter context for scattering (updating) ghost values
779   */
780   PetscCall(ISCreateGeneral(comm, nghost, ghosts, PETSC_COPY_VALUES, &from));
781   PetscCall(ISCreateStride(PETSC_COMM_SELF, nghost, n, 1, &to));
782   PetscCall(VecScatterCreate(*vv, from, w->localrep, to, &w->localupdate));
783   PetscCall(ISDestroy(&to));
784 
785   w->ghost                            = from;
786   (*vv)->ops->getlocaltoglobalmapping = VecGetLocalToGlobalMapping_MPI_VecGhost;
787   PetscFunctionReturn(PETSC_SUCCESS);
788 }
789 
790 /*@
791   VecGhostGetGhostIS - Returns the ghosting indices of a ghost vector
792 
793   Input Parameter:
794 . X - ghost vector
795 
796   Output Parameter:
797 . ghost - ghosting indices
798 
799   Level: beginner
800 
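  Example Usage:
  A minimal sketch; `x` is assumed to be a ghosted `VECMPI` created with, e.g., `VecCreateGhost()`.
.vb
  IS              ghost;
  PetscInt        nghost;
  const PetscInt *idx;

  PetscCall(VecGhostGetGhostIS(x, &ghost));
  PetscCall(ISGetLocalSize(ghost, &nghost));
  PetscCall(ISGetIndices(ghost, &idx));
  /* ... inspect the nghost global ghost indices in idx ... */
  PetscCall(ISRestoreIndices(ghost, &idx));
.ve
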
801 .seealso: `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`
802 @*/
803 PetscErrorCode VecGhostGetGhostIS(Vec X, IS *ghost)
804 {
805   Vec_MPI  *w;
806   PetscBool flg;
807 
808   PetscFunctionBegin;
809   PetscValidType(X, 1);
810   PetscAssertPointer(ghost, 2);
811   PetscCall(PetscObjectTypeCompare((PetscObject)X, VECMPI, &flg));
812   PetscCheck(flg, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "VecGhostGetGhostIS() is not supported for vector type %s", ((PetscObject)X)->type_name);
813   w      = (Vec_MPI *)X->data;
814   *ghost = w->ghost;
815   PetscFunctionReturn(PETSC_SUCCESS);
816 }
817 
818 /*@
819   VecCreateGhost - Creates a parallel vector with ghost padding on each processor.
820 
821   Collective
822 
823   Input Parameters:
824 + comm   - the MPI communicator to use
825 . n      - local vector length
826 . N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
827 . nghost - number of local ghost points
828 - ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)
829 
830   Output Parameter:
831 . vv - the global vector representation (without ghost points as part of vector)
832 
833   Level: advanced
834 
835   Notes:
836   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
837   of the vector.
838 
839   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
840 
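  Example Usage:
  A minimal sketch; the sizes and ghost indices are illustrative.
.vb
  PetscInt ghosts[2] = {0, 1}; /* global indices to be ghosted locally (illustrative) */
  Vec      x, xlocal;

  PetscCall(VecCreateGhost(PETSC_COMM_WORLD, 4, PETSC_DETERMINE, 2, ghosts, &x));
  PetscCall(VecSet(x, 1.0));
  PetscCall(VecGhostUpdateBegin(x, INSERT_VALUES, SCATTER_FORWARD)); /* fill the ghost entries */
  PetscCall(VecGhostUpdateEnd(x, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecGhostGetLocalForm(x, &xlocal));
  PetscCall(VecGhostRestoreLocalForm(x, &xlocal));
  PetscCall(VecDestroy(&x));
.ve
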
841 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
842           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`, `VecGhostUpdateBegin()`,
843           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecGhostUpdateEnd()`,
844           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`, `VecMPISetGhost()`
845 
846 @*/
847 PetscErrorCode VecCreateGhost(MPI_Comm comm, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], Vec *vv)
848 {
849   PetscFunctionBegin;
850   PetscCall(VecCreateGhostWithArray(comm, n, N, nghost, ghosts, NULL, vv));
851   PetscFunctionReturn(PETSC_SUCCESS);
852 }
853 
854 /*@
855   VecMPISetGhost - Sets the ghost points for an MPI ghost vector
856 
857   Collective
858 
859   Input Parameters:
860 + vv     - the MPI vector
861 . nghost - number of local ghost points
862 - ghosts - global indices of ghost points, these do not need to be in increasing order (sorted)
863 
864   Level: advanced
865 
866   Notes:
867   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
868   of the vector.
869 
870   This also automatically sets the `ISLocalToGlobalMapping()` for this vector.
871 
872   You must call this AFTER you have set the type of the vector (with `VecSetType()`) and the size (with `VecSetSizes()`).
873 
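  Example Usage:
  A minimal sketch showing the required ordering; the sizes and ghost indices are illustrative.
.vb
  PetscInt ghosts[2] = {0, 1};
  Vec      x;

  PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
  PetscCall(VecSetSizes(x, 4, PETSC_DETERMINE));
  PetscCall(VecSetType(x, VECMPI));
  PetscCall(VecMPISetGhost(x, 2, ghosts));
  PetscCall(VecDestroy(&x));
.ve
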
874 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
875           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`, `VecGhostUpdateBegin()`,
876           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecGhostUpdateEnd()`,
877           `VecCreateGhostBlock()`, `VecCreateGhostBlockWithArray()`
878 @*/
879 PetscErrorCode VecMPISetGhost(Vec vv, PetscInt nghost, const PetscInt ghosts[])
880 {
881   PetscBool flg;
882 
883   PetscFunctionBegin;
884   PetscCall(PetscObjectTypeCompare((PetscObject)vv, VECMPI, &flg));
885   /* if this is already a fully built VECMPI, destroy it and rebuild it with ghosting */
886   if (flg) {
887     PetscInt     n, N;
888     Vec_MPI     *w;
889     PetscScalar *larray;
890     IS           from, to;
891     MPI_Comm     comm;
892 
893     PetscCall(PetscObjectGetComm((PetscObject)vv, &comm));
894     n = vv->map->n;
895     N = vv->map->N;
896     PetscUseTypeMethod(vv, destroy);
897     PetscCall(VecSetSizes(vv, n, N));
898     PetscCall(VecCreate_MPI_Private(vv, PETSC_TRUE, nghost, NULL));
899     w = (Vec_MPI *)vv->data;
900     /* Create local representation */
901     PetscCall(VecGetArray(vv, &larray));
902     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n + nghost, larray, &w->localrep));
903     PetscCall(VecRestoreArray(vv, &larray));
904 
905     /*
906      Create scatter context for scattering (updating) ghost values
907      */
908     PetscCall(ISCreateGeneral(comm, nghost, ghosts, PETSC_COPY_VALUES, &from));
909     PetscCall(ISCreateStride(PETSC_COMM_SELF, nghost, n, 1, &to));
910     PetscCall(VecScatterCreate(vv, from, w->localrep, to, &w->localupdate));
911     PetscCall(ISDestroy(&to));
912 
913     w->ghost                         = from;
914     vv->ops->getlocaltoglobalmapping = VecGetLocalToGlobalMapping_MPI_VecGhost;
915   } else {
916     PetscCheck(vv->ops->create != VecCreate_MPI, PetscObjectComm((PetscObject)vv), PETSC_ERR_ARG_WRONGSTATE, "Must set local or global size before setting ghosting");
917     PetscCheck(((PetscObject)vv)->type_name, PetscObjectComm((PetscObject)vv), PETSC_ERR_ARG_WRONGSTATE, "Must set type to VECMPI before ghosting");
918   }
919   PetscFunctionReturn(PETSC_SUCCESS);
920 }
921 
922 /*@
923   VecCreateGhostBlockWithArray - Creates a parallel vector with ghost padding on each processor;
924   the caller allocates the array space. Indices in the ghost region are based on blocks.
925 
926   Collective
927 
928   Input Parameters:
929 + comm   - the MPI communicator to use
930 . bs     - block size
931 . n      - local vector length
932 . N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
933 . nghost - number of local ghost blocks
934 . ghosts - global indices of ghost blocks (or `NULL` if not needed), counts are by block not by index, these do not need to be in increasing order (sorted)
935 - array  - the space to store the vector values (must be at least `n + nghost*bs` entries long)
936 
937   Output Parameter:
938 . vv - the global vector representation (without ghost points as part of vector)
939 
940   Level: advanced
941 
942   Notes:
943   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
944   of the vector.
945 
946   n is the local vector size (total local size not the number of blocks) while nghost
947   `n` is the local vector size (total local size not the number of blocks) while `nghost`
948   is the number of blocks in the ghost portion, i.e. the number of elements in the ghost
949   portion is `bs*nghost`
950 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreate()`, `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
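  Example Usage:
  A minimal sketch; block size, local size, ghost blocks, and array length are illustrative. With `bs = 2`, `n = 4`, and `nghost = 1` ghost block, the array must hold `n + nghost*bs = 6` entries.
.vb
  PetscInt    ghosts[1] = {0}; /* global index of the ghost block (illustrative) */
  PetscScalar a[6];
  Vec         x;

  PetscCall(VecCreateGhostBlockWithArray(PETSC_COMM_WORLD, 2, 4, PETSC_DETERMINE, 1, ghosts, a, &x));
  PetscCall(VecDestroy(&x)); /* does not free a */
.ve
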
951           `VecCreateGhost()`, `VecCreateSeqWithArray()`, `VecCreateMPIWithArray()`,
952           `VecCreateGhostWithArray()`, `VecCreateGhostBlock()`
953 @*/
954 PetscErrorCode VecCreateGhostBlockWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], const PetscScalar array[], Vec *vv)
955 {
956   Vec_MPI               *w;
957   PetscScalar           *larray;
958   IS                     from, to;
959   ISLocalToGlobalMapping ltog;
960   PetscInt               rstart, i, nb, *indices;
961 
962   PetscFunctionBegin;
963   *vv = NULL;
964 
965   PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size");
966   PetscCheck(nghost != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local ghost size");
967   PetscCheck(nghost >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ghost length must be >= 0");
968   PetscCheck(n % bs == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local size must be a multiple of block size");
969   PetscCall(PetscSplitOwnership(comm, &n, &N));
970   /* Create global representation */
971   PetscCall(VecCreate(comm, vv));
972   PetscCall(VecSetSizes(*vv, n, N));
973   PetscCall(VecSetBlockSize(*vv, bs));
974   PetscCall(VecCreate_MPI_Private(*vv, PETSC_TRUE, nghost * bs, array));
975   w = (Vec_MPI *)(*vv)->data;
976   /* Create local representation */
977   PetscCall(VecGetArray(*vv, &larray));
978   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, bs, n + bs * nghost, larray, &w->localrep));
979   PetscCall(VecRestoreArray(*vv, &larray));
980 
981   /*
982        Create scatter context for scattering (updating) ghost values
983   */
984   PetscCall(ISCreateBlock(comm, bs, nghost, ghosts, PETSC_COPY_VALUES, &from));
985   PetscCall(ISCreateStride(PETSC_COMM_SELF, bs * nghost, n, 1, &to));
986   PetscCall(VecScatterCreate(*vv, from, w->localrep, to, &w->localupdate));
987   PetscCall(ISDestroy(&to));
988   PetscCall(ISDestroy(&from));
989 
990   /* set local to global mapping for ghosted vector */
991   nb = n / bs;
992   PetscCall(PetscMalloc1(nb + nghost, &indices));
993   PetscCall(VecGetOwnershipRange(*vv, &rstart, NULL));
994   rstart = rstart / bs;
995 
996   for (i = 0; i < nb; i++) indices[i] = rstart + i;
997   for (i = 0; i < nghost; i++) indices[nb + i] = ghosts[i];
998 
999   PetscCall(ISLocalToGlobalMappingCreate(comm, bs, nb + nghost, indices, PETSC_OWN_POINTER, &ltog));
1000   PetscCall(VecSetLocalToGlobalMapping(*vv, ltog));
1001   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
1002   PetscFunctionReturn(PETSC_SUCCESS);
1003 }
1004 
1005 /*@
1006   VecCreateGhostBlock - Creates a parallel vector with ghost padding on each processor.
1007   The indexing of the ghost points is done by blocks.
1008 
1009   Collective
1010 
1011   Input Parameters:
1012 + comm   - the MPI communicator to use
1013 . bs     - the block size
1014 . n      - local vector length
1015 . N      - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)
1016 . nghost - number of local ghost blocks
1017 - ghosts - global indices of ghost blocks, counts are by block, not by individual index, these do not need to be in increasing order (sorted)
1018 
1019   Output Parameter:
1020 . vv - the global vector representation (without ghost points as part of vector)
1021 
1022   Level: advanced
1023 
1024   Notes:
1025   Use `VecGhostGetLocalForm()` to access the local, ghosted representation
1026   of the vector.
1027 
1028   `n` is the local vector size (total local size not the number of blocks) while `nghost`
1029   is the number of blocks in the ghost portion, i.e. the number of elements in the ghost
1030   portion is `bs*nghost`
1031 
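  Example Usage:
  A minimal sketch; with block size 2 and two locally owned blocks (`n = 4`), one ghost block is requested by its global block index.
.vb
  PetscInt ghosts[1] = {0}; /* global block index to ghost (illustrative) */
  Vec      x;

  PetscCall(VecCreateGhostBlock(PETSC_COMM_WORLD, 2, 4, PETSC_DETERMINE, 1, ghosts, &x));
  PetscCall(VecGhostUpdateBegin(x, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecGhostUpdateEnd(x, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&x));
.ve
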
1032 .seealso: [](ch_vectors), `Vec`, `VecType`, `VecCreateSeq()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateMPI()`,
1033           `VecGhostGetLocalForm()`, `VecGhostRestoreLocalForm()`,
1034           `VecCreateGhostWithArray()`, `VecCreateMPIWithArray()`, `VecCreateGhostBlockWithArray()`
1035 @*/
1036 PetscErrorCode VecCreateGhostBlock(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, PetscInt nghost, const PetscInt ghosts[], Vec *vv)
1037 {
1038   PetscFunctionBegin;
1039   PetscCall(VecCreateGhostBlockWithArray(comm, bs, n, N, nghost, ghosts, NULL, vv));
1040   PetscFunctionReturn(PETSC_SUCCESS);
1041 }
1042