xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1588c8cc7fcfc1d6558451356013fa3f4ce18577)
1 #ifndef lint
2 static char vcid[] = "$Id: $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 #define CHUNCKSIZE   100
/*
   This is a simple-minded stash.  A linear search determines whether an
   entry is already in the stash; if not, it is added at the end.
*/
/*
   StashValues - Buffers one row of values that belong to another processor
   (see MatiAIJInsertValues) until assembly time.

   Input Parameters:
     stash  - the stash to insert into
     row    - global row number for all n values
     n      - number of column entries
     idxn   - the n global column numbers
     values - the n values
     addv   - AddValues sums into an existing entry, otherwise it overwrites

   Duplicate (row,column) pairs are merged by a linear search over the
   entries present when the call began; when full, the stash grows by
   CHUNCKSIZE entries using a single allocation that holds the Scalar
   values followed by the two integer index arrays.

   NOTE(review): N is captured before the loop, so entries appended during
   this call are not searched for duplicates within the same call.
*/
static int StashValues(Stash *stash,int row,int n, int *idxn,
                       Scalar *values,InsertMode addv)
{
  int    i,j,N = stash->n,found,*n_idx, *n_idy;
  Scalar val,*n_array;

  for ( i=0; i<n; i++ ) {
    found = 0;
    val = *values++;
    /* linear search of the pre-existing entries for (row,idxn[i]) */
    for ( j=0; j<N; j++ ) {
      if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
        /* found a match */
        if (addv == AddValues) stash->array[j] += val;
        else stash->array[j] = val;
        found = 1;
        break;
      }
    }
    if (!found) { /* not found so add to end */
      if ( stash->n == stash->nmax ) {
        /* allocate a larger stash: one block laid out as
           [nmax+CHUNCKSIZE Scalars][row indices][column indices] */
        n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
                                     2*sizeof(int) + sizeof(Scalar)));
        CHKPTR(n_array);
        n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
        n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
        MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
        MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
        MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
        if (stash->array) FREE(stash->array);
        stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
        stash->nmax += CHUNCKSIZE;
      }
      stash->array[stash->n]   = val;
      stash->idx[stash->n]     = row;
      stash->idy[stash->n++]   = idxn[i];
    }
  }
  return 0;
}
54 
/*
   MatiAIJInsertValues - Inserts or adds an m-by-n logically dense block of
   values (v, row-major) into the parallel matrix.

   Locally owned rows (rstart <= row < rend) go directly into either the
   diagonal block aij->A (columns in [cstart,cend), shifted to local
   numbering) or the off-diagonal block aij->B (columns kept in global
   numbering until assembly).  Rows owned by other processors are buffered
   in the stash and communicated during assembly.

   Mixing InsertValues and AddValues between calls (before an assembly)
   is an error.
*/
static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n,
                            int *idxn,Scalar *v,InsertMode addv)
{
  Matimpiaij *aij = (Matimpiaij *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
    SETERR(1,"You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERR(1,"Negative row index");
    if (idxm[i] >= aij->M) SETERR(1,"Row index too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row number */
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERR(1,"Negative column index");
        if (idxn[j] >= aij->N) SETERR(1,"Column index too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block: shift column to local numbering */
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
        }
        else {
          if (aij->assembled) {
            SETERR(1,"Cannot insert off diagonal block in already\
                     assembled matrix. Contact petsc-maint@mcs.anl.gov\
                     if your need this feature");
          }
          /* off-diagonal block: column stays global until assembly */
          col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
        }
      }
    }
    else {
      /* row belongs to another processor; stash until assembly */
      ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
    }
  }
  return 0;
}
95 
/*
    The assembly code is a lot like the code for vectors; we should at
    some point derive a single assembly routine that can be used for
    either case.
*/
101 
102 static int MatiAIJBeginAssemble(Mat mat)
103 {
104   Matimpiaij  *aij = (Matimpiaij *) mat->data;
105   MPI_Comm    comm = mat->comm;
106   int         numtids = aij->numtids, *owners = aij->rowners;
107   int         mytid = aij->mytid;
108   MPI_Request *send_waits,*recv_waits;
109   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
110   int         tag = 50, *owner,*starts,count;
111   InsertMode  addv;
112   Scalar      *rvalues,*svalues;
113 
114   /* make sure all processors are either in INSERTMODE or ADDMODE */
115   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,numtids,MPI_INT,
116                 MPI_BOR,comm);
117   if (addv == (AddValues|InsertValues)) {
118     SETERR(1,"Some processors have inserted while others have added");
119   }
120   aij->insertmode = addv; /* in case this processor had no cache */
121 
122   /*  first count number of contributors to each processor */
123   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
124   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
125   owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
126   for ( i=0; i<aij->stash.n; i++ ) {
127     idx = aij->stash.idx[i];
128     for ( j=0; j<numtids; j++ ) {
129       if (idx >= owners[j] && idx < owners[j+1]) {
130         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
131       }
132     }
133   }
134   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
135 
136   /* inform other processors of number of messages and max length*/
137   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
138   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
139   nreceives = work[mytid];
140   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
141   nmax = work[mytid];
142   FREE(work);
143 
144   /* post receives:
145        1) each message will consist of ordered pairs
146      (global index,value) we store the global index as a double
147      to simplify the message passing.
148        2) since we don't know how long each individual message is we
149      allocate the largest needed buffer for each receive. Potentially
150      this is a lot of wasted space.
151 
152 
153        This could be done better.
154   */
155   rvalues = (Scalar *) MALLOC(3*(nreceives+1)*nmax*sizeof(Scalar));
156   CHKPTR(rvalues);
157   recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
158   CHKPTR(recv_waits);
159   for ( i=0; i<nreceives; i++ ) {
160     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
161               comm,recv_waits+i);
162   }
163 
164   /* do sends:
165       1) starts[i] gives the starting index in svalues for stuff going to
166          the ith processor
167   */
168   svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
169   CHKPTR(svalues);
170   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
171   CHKPTR(send_waits);
172   starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
173   starts[0] = 0;
174   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
175   for ( i=0; i<aij->stash.n; i++ ) {
176     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
177     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
178     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
179   }
180   FREE(owner);
181   starts[0] = 0;
182   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
183   count = 0;
184   for ( i=0; i<numtids; i++ ) {
185     if (procs[i]) {
186       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
187                 comm,send_waits+count++);
188     }
189   }
190   FREE(starts); FREE(nprocs);
191 
192   /* Free cache space */
193   aij->stash.nmax = aij->stash.n = 0;
194   if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}
195 
196   aij->svalues    = svalues;       aij->rvalues = rvalues;
197   aij->nsends     = nsends;         aij->nrecvs = nreceives;
198   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
199   aij->rmax       = nmax;
200 
201   return 0;
202 }
203 extern int MPIAIJSetUpMultiply(Mat);
204 
205 static int MatiAIJEndAssemble(Mat mat)
206 {
207   int        ierr;
208   Matimpiaij *aij = (Matimpiaij *) mat->data;
209 
210   MPI_Status  *send_status,recv_status;
211   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
212   int         row,col;
213   Scalar      *values,val;
214   InsertMode  addv = aij->insertmode;
215 
216   /*  wait on receives */
217   while (count) {
218     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
219     /* unpack receives into our local space */
220     values = aij->rvalues + 3*imdex*aij->rmax;
221     MPI_Get_count(&recv_status,MPI_SCALAR,&n);
222     n = n/3;
223     for ( i=0; i<n; i++ ) {
224       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
225       col = (int) PETSCREAL(values[3*i+1]);
226       val = values[3*i+2];
227       if (col >= aij->cstart && col < aij->cend) {
228           col -= aij->cstart;
229         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
230       }
231       else {
232         if (aij->assembled) {
233           SETERR(1,"Cannot insert off diagonal block in already\
234                    assembled matrix. Contact petsc-maint@mcs.anl.gov\
235                    if your need this feature");
236         }
237         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
238       }
239     }
240     count--;
241   }
242   FREE(aij->recv_waits); FREE(aij->rvalues);
243 
244   /* wait on sends */
245   if (aij->nsends) {
246     send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
247     CHKPTR(send_status);
248     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
249     FREE(send_status);
250   }
251   FREE(aij->send_waits); FREE(aij->svalues);
252 
253   aij->insertmode = NotSetValues;
254   ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
255   ierr = MatEndAssembly(aij->A); CHKERR(ierr);
256 
257 
258   if (aij->assembled) {
259     /* this is because the present implimentation doesn't support
260      inserting values off the diagonal block.*/
261     return 0;
262   }
263 
264   ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr);
265   ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
266   ierr = MatEndAssembly(aij->B); CHKERR(ierr);
267 
268   aij->assembled = 1;
269   return 0;
270 }
271 
272 static int MatiZero(Mat A)
273 {
274   Matimpiaij *l = (Matimpiaij *) A->data;
275 
276   MatZeroEntries(l->A); MatZeroEntries(l->B);
277   return 0;
278 }
279 
/* Again this uses the same basic strategy as in the assembly and
   scatter create routines; we should try to do it systematically
   if we can figure out the proper level of generality. */

/* This code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG.  The only way to fix it seems to be to access
   aij->A and aij->B directly rather than through the MatZeroRows()
   routine.
*/
/*
   MatiZerorows - Zeros the (global) rows listed in is, placing *diag on
   the diagonal of the diagonal block when diag is non-null.

   Every processor may name any global row, so the requested row numbers
   are first routed to their owning processors (same send/receive pattern
   as the assembly code), converted to local numbering, and then zeroed
   through MatZeroRows on the two sequential blocks.

   NOTE(review): see the comment above -- the diagonal handling is only
   correct for square matrices with identical row/column ownership.
*/
static int MatiZerorows(Mat A,IS is,Scalar *diag)
{
  Matimpiaij     *l = (Matimpiaij *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int            *rvalues,tag = 67,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERR(1,"MatiZerorows: must assmble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
  ierr = ISGetIndices(is,&rows); CHKERR(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
  MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERR(1,"Imdex out of range");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  FREE(work);

  /* post receives; each is sized for the longest possible message */
  rvalues = (int *) MALLOC((nrecvs+1)*nmax*sizeof(int)); /*see note */
  CHKPTR(rvalues);
  recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTR(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
  send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTR(send_waits);
  starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* recompute starts (the packing loop above advanced them) */
  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  FREE(starts);

  base = owners[mytid]; /* first global row owned by this processor */

  /*  wait on receives; record the length and source of each message */
  lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  FREE(recv_waits);

  /* gather the received global rows into one local-numbered list */
  lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  FREE(rvalues); FREE(lens);
  FREE(owner); FREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
  ierr = ISDestroy(istmp); CHKERR(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
    CHKPTR(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    FREE(send_status);
  }
  FREE(send_waits); FREE(svalues);


  return 0;
}
414 
415 static int MatiAIJMult(Mat aijin,Vec xx,Vec yy)
416 {
417   Matimpiaij *aij = (Matimpiaij *) aijin->data;
418   int        ierr;
419   if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
420   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
421   CHKERR(ierr);
422   ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
423   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
424   CHKERR(ierr);
425   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
426   return 0;
427 }
428 
429 static int MatiAIJMultadd(Mat aijin,Vec xx,Vec yy,Vec zz)
430 {
431   Matimpiaij *aij = (Matimpiaij *) aijin->data;
432   int        ierr;
433   if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
434   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
435   CHKERR(ierr);
436   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
437   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
438   CHKERR(ierr);
439   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
440   return 0;
441 }
442 
443 static int MatiAIJMultTrans(Mat aijin,Vec xx,Vec yy)
444 {
445   Matimpiaij *aij = (Matimpiaij *) aijin->data;
446   int        ierr;
447 
448   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
449   /* do nondiagonal part */
450   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
451   /* send it on its way */
452   ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues,
453                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
454   /* do local part */
455   ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
456   /* receive remote parts: note this assumes the values are not actually */
457   /* inserted in yy until the next line, which is true for my implementation*/
458   /* but is not perhaps always true. */
459   ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse,
460                          aij->Mvctx); CHKERR(ierr);
461   return 0;
462 }
463 
464 static int MatiAIJMultTransadd(Mat aijin,Vec xx,Vec yy,Vec zz)
465 {
466   Matimpiaij *aij = (Matimpiaij *) aijin->data;
467   int        ierr;
468 
469   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
470   /* do nondiagonal part */
471   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
472   /* send it on its way */
473   ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues,
474                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
475   /* do local part */
476   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
477   /* receive remote parts: note this assumes the values are not actually */
478   /* inserted in yy until the next line, which is true for my implementation*/
479   /* but is not perhaps always true. */
480   ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse,
481                          aij->Mvctx); CHKERR(ierr);
482   return 0;
483 }
484 
485 /*
486   This only works correctly for square matrices where the subblock A->A is the
487    diagonal block
488 */
489 static int MatiAIJgetdiag(Mat Ain,Vec v)
490 {
491   Matimpiaij *A = (Matimpiaij *) Ain->data;
492   if (!A->assembled) SETERR(1,"MatiAIJgetdiag: must assmble matrix first");
493   return MatGetDiagonal(A->A,v);
494 }
495 
496 static int MatiAIJdestroy(PetscObject obj)
497 {
498   Mat        mat = (Mat) obj;
499   Matimpiaij *aij = (Matimpiaij *) mat->data;
500   int        ierr;
501   FREE(aij->rowners);
502   ierr = MatDestroy(aij->A); CHKERR(ierr);
503   ierr = MatDestroy(aij->B); CHKERR(ierr);
504   FREE(aij); FREE(mat);
505   if (aij->lvec) VecDestroy(aij->lvec);
506   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
507   return 0;
508 }
509 
510 static int MatiView(PetscObject obj,Viewer viewer)
511 {
512   Mat        mat = (Mat) obj;
513   Matimpiaij *aij = (Matimpiaij *) mat->data;
514   int        ierr;
515 
516   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
517   MPE_Seq_begin(mat->comm,1);
518   printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
519           aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
520           aij->cend);
521   ierr = MatView(aij->A,viewer); CHKERR(ierr);
522   ierr = MatView(aij->B,viewer); CHKERR(ierr);
523   MPE_Seq_end(mat->comm,1);
524   return 0;
525 }
526 
527 extern int MatiAIJmarkdiag(Matiaij  *);
/*
    This has to provide several versions:

     1) as in the purely sequential case
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) coloring, updating outer values between colors
*/
/*
   MatiAIJrelax - SOR/SSOR relaxation for the parallel matrix.

   Input Parameters:
     matin - the matrix; its diagonal block A and off-diagonal block B are
             accessed directly through their sequential AIJ structures
     bb    - right-hand side
     omega - relaxation factor
     flag  - combination of SOR_* options selecting sweep type,
             zero-initial-guess, and the Eisenstat variant
     shift - added to each diagonal element before dividing
     its   - number of iterations
     xx    - solution vector (updated in place)

   The AIJ index arrays A->i/A->j/B->j use 1-based (Fortran-style)
   numbering, hence the pervasive "- 1" offsets and the shifted array
   pointers xs, ls, ts.  diag[i] locates row i's diagonal entry in A->a
   (also 1-based).  Off-diagonal contributions come from the ghost values
   in mat->lvec; global sweeps use the VecPipeline calls to order the
   updates across processors, while SOR_LOCAL_* sweeps use only a plain
   scatter and smooth locally.
*/
static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift,
                        int its,Vec xx)
{
  Matimpiaij *mat = (Matimpiaij *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Matiaij    *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i;
  Vec        tt;

  if (!mat->assembled) SETERR(1,"MatiAIJRelax: must assmble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x -1; /* shift by one for index start of 1 */
  ls--;
  /* locate the diagonal entries of A if not already done */
  if (!A->diag) {if ((ierr = MatiAIJmarkdiag(A))) return ierr;}
  diag = A->diag;
  if (flag & SOR_EISENSTAT) {
    /* Let  A = L + U + D, where L is lower triangular,
    U is upper triangular, D is diagonal.  This routine applies

            (L + E)^{-1} A (U + E)^{-1}

    to a vector efficiently using Eisenstat's trick.  This is for
    the case of SSOR preconditioner, so E is D/omega where omega
    is the relaxation factor.
    */
    ierr = VecCreate(xx,&tt); CHKERR(ierr);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /*  x = (E + U)^{-1} b  : backward sweep over the strictly upper part */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
    for ( i=m-1; i>-1; i-- ) {
      /* entries of row i strictly past the diagonal */
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i];
      v    = A->a + diag[i];
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      /* off-diagonal block contribution from the ghost values */
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);

    /*  t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /*  t = (E + L)^{-1}t  : forward sweep over the strictly lower part */
    ts = t - 1; /* shifted by one for index start of a or mat->j*/
    diag = A->diag; /* diag was advanced by the loop above; reset it */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                                                 mat->Mvctx); CHKERR(ierr);
    for ( i=0; i<m; i++ ) {
      /* entries of row i strictly before the diagonal */
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] - 1;
      v    = A->a + A->i[i] - 1;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                                                    mat->Mvctx); CHKERR(ierr);
    /*  x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }


  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      /* bring the current ghost values up to date */
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with x == 0: only the strictly lower part acts */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with x == 0: only the strictly upper part acts */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i];
        v    = A->a + diag[i];
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                            mat->Mvctx); CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                            mat->Mvctx); CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* ghost values are zero: the local sweep is exactly the sequential
         relaxation on the diagonal block */
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    /* fetch the ghost values once; they are held fixed for all its */
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    /* fetch the ghost values once; they are held fixed for all its */
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    /* fetch the ghost values once; they are held fixed for all its */
    ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                            mat->Mvctx); CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                            mat->Mvctx); CHKERR(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}
849 static int MatiAIJinsopt(Mat aijin,int op)
850 {
851   Matimpiaij *aij = (Matimpiaij *) aijin->data;
852 
853   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
854     MatSetOption(aij->A,op);
855     MatSetOption(aij->B,op);
856   }
857   else if (op == YES_NEW_NONZERO_LOCATIONS) {
858     MatSetOption(aij->A,op);
859     MatSetOption(aij->B,op);
860   }
861   else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
862   return 0;
863 }
864 
865 static int MatiAIJsize(Mat matin,int *m,int *n)
866 {
867   Matimpiaij *mat = (Matimpiaij *) matin->data;
868   *m = mat->M; *n = mat->N;
869   return 0;
870 }
871 
872 static int MatiAIJlocalsize(Mat matin,int *m,int *n)
873 {
874   Matimpiaij *mat = (Matimpiaij *) matin->data;
875   *m = mat->m; *n = mat->n;
876   return 0;
877 }
878 
879 static int MatiAIJrange(Mat matin,int *m,int *n)
880 {
881   Matimpiaij *mat = (Matimpiaij *) matin->data;
882   *m = mat->rstart; *n = mat->rend;
883   return 0;
884 }
885 
886 static int MatiCopy(Mat,Mat *);
887 
888 /* -------------------------------------------------------------------*/
/*
   Operation table for the parallel (MPI) AIJ matrix format.  The
   initializers are positional in struct _MatOps; a 0 entry marks an
   operation not implemented for this format.
   NOTE(review): slot meanings are inferred only from the named entries
   below -- confirm ordering against the _MatOps declaration.
*/
static struct _MatOps MatOps = {MatiAIJInsertValues,
       0, 0,
       MatiAIJMult,MatiAIJMultadd,MatiAIJMultTrans,MatiAIJMultTransadd,
       0,0,0,0,
       0,0,
       MatiAIJrelax,
       0,
       0,0,0,
       MatiCopy,
       MatiAIJgetdiag,0,0,
       MatiAIJBeginAssemble,MatiAIJEndAssemble,
       0,
       MatiAIJinsopt,MatiZero,MatiZerorows,0,
       0,0,0,0,
       MatiAIJsize,MatiAIJlocalsize,MatiAIJrange };
904 
905 
906 
907 /*@
908 
909       MatCreateMPIAIJ - Creates a sparse parallel matrix
910                                  in AIJ format.
911 
912   Input Parameters:
913 .   comm - MPI communicator
914 .   m,n - number of local rows and columns (or -1 to have calculated)
915 .   M,N - global rows and columns (or -1 to have calculated)
.   d_nz - total number of nonzeros in diagonal portion of matrix
.   d_nnz - number of nonzeros per row in diagonal portion of matrix or null
.           You must leave room for the diagonal entry even if it is zero.
.   o_nz - total number of nonzeros in off-diagonal portion of matrix
.   o_nnz - number of nonzeros per row in off-diagonal portion of matrix
.           or null. You must have at least one nonzero per row.
922 
923   Output parameters:
924 .  newmat - the matrix
925 
926   Keywords: matrix, aij, compressed row, sparse, parallel
927 @*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Matimpiaij   *aij;
  int          ierr, i,sum[2],work[2];
  *newmat         = 0;
  /* allocate the matrix header and the parallel AIJ context */
  CREATEHEADER(mat,_Mat);
  mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
  mat->cookie     = MAT_COOKIE;
  mat->ops        = &MatOps;
  mat->destroy    = MatiAIJdestroy;
  mat->view       = MatiView;
  mat->type       = MATAIJMPI;
  mat->factor     = 0;
  mat->row        = 0;
  mat->col        = 0;

  mat->comm       = comm;
  aij->insertmode = NotSetValues;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  /* global sizes given as -1 are computed by summing the local sizes
     across all processors */
  if (M == -1 || N == -1) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == -1) M = sum[0];
    if (N == -1) N = sum[1];
  }
  /* local sizes given as -1 get an even share, with the remainder
     spread over the low-numbered processors */
  if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m       = m;
  aij->n       = n;
  aij->N       = N;
  aij->M       = M;

  /* build local table of row and column ownerships */
  /* rowners and cowners share a single allocation; after the prefix
     sums, entry i is the first global index owned by processor i, so
     [rowners[i], rowners[i+1]) is processor i's row range */
  aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTR(aij->rowners);
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];


  /* A holds the local rows of the diagonal block (m x n); B holds the
     local rows of the off-diagonal portion (created m x N here) */
  ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
  ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);

  /* build cache for off array entries formed */
  /* one allocation holds the Scalar values followed by the two int
     index arrays (same layout StashValues grows by CHUNCKSIZE) */
  aij->stash.nmax = CHUNCKSIZE; /* completely arbitrary number */
  aij->stash.n    = 0;
  aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
                            sizeof(Scalar))); CHKPTR(aij->stash.array);
  aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
  aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);

  /* stuff used for matrix vector multiply; created lazily elsewhere */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}
1003 
1004 static int MatiCopy(Mat matin,Mat *newmat)
1005 {
1006   Mat        mat;
1007   Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data;
1008   int        ierr;
1009   *newmat      = 0;
1010 
1011   if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
1012   CREATEHEADER(mat,_Mat);
1013   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
1014   mat->cookie     = MAT_COOKIE;
1015   mat->ops        = &MatOps;
1016   mat->destroy    = MatiAIJdestroy;
1017   mat->view       = MatiView;
1018   mat->type       = MATAIJSEQ;
1019   mat->factor     = matin->factor;
1020   mat->row        = 0;
1021   mat->col        = 0;
1022 
1023   aij->m          = oldmat->m;
1024   aij->n          = oldmat->n;
1025   aij->M          = oldmat->M;
1026   aij->N          = oldmat->N;
1027 
1028   aij->assembled  = 1;
1029   aij->rstart     = oldmat->rstart;
1030   aij->rend       = oldmat->rend;
1031   aij->cstart     = oldmat->cstart;
1032   aij->cend       = oldmat->cend;
1033   aij->numtids    = oldmat->numtids;
1034   aij->mytid      = oldmat->mytid;
1035   aij->insertmode = NotSetValues;
1036 
1037   aij->rowners    = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
1038   CHKPTR(aij->rowners);
1039   MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1040   aij->stash.nmax = 0;
1041   aij->stash.n    = 0;
1042   aij->stash.array= 0;
1043   mat->comm       = matin->comm;
1044 
1045   ierr =  VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
1046   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
1047   ierr =  MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
1048   ierr =  MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
1049   *newmat = mat;
1050   return 0;
1051 }
1052