xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 289889947d663f9d99846900455456d96b4bc1c8)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.7 1995/03/06 04:04:51 bsmith Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 #define CHUNCKSIZE   100
10 /*
11    This is a simple minded stash. Do a linear search to determine if
12  in stash, if not add to end.
13 */
14 static int StashValues(Stash *stash,int row,int n, int *idxn,
15                        Scalar *values,InsertMode addv)
16 {
17   int    i,j,N = stash->n,found,*n_idx, *n_idy;
18   Scalar val,*n_array;
19 
20   for ( i=0; i<n; i++ ) {
21     found = 0;
22     val = *values++;
23     for ( j=0; j<N; j++ ) {
24       if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
25         /* found a match */
26         if (addv == AddValues) stash->array[j] += val;
27         else stash->array[j] = val;
28         found = 1;
29         break;
30       }
31     }
32     if (!found) { /* not found so add to end */
33       if ( stash->n == stash->nmax ) {
34         /* allocate a larger stash */
35         n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
36                                      2*sizeof(int) + sizeof(Scalar)));
37         CHKPTR(n_array);
38         n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
39         n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
40         MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
41         MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
42         MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
43         if (stash->array) FREE(stash->array);
44         stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
45         stash->nmax += CHUNCKSIZE;
46       }
47       stash->array[stash->n]   = val;
48       stash->idx[stash->n]     = row;
49       stash->idy[stash->n++]   = idxn[i];
50     }
51   }
52   return 0;
53 }
54 
55 static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n,
56                             int *idxn,Scalar *v,InsertMode addv)
57 {
58   Matimpiaij *aij = (Matimpiaij *) mat->data;
59   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
60   int        cstart = aij->cstart, cend = aij->cend,row,col;
61 
62   if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
63     SETERR(1,"You cannot mix inserts and adds");
64   }
65   aij->insertmode = addv;
66   for ( i=0; i<m; i++ ) {
67     if (idxm[i] < 0) SETERR(1,"Negative row index");
68     if (idxm[i] >= aij->M) SETERR(1,"Row index too large");
69     if (idxm[i] >= rstart && idxm[i] < rend) {
70       row = idxm[i] - rstart;
71       for ( j=0; j<n; j++ ) {
72         if (idxn[j] < 0) SETERR(1,"Negative column index");
73         if (idxn[j] >= aij->N) SETERR(1,"Column index too large");
74         if (idxn[j] >= cstart && idxn[j] < cend){
75           col = idxn[j] - cstart;
76           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
77         }
78         else {
79           if (aij->assembled) {
80             SETERR(1,"Cannot insert off diagonal block in already\
81                      assembled matrix. Contact petsc-maint@mcs.anl.gov\
82                      if your need this feature");
83           }
84           col = idxn[j];
85           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
86         }
87       }
88     }
89     else {
90       ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
91     }
92   }
93   return 0;
94 }
95 
96 /*
97     the assembly code is alot like the code for vectors, we should
98     sometime derive a single assembly code that can be used for
99     either case.
100 */
101 
102 static int MatiAIJBeginAssemble(Mat mat)
103 {
104   Matimpiaij  *aij = (Matimpiaij *) mat->data;
105   MPI_Comm    comm = mat->comm;
106   int         numtids = aij->numtids, *owners = aij->rowners;
107   int         mytid = aij->mytid;
108   MPI_Request *send_waits,*recv_waits;
109   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
110   int         tag = 50, *owner,*starts,count;
111   InsertMode  addv;
112   Scalar      *rvalues,*svalues;
113 
114   /* make sure all processors are either in INSERTMODE or ADDMODE */
115   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
116                 MPI_BOR,comm);
117   if (addv == (AddValues|InsertValues)) {
118     SETERR(1,"Some processors have inserted while others have added");
119   }
120   aij->insertmode = addv; /* in case this processor had no cache */
121 
122   /*  first count number of contributors to each processor */
123   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
124   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
125   owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
126   for ( i=0; i<aij->stash.n; i++ ) {
127     idx = aij->stash.idx[i];
128     for ( j=0; j<numtids; j++ ) {
129       if (idx >= owners[j] && idx < owners[j+1]) {
130         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
131       }
132     }
133   }
134   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
135 
136   /* inform other processors of number of messages and max length*/
137   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
138   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
139   nreceives = work[mytid];
140   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
141   nmax = work[mytid];
142   FREE(work);
143 
144   /* post receives:
145        1) each message will consist of ordered pairs
146      (global index,value) we store the global index as a double
147      to simplify the message passing.
148        2) since we don't know how long each individual message is we
149      allocate the largest needed buffer for each receive. Potentially
150      this is a lot of wasted space.
151 
152 
153        This could be done better.
154   */
155   rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
156   CHKPTR(rvalues);
157   recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
158   CHKPTR(recv_waits);
159   for ( i=0; i<nreceives; i++ ) {
160     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
161               comm,recv_waits+i);
162   }
163 
164   /* do sends:
165       1) starts[i] gives the starting index in svalues for stuff going to
166          the ith processor
167   */
168   svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
169   CHKPTR(svalues);
170   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
171   CHKPTR(send_waits);
172   starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
173   starts[0] = 0;
174   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
175   for ( i=0; i<aij->stash.n; i++ ) {
176     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
177     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
178     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
179   }
180   FREE(owner);
181   starts[0] = 0;
182   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
183   count = 0;
184   for ( i=0; i<numtids; i++ ) {
185     if (procs[i]) {
186       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
187                 comm,send_waits+count++);
188     }
189   }
190   FREE(starts); FREE(nprocs);
191 
192   /* Free cache space */
193   aij->stash.nmax = aij->stash.n = 0;
194   if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}
195 
196   aij->svalues    = svalues;       aij->rvalues = rvalues;
197   aij->nsends     = nsends;         aij->nrecvs = nreceives;
198   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
199   aij->rmax       = nmax;
200 
201   return 0;
202 }
203 extern int MPIAIJSetUpMultiply(Mat);
204 
205 static int MatiAIJEndAssemble(Mat mat)
206 {
207   int        ierr;
208   Matimpiaij *aij = (Matimpiaij *) mat->data;
209 
210   MPI_Status  *send_status,recv_status;
211   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
212   int         row,col;
213   Scalar      *values,val;
214   InsertMode  addv = aij->insertmode;
215 
216   /*  wait on receives */
217   while (count) {
218     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
219     /* unpack receives into our local space */
220     values = aij->rvalues + 3*imdex*aij->rmax;
221     MPI_Get_count(&recv_status,MPI_SCALAR,&n);
222     n = n/3;
223     for ( i=0; i<n; i++ ) {
224       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
225       col = (int) PETSCREAL(values[3*i+1]);
226       val = values[3*i+2];
227       if (col >= aij->cstart && col < aij->cend) {
228           col -= aij->cstart;
229         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
230       }
231       else {
232         if (aij->assembled) {
233           SETERR(1,"Cannot insert off diagonal block in already\
234                    assembled matrix. Contact petsc-maint@mcs.anl.gov\
235                    if your need this feature");
236         }
237         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
238       }
239     }
240     count--;
241   }
242   FREE(aij->recv_waits); FREE(aij->rvalues);
243 
244   /* wait on sends */
245   if (aij->nsends) {
246     send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
247     CHKPTR(send_status);
248     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
249     FREE(send_status);
250   }
251   FREE(aij->send_waits); FREE(aij->svalues);
252 
253   aij->insertmode = NotSetValues;
254   ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
255   ierr = MatEndAssembly(aij->A); CHKERR(ierr);
256 
257 
258   if (aij->assembled) {
259     /* this is because the present implimentation doesn't support
260      inserting values off the diagonal block.*/
261     return 0;
262   }
263 
264   ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr);
265   ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
266   ierr = MatEndAssembly(aij->B); CHKERR(ierr);
267 
268   aij->assembled = 1;
269   return 0;
270 }
271 
272 static int MatiZero(Mat A)
273 {
274   Matimpiaij *l = (Matimpiaij *) A->data;
275 
276   MatZeroEntries(l->A); MatZeroEntries(l->B);
277   return 0;
278 }
279 
280 /* again this uses the same basic stratagy as in the assembly and
281    scatter create routines, we should try to do it systemamatically
282    if we can figure out the proper level of generality. */
283 
284 /* the code does not do the diagonal entries correctly unless the
285    matrix is square and the column and row owerships are identical.
286    This is a BUG. The only way to fix it seems to be to access
287    aij->A and aij->B directly and not through the MatZeroRows()
288    routine.
289 */
290 static int MatiZerorows(Mat A,IS is,Scalar *diag)
291 {
292   Matimpiaij     *l = (Matimpiaij *) A->data;
293   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
294   int            *procs,*nprocs,j,found,idx,nsends,*work;
295   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
296   int            *rvalues,tag = 67,count,base,slen,n,*source;
297   int            *lens,imdex,*lrows,*values;
298   MPI_Comm       comm = A->comm;
299   MPI_Request    *send_waits,*recv_waits;
300   MPI_Status     recv_status,*send_status;
301   IS             istmp;
302 
303   if (!l->assembled) SETERR(1,"MatiZerorows: must assmble matrix first");
304   ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
305   ierr = ISGetIndices(is,&rows); CHKERR(ierr);
306 
307   /*  first count number of contributors to each processor */
308   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
309   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
310   owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
311   for ( i=0; i<N; i++ ) {
312     idx = rows[i];
313     found = 0;
314     for ( j=0; j<numtids; j++ ) {
315       if (idx >= owners[j] && idx < owners[j+1]) {
316         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
317       }
318     }
319     if (!found) SETERR(1,"Imdex out of range");
320   }
321   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
322 
323   /* inform other processors of number of messages and max length*/
324   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
325   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
326   nrecvs = work[mytid];
327   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
328   nmax = work[mytid];
329   FREE(work);
330 
331   /* post receives:   */
332   rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
333   CHKPTR(rvalues);
334   recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
335   CHKPTR(recv_waits);
336   for ( i=0; i<nrecvs; i++ ) {
337     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
338               comm,recv_waits+i);
339   }
340 
341   /* do sends:
342       1) starts[i] gives the starting index in svalues for stuff going to
343          the ith processor
344   */
345   svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
346   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
347   CHKPTR(send_waits);
348   starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
349   starts[0] = 0;
350   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
351   for ( i=0; i<N; i++ ) {
352     svalues[starts[owner[i]]++] = rows[i];
353   }
354   ISRestoreIndices(is,&rows);
355 
356   starts[0] = 0;
357   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
358   count = 0;
359   for ( i=0; i<numtids; i++ ) {
360     if (procs[i]) {
361       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
362                 comm,send_waits+count++);
363     }
364   }
365   FREE(starts);
366 
367   base = owners[mytid];
368 
369   /*  wait on receives */
370   lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
371   source = lens + nrecvs;
372   count = nrecvs; slen = 0;
373   while (count) {
374     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
375     /* unpack receives into our local space */
376     MPI_Get_count(&recv_status,MPI_INT,&n);
377     source[imdex]  = recv_status.MPI_SOURCE;
378     lens[imdex]  = n;
379     slen += n;
380     count--;
381   }
382   FREE(recv_waits);
383 
384   /* move the data into the send scatter */
385   lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
386   count = 0;
387   for ( i=0; i<nrecvs; i++ ) {
388     values = rvalues + i*nmax;
389     for ( j=0; j<lens[i]; j++ ) {
390       lrows[count++] = values[j] - base;
391     }
392   }
393   FREE(rvalues); FREE(lens);
394   FREE(owner); FREE(nprocs);
395 
396   /* actually zap the local rows */
397   ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
398   ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
399   ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
400   ierr = ISDestroy(istmp); CHKERR(ierr);
401 
402   /* wait on sends */
403   if (nsends) {
404     send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
405     CHKPTR(send_status);
406     MPI_Waitall(nsends,send_waits,send_status);
407     FREE(send_status);
408   }
409   FREE(send_waits); FREE(svalues);
410 
411 
412   return 0;
413 }
414 
415 static int MatiAIJMult(Mat aijin,Vec xx,Vec yy)
416 {
417   Matimpiaij *aij = (Matimpiaij *) aijin->data;
418   int        ierr;
419   if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
420   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
421   CHKERR(ierr);
422   ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
423   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
424   CHKERR(ierr);
425   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
426   return 0;
427 }
428 
429 static int MatiAIJMultadd(Mat aijin,Vec xx,Vec yy,Vec zz)
430 {
431   Matimpiaij *aij = (Matimpiaij *) aijin->data;
432   int        ierr;
433   if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
434   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
435   CHKERR(ierr);
436   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
437   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
438   CHKERR(ierr);
439   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
440   return 0;
441 }
442 
443 static int MatiAIJMultTrans(Mat aijin,Vec xx,Vec yy)
444 {
445   Matimpiaij *aij = (Matimpiaij *) aijin->data;
446   int        ierr;
447 
448   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
449   /* do nondiagonal part */
450   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
451   /* send it on its way */
452   ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues,
453                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
454   /* do local part */
455   ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
456   /* receive remote parts: note this assumes the values are not actually */
457   /* inserted in yy until the next line, which is true for my implementation*/
458   /* but is not perhaps always true. */
459   ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse,
460                          aij->Mvctx); CHKERR(ierr);
461   return 0;
462 }
463 
464 static int MatiAIJMultTransadd(Mat aijin,Vec xx,Vec yy,Vec zz)
465 {
466   Matimpiaij *aij = (Matimpiaij *) aijin->data;
467   int        ierr;
468 
469   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
470   /* do nondiagonal part */
471   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
472   /* send it on its way */
473   ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues,
474                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
475   /* do local part */
476   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
477   /* receive remote parts: note this assumes the values are not actually */
478   /* inserted in yy until the next line, which is true for my implementation*/
479   /* but is not perhaps always true. */
480   ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse,
481                          aij->Mvctx); CHKERR(ierr);
482   return 0;
483 }
484 
485 /*
486   This only works correctly for square matrices where the subblock A->A is the
487    diagonal block
488 */
489 static int MatiAIJgetdiag(Mat Ain,Vec v)
490 {
491   Matimpiaij *A = (Matimpiaij *) Ain->data;
492   if (!A->assembled) SETERR(1,"MatiAIJgetdiag: must assmble matrix first");
493   return MatGetDiagonal(A->A,v);
494 }
495 
496 static int MatiAIJdestroy(PetscObject obj)
497 {
498   Mat        mat = (Mat) obj;
499   Matimpiaij *aij = (Matimpiaij *) mat->data;
500   int        ierr;
501   FREE(aij->rowners);
502   ierr = MatDestroy(aij->A); CHKERR(ierr);
503   ierr = MatDestroy(aij->B); CHKERR(ierr);
504   FREE(aij); FREE(mat);
505   if (aij->lvec) VecDestroy(aij->lvec);
506   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
507   return 0;
508 }
509 
510 static int MatiView(PetscObject obj,Viewer viewer)
511 {
512   Mat        mat = (Mat) obj;
513   Matimpiaij *aij = (Matimpiaij *) mat->data;
514   int        ierr;
515 
516   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
517   MPE_Seq_begin(mat->comm,1);
518     printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
519           aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
520           aij->cend);
521     ierr = MatView(aij->A,0); CHKERR(ierr);
522     ierr = MatView(aij->B,0); CHKERR(ierr);
523     fflush(stdout);
524   MPE_Seq_end(mat->comm,1);
525   return 0;
526 }
527 
528 extern int MatiAIJmarkdiag(Matiaij  *);
529 /*
530     This has to provide several versions.
531 
532      1) per sequential
533      2) a) use only local smoothing updating outer values only once.
534         b) local smoothing updating outer values each inner iteration
535      3) color updating out values betwen colors.
536 */
537 static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift,
538                         int its,Vec xx)
539 {
540   Matimpiaij *mat = (Matimpiaij *) matin->data;
541   Mat        AA = mat->A, BB = mat->B;
542   Matiaij    *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data;
543   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
544   int        ierr,*idx, *diag;
545   int        n = mat->n, m = mat->m, i;
546   Vec        tt;
547 
548   if (!mat->assembled) SETERR(1,"MatiAIJRelax: must assmble matrix first");
549 
550   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
551   xs = x -1; /* shift by one for index start of 1 */
552   ls--;
553   if (!A->diag) {if ((ierr = MatiAIJmarkdiag(A))) return ierr;}
554   diag = A->diag;
555   if (flag & SOR_EISENSTAT) {
556     /* Let  A = L + U + D; where L is lower trianglar,
557     U is upper triangular, E is diagonal; This routine applies
558 
559             (L + E)^{-1} A (U + E)^{-1}
560 
561     to a vector efficiently using Eisenstat's trick. This is for
562     the case of SSOR preconditioner, so E is D/omega where omega
563     is the relaxation factor.
564     */
565     ierr = VecCreate(xx,&tt); CHKERR(ierr);
566     VecGetArray(tt,&t);
567     scale = (2.0/omega) - 1.0;
568     /*  x = (E + U)^{-1} b */
569     VecSet(&zero,mat->lvec);
570     ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
571                               mat->Mvctx); CHKERR(ierr);
572     for ( i=m-1; i>-1; i-- ) {
573       n    = A->i[i+1] - diag[i] - 1;
574       idx  = A->j + diag[i];
575       v    = A->a + diag[i];
576       sum  = b[i];
577       SPARSEDENSEMDOT(sum,xs,v,idx,n);
578       d    = shift + A->a[diag[i]-1];
579       n    = B->i[i+1] - B->i[i];
580       idx  = B->j + B->i[i] - 1;
581       v    = B->a + B->i[i] - 1;
582       SPARSEDENSEMDOT(sum,ls,v,idx,n);
583       x[i] = omega*(sum/d);
584     }
585     ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
586                             mat->Mvctx); CHKERR(ierr);
587 
588     /*  t = b - (2*E - D)x */
589     v = A->a;
590     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }
591 
592     /*  t = (E + L)^{-1}t */
593     ts = t - 1; /* shifted by one for index start of a or mat->j*/
594     diag = A->diag;
595     VecSet(&zero,mat->lvec);
596     ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
597                                                  mat->Mvctx); CHKERR(ierr);
598     for ( i=0; i<m; i++ ) {
599       n    = diag[i] - A->i[i];
600       idx  = A->j + A->i[i] - 1;
601       v    = A->a + A->i[i] - 1;
602       sum  = t[i];
603       SPARSEDENSEMDOT(sum,ts,v,idx,n);
604       d    = shift + A->a[diag[i]-1];
605       n    = B->i[i+1] - B->i[i];
606       idx  = B->j + B->i[i] - 1;
607       v    = B->a + B->i[i] - 1;
608       SPARSEDENSEMDOT(sum,ls,v,idx,n);
609       t[i] = omega*(sum/d);
610     }
611     ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
612                                                     mat->Mvctx); CHKERR(ierr);
613     /*  x = x + t */
614     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
615     VecDestroy(tt);
616     return 0;
617   }
618 
619 
620   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
621     if (flag & SOR_ZERO_INITIAL_GUESS) {
622       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
623     }
624     else {
625       ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
626       CHKERR(ierr);
627       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
628       CHKERR(ierr);
629     }
630     while (its--) {
631       /* go down through the rows */
632       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
633                               mat->Mvctx); CHKERR(ierr);
634       for ( i=0; i<m; i++ ) {
635         n    = A->i[i+1] - A->i[i];
636         idx  = A->j + A->i[i] - 1;
637         v    = A->a + A->i[i] - 1;
638         sum  = b[i];
639         SPARSEDENSEMDOT(sum,xs,v,idx,n);
640         d    = shift + A->a[diag[i]-1];
641         n    = B->i[i+1] - B->i[i];
642         idx  = B->j + B->i[i] - 1;
643         v    = B->a + B->i[i] - 1;
644         SPARSEDENSEMDOT(sum,ls,v,idx,n);
645         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
646       }
647       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
648                             mat->Mvctx); CHKERR(ierr);
649       /* come up through the rows */
650       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
651                               mat->Mvctx); CHKERR(ierr);
652       for ( i=m-1; i>-1; i-- ) {
653         n    = A->i[i+1] - A->i[i];
654         idx  = A->j + A->i[i] - 1;
655         v    = A->a + A->i[i] - 1;
656         sum  = b[i];
657         SPARSEDENSEMDOT(sum,xs,v,idx,n);
658         d    = shift + A->a[diag[i]-1];
659         n    = B->i[i+1] - B->i[i];
660         idx  = B->j + B->i[i] - 1;
661         v    = B->a + B->i[i] - 1;
662         SPARSEDENSEMDOT(sum,ls,v,idx,n);
663         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
664       }
665       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
666                             mat->Mvctx); CHKERR(ierr);
667     }
668   }
669   else if (flag & SOR_FORWARD_SWEEP){
670     if (flag & SOR_ZERO_INITIAL_GUESS) {
671       VecSet(&zero,mat->lvec);
672       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
673                               mat->Mvctx); CHKERR(ierr);
674       for ( i=0; i<m; i++ ) {
675         n    = diag[i] - A->i[i];
676         idx  = A->j + A->i[i] - 1;
677         v    = A->a + A->i[i] - 1;
678         sum  = b[i];
679         SPARSEDENSEMDOT(sum,xs,v,idx,n);
680         d    = shift + A->a[diag[i]-1];
681         n    = B->i[i+1] - B->i[i];
682         idx  = B->j + B->i[i] - 1;
683         v    = B->a + B->i[i] - 1;
684         SPARSEDENSEMDOT(sum,ls,v,idx,n);
685         x[i] = omega*(sum/d);
686       }
687       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
688                             mat->Mvctx); CHKERR(ierr);
689       its--;
690     }
691     while (its--) {
692       ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
693       CHKERR(ierr);
694       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
695       CHKERR(ierr);
696       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
697                               mat->Mvctx); CHKERR(ierr);
698       for ( i=0; i<m; i++ ) {
699         n    = A->i[i+1] - A->i[i];
700         idx  = A->j + A->i[i] - 1;
701         v    = A->a + A->i[i] - 1;
702         sum  = b[i];
703         SPARSEDENSEMDOT(sum,xs,v,idx,n);
704         d    = shift + A->a[diag[i]-1];
705         n    = B->i[i+1] - B->i[i];
706         idx  = B->j + B->i[i] - 1;
707         v    = B->a + B->i[i] - 1;
708         SPARSEDENSEMDOT(sum,ls,v,idx,n);
709         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
710       }
711       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
712                             mat->Mvctx); CHKERR(ierr);
713     }
714   }
715   else if (flag & SOR_BACKWARD_SWEEP){
716     if (flag & SOR_ZERO_INITIAL_GUESS) {
717       VecSet(&zero,mat->lvec);
718       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
719                               mat->Mvctx); CHKERR(ierr);
720       for ( i=m-1; i>-1; i-- ) {
721         n    = A->i[i+1] - diag[i] - 1;
722         idx  = A->j + diag[i];
723         v    = A->a + diag[i];
724         sum  = b[i];
725         SPARSEDENSEMDOT(sum,xs,v,idx,n);
726         d    = shift + A->a[diag[i]-1];
727         n    = B->i[i+1] - B->i[i];
728         idx  = B->j + B->i[i] - 1;
729         v    = B->a + B->i[i] - 1;
730         SPARSEDENSEMDOT(sum,ls,v,idx,n);
731         x[i] = omega*(sum/d);
732       }
733       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
734                             mat->Mvctx); CHKERR(ierr);
735       its--;
736     }
737     while (its--) {
738       ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
739                             mat->Mvctx); CHKERR(ierr);
740       ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
741                             mat->Mvctx); CHKERR(ierr);
742       ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
743                               mat->Mvctx); CHKERR(ierr);
744       for ( i=m-1; i>-1; i-- ) {
745         n    = A->i[i+1] - A->i[i];
746         idx  = A->j + A->i[i] - 1;
747         v    = A->a + A->i[i] - 1;
748         sum  = b[i];
749         SPARSEDENSEMDOT(sum,xs,v,idx,n);
750         d    = shift + A->a[diag[i]-1];
751         n    = B->i[i+1] - B->i[i];
752         idx  = B->j + B->i[i] - 1;
753         v    = B->a + B->i[i] - 1;
754         SPARSEDENSEMDOT(sum,ls,v,idx,n);
755         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
756       }
757       ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
758                             mat->Mvctx); CHKERR(ierr);
759     }
760   }
761   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
762     if (flag & SOR_ZERO_INITIAL_GUESS) {
763       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
764     }
765     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
766     CHKERR(ierr);
767     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
768     CHKERR(ierr);
769     while (its--) {
770       /* go down through the rows */
771       for ( i=0; i<m; i++ ) {
772         n    = A->i[i+1] - A->i[i];
773         idx  = A->j + A->i[i] - 1;
774         v    = A->a + A->i[i] - 1;
775         sum  = b[i];
776         SPARSEDENSEMDOT(sum,xs,v,idx,n);
777         d    = shift + A->a[diag[i]-1];
778         n    = B->i[i+1] - B->i[i];
779         idx  = B->j + B->i[i] - 1;
780         v    = B->a + B->i[i] - 1;
781         SPARSEDENSEMDOT(sum,ls,v,idx,n);
782         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
783       }
784       /* come up through the rows */
785       for ( i=m-1; i>-1; i-- ) {
786         n    = A->i[i+1] - A->i[i];
787         idx  = A->j + A->i[i] - 1;
788         v    = A->a + A->i[i] - 1;
789         sum  = b[i];
790         SPARSEDENSEMDOT(sum,xs,v,idx,n);
791         d    = shift + A->a[diag[i]-1];
792         n    = B->i[i+1] - B->i[i];
793         idx  = B->j + B->i[i] - 1;
794         v    = B->a + B->i[i] - 1;
795         SPARSEDENSEMDOT(sum,ls,v,idx,n);
796         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
797       }
798     }
799   }
800   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
801     if (flag & SOR_ZERO_INITIAL_GUESS) {
802       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
803     }
804     ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
805     CHKERR(ierr);
806     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
807     CHKERR(ierr);
808     while (its--) {
809       for ( i=0; i<m; i++ ) {
810         n    = A->i[i+1] - A->i[i];
811         idx  = A->j + A->i[i] - 1;
812         v    = A->a + A->i[i] - 1;
813         sum  = b[i];
814         SPARSEDENSEMDOT(sum,xs,v,idx,n);
815         d    = shift + A->a[diag[i]-1];
816         n    = B->i[i+1] - B->i[i];
817         idx  = B->j + B->i[i] - 1;
818         v    = B->a + B->i[i] - 1;
819         SPARSEDENSEMDOT(sum,ls,v,idx,n);
820         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
821       }
822     }
823   }
824   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
825     if (flag & SOR_ZERO_INITIAL_GUESS) {
826       return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
827     }
828     ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
829                             mat->Mvctx); CHKERR(ierr);
830     ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
831                             mat->Mvctx); CHKERR(ierr);
832     while (its--) {
833       for ( i=m-1; i>-1; i-- ) {
834         n    = A->i[i+1] - A->i[i];
835         idx  = A->j + A->i[i] - 1;
836         v    = A->a + A->i[i] - 1;
837         sum  = b[i];
838         SPARSEDENSEMDOT(sum,xs,v,idx,n);
839         d    = shift + A->a[diag[i]-1];
840         n    = B->i[i+1] - B->i[i];
841         idx  = B->j + B->i[i] - 1;
842         v    = B->a + B->i[i] - 1;
843         SPARSEDENSEMDOT(sum,ls,v,idx,n);
844         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
845       }
846     }
847   }
848   return 0;
849 }
850 static int MatiAIJinsopt(Mat aijin,int op)
851 {
852   Matimpiaij *aij = (Matimpiaij *) aijin->data;
853 
854   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
855     MatSetOption(aij->A,op);
856     MatSetOption(aij->B,op);
857   }
858   else if (op == YES_NEW_NONZERO_LOCATIONS) {
859     MatSetOption(aij->A,op);
860     MatSetOption(aij->B,op);
861   }
862   else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
863   return 0;
864 }
865 
866 static int MatiAIJsize(Mat matin,int *m,int *n)
867 {
868   Matimpiaij *mat = (Matimpiaij *) matin->data;
869   *m = mat->M; *n = mat->N;
870   return 0;
871 }
872 
873 static int MatiAIJlocalsize(Mat matin,int *m,int *n)
874 {
875   Matimpiaij *mat = (Matimpiaij *) matin->data;
876   *m = mat->m; *n = mat->n;
877   return 0;
878 }
879 
880 static int MatiAIJrange(Mat matin,int *m,int *n)
881 {
882   Matimpiaij *mat = (Matimpiaij *) matin->data;
883   *m = mat->rstart; *n = mat->rend;
884   return 0;
885 }
886 
887 static int MatiCopy(Mat,Mat *);
888 
889 /* -------------------------------------------------------------------*/
890 static struct _MatOps MatOps = {MatiAIJInsertValues,
891        0, 0,
892        MatiAIJMult,MatiAIJMultadd,MatiAIJMultTrans,MatiAIJMultTransadd,
893        0,0,0,0,
894        0,0,
895        MatiAIJrelax,
896        0,
897        0,0,0,
898        MatiCopy,
899        MatiAIJgetdiag,0,0,
900        MatiAIJBeginAssemble,MatiAIJEndAssemble,
901        0,
902        MatiAIJinsopt,MatiZero,MatiZerorows,0,
903        0,0,0,0,
904        MatiAIJsize,MatiAIJlocalsize,MatiAIJrange };
905 
906 
907 
908 /*@
909 
910       MatCreateMPIAIJ - Creates a sparse parallel matrix
911                                  in AIJ format.
912 
913   Input Parameters:
914 .   comm - MPI communicator
915 .   m,n - number of local rows and columns (or -1 to have calculated)
916 .   M,N - global rows and columns (or -1 to have calculated)
917 .   d_nz - total number nonzeros in diagonal portion of matrix
918 .   d_nzz - number of nonzeros per row in diagonal portion of matrix or null
919 .           You must leave room for the diagonal entry even if it is zero.
920 .   o_nz - total number nonzeros in off-diagonal portion of matrix
921 .   o_nzz - number of nonzeros per row in off-diagonal portion of matrix
922 .           or null. You must have at least one nonzero per row.
923 
924   Output parameters:
925 .  newmat - the matrix
926 
927   Keywords: matrix, aij, compressed row, sparse, parallel
928 @*/
929 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
930                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
931 {
932   Mat          mat;
933   Matimpiaij   *aij;
934   int          ierr, i,sum[2],work[2];
935   *newmat         = 0;
936   CREATEHEADER(mat,_Mat);
937   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
938   mat->cookie     = MAT_COOKIE;
939   mat->ops        = &MatOps;
940   mat->destroy    = MatiAIJdestroy;
941   mat->view       = MatiView;
942   mat->type       = MATAIJMPI;
943   mat->factor     = 0;
944   mat->row        = 0;
945   mat->col        = 0;
946 
947   mat->comm       = comm;
948   aij->insertmode = NotSetValues;
949   MPI_Comm_rank(comm,&aij->mytid);
950   MPI_Comm_size(comm,&aij->numtids);
951 
952   if (M == -1 || N == -1) {
953     work[0] = m; work[1] = n;
954     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
955     if (M == -1) M = sum[0];
956     if (N == -1) N = sum[1];
957   }
958   if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
959   if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
960   aij->m       = m;
961   aij->n       = n;
962   aij->N       = N;
963   aij->M       = M;
964 
965   /* build local table of row and column ownerships */
966   aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
967   CHKPTR(aij->rowners);
968   aij->cowners = aij->rowners + aij->numtids + 1;
969   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
970   aij->rowners[0] = 0;
971   for ( i=2; i<=aij->numtids; i++ ) {
972     aij->rowners[i] += aij->rowners[i-1];
973   }
974   aij->rstart = aij->rowners[aij->mytid];
975   aij->rend   = aij->rowners[aij->mytid+1];
976   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
977   aij->cowners[0] = 0;
978   for ( i=2; i<=aij->numtids; i++ ) {
979     aij->cowners[i] += aij->cowners[i-1];
980   }
981   aij->cstart = aij->cowners[aij->mytid];
982   aij->cend   = aij->cowners[aij->mytid+1];
983 
984 
985   ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
986   ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);
987 
988   /* build cache for off array entries formed */
989   aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */
990   aij->stash.n    = 0;
991   aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
992                             sizeof(Scalar))); CHKPTR(aij->stash.array);
993   aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
994   aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);
995 
996   /* stuff used for matrix vector multiply */
997   aij->lvec      = 0;
998   aij->Mvctx     = 0;
999   aij->assembled = 0;
1000 
1001   *newmat = mat;
1002   return 0;
1003 }
1004 
1005 static int MatiCopy(Mat matin,Mat *newmat)
1006 {
1007   Mat        mat;
1008   Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data;
1009   int        ierr;
1010   *newmat      = 0;
1011 
1012   if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
1013   CREATEHEADER(mat,_Mat);
1014   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
1015   mat->cookie     = MAT_COOKIE;
1016   mat->ops        = &MatOps;
1017   mat->destroy    = MatiAIJdestroy;
1018   mat->view       = MatiView;
1019   mat->type       = MATAIJMPI;
1020   mat->factor     = matin->factor;
1021   mat->row        = 0;
1022   mat->col        = 0;
1023 
1024   aij->m          = oldmat->m;
1025   aij->n          = oldmat->n;
1026   aij->M          = oldmat->M;
1027   aij->N          = oldmat->N;
1028 
1029   aij->assembled  = 1;
1030   aij->rstart     = oldmat->rstart;
1031   aij->rend       = oldmat->rend;
1032   aij->cstart     = oldmat->cstart;
1033   aij->cend       = oldmat->cend;
1034   aij->numtids    = oldmat->numtids;
1035   aij->mytid      = oldmat->mytid;
1036   aij->insertmode = NotSetValues;
1037 
1038   aij->rowners    = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
1039   CHKPTR(aij->rowners);
1040   MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1041   aij->stash.nmax = 0;
1042   aij->stash.n    = 0;
1043   aij->stash.array= 0;
1044   mat->comm       = matin->comm;
1045 
1046   ierr =  VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
1047   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
1048   ierr =  MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
1049   ierr =  MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
1050   *newmat = mat;
1051   return 0;
1052 }
1053