xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision acb40c829c5c8e50a1bd2ec359a5002f3724cd09)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.8 1995/03/10 04:44:54 bsmith Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 #define CHUNCKSIZE   100
/*
   This is a simple-minded stash: a linear search determines whether an
   entry is already in the stash; if not, it is added to the end.
*/
14 static int StashValues(Stash *stash,int row,int n, int *idxn,
15                        Scalar *values,InsertMode addv)
16 {
17   int    i,j,N = stash->n,found,*n_idx, *n_idy;
18   Scalar val,*n_array;
19 
20   for ( i=0; i<n; i++ ) {
21     found = 0;
22     val = *values++;
23     for ( j=0; j<N; j++ ) {
24       if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
25         /* found a match */
26         if (addv == AddValues) stash->array[j] += val;
27         else stash->array[j] = val;
28         found = 1;
29         break;
30       }
31     }
32     if (!found) { /* not found so add to end */
33       if ( stash->n == stash->nmax ) {
34         /* allocate a larger stash */
35         n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
36                                      2*sizeof(int) + sizeof(Scalar)));
37         CHKPTR(n_array);
38         n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
39         n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
40         MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
41         MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
42         MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
43         if (stash->array) FREE(stash->array);
44         stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
45         stash->nmax += CHUNCKSIZE;
46       }
47       stash->array[stash->n]   = val;
48       stash->idx[stash->n]     = row;
49       stash->idy[stash->n++]   = idxn[i];
50     }
51   }
52   return 0;
53 }
54 
/*
   Insert or add an m x n logically dense block of values into the
   parallel matrix.  Locally owned rows are routed to the diagonal
   block (aij->A, with shifted column indices) or the off-diagonal
   block (aij->B, with global column indices); rows owned by other
   processors are stashed for communication at assembly time.

   Input:
     mat  - the parallel matrix
     m    - number of rows; idxm - global row indices
     n    - number of columns; idxn - global column indices
     v    - the m*n values, row oriented
     addv - InsertValues or AddValues (may not be mixed between calls)
*/
static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n,
                            int *idxn,Scalar *v,InsertMode addv)
{
  Matimpiaij *aij = (Matimpiaij *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  /* all processors must consistently insert OR add until assembly */
  if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
    SETERR(1,"You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERR(1,"Negative row index");
    if (idxm[i] >= aij->M) SETERR(1,"Row index too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* row is owned locally */
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERR(1,"Negative column index");
        if (idxn[j] >= aij->N) SETERR(1,"Column index too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column in the diagonal block: shift to local numbering */
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
        }
        else {
          if (aij->assembled) {
            SETERR(1,"Cannot insert off diagonal block in already\
                     assembled matrix. Contact petsc-maint@mcs.anl.gov\
                     if your need this feature");
          }
          /* off-diagonal block keeps global column numbering until
             MPIAIJSetUpMultiply() compacts it */
          col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
        }
      }
    }
    else {
      /* row owned elsewhere: stash for communication at assembly */
      ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
    }
  }
  return 0;
}
95 
/*
    The assembly code is a lot like the code for vectors; we should
    someday derive a single assembly routine that can be used for
    either case.
*/
101 
/*
   Begin assembly of the parallel matrix: verify all processors agree
   on insert vs. add, then post the nonblocking sends and receives that
   move stashed (row,col,value) triples to their owning processors.
   Each triple is shipped as three Scalars (indices cast to Scalar to
   simplify the message passing).  The matching waits and unpacking are
   done in MatiAIJEndAssemble().
*/
static int MatiAIJBeginAssemble(Mat mat)
{
  Matimpiaij  *aij = (Matimpiaij *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = 50, *owner,*starts,count;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* BOR of the modes equals (AddValues|InsertValues) only when mixed */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (AddValues|InsertValues)) {
    SETERR(1,"Some processors have inserted while others have added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  /* nprocs[j] = #stashed entries for proc j; procs[j] = 0/1 flag */
  nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
  MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  FREE(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTR(rvalues);
  recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTR(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTR(svalues);
  send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTR(send_waits);
  starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack triples (row, col, value); starts[] is advanced as we pack */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  FREE(owner);
  /* rebuild starts[] (clobbered by the packing loop) for the sends */
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  FREE(starts); FREE(nprocs);

  /* Free cache space */
  aij->stash.nmax = aij->stash.n = 0;
  if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}

  /* saved for MatiAIJEndAssemble() to wait on and unpack */
  aij->svalues    = svalues;       aij->rvalues = rvalues;
  aij->nsends     = nsends;         aij->nrecvs = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
203 extern int MPIAIJSetUpMultiply(Mat);
204 
205 static int MatiAIJEndAssemble(Mat mat)
206 {
207   int        ierr;
208   Matimpiaij *aij = (Matimpiaij *) mat->data;
209 
210   MPI_Status  *send_status,recv_status;
211   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
212   int         row,col;
213   Scalar      *values,val;
214   InsertMode  addv = aij->insertmode;
215 
216   /*  wait on receives */
217   while (count) {
218     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
219     /* unpack receives into our local space */
220     values = aij->rvalues + 3*imdex*aij->rmax;
221     MPI_Get_count(&recv_status,MPI_SCALAR,&n);
222     n = n/3;
223     for ( i=0; i<n; i++ ) {
224       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
225       col = (int) PETSCREAL(values[3*i+1]);
226       val = values[3*i+2];
227       if (col >= aij->cstart && col < aij->cend) {
228           col -= aij->cstart;
229         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
230       }
231       else {
232         if (aij->assembled) {
233           SETERR(1,"Cannot insert off diagonal block in already\
234                    assembled matrix. Contact petsc-maint@mcs.anl.gov\
235                    if your need this feature");
236         }
237         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
238       }
239     }
240     count--;
241   }
242   FREE(aij->recv_waits); FREE(aij->rvalues);
243 
244   /* wait on sends */
245   if (aij->nsends) {
246     send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
247     CHKPTR(send_status);
248     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
249     FREE(send_status);
250   }
251   FREE(aij->send_waits); FREE(aij->svalues);
252 
253   aij->insertmode = NotSetValues;
254   ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
255   ierr = MatEndAssembly(aij->A); CHKERR(ierr);
256 
257 
258   if (aij->assembled) {
259     /* this is because the present implimentation doesn't support
260      inserting values off the diagonal block.*/
261     return 0;
262   }
263 
264   ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr);
265   ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
266   ierr = MatEndAssembly(aij->B); CHKERR(ierr);
267 
268   aij->assembled = 1;
269   return 0;
270 }
271 
272 static int MatiZero(Mat A)
273 {
274   Matimpiaij *l = (Matimpiaij *) A->data;
275 
276   MatZeroEntries(l->A); MatZeroEntries(l->B);
277   return 0;
278 }
279 
/* Again this uses the same basic strategy as in the assembly and
   scatter create routines; we should try to do it systematically
   if we can figure out the proper level of generality. */

/* The code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
/*
   Zero the (global) rows of the parallel matrix listed in the index set
   is; if diag is nonzero the diagonal entry of each zeroed row is set
   to *diag.  Every processor may list any global rows; the row numbers
   are communicated to their owning processors, which then zero them
   locally in both the diagonal and off-diagonal blocks.

   See the note above: the diagonal entry handling is only correct for
   square matrices with identical row and column ownership.
*/
static int MatiZerorows(Mat A,IS is,Scalar *diag)
{
  Matimpiaij     *l = (Matimpiaij *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int            *rvalues,tag = 67,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERR(1,"MatiZerorows: must assmble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
  ierr = ISGetIndices(is,&rows); CHKERR(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = #rows destined for proc j; procs[j] = 0/1 flag;
     owner[i] = owning processor of rows[i] */
  nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
  MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERR(1,"Imdex out of range");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  FREE(work);

  /* post receives:   */
  /* each receive gets the worst-case buffer of nmax row numbers */
  rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTR(rvalues);
  recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTR(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
  send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTR(send_waits);
  starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] (advanced by the packing loop above) */
  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  FREE(starts);

  base = owners[mytid];

  /*  wait on receives */
  /* lens[i] = length of message i; source[i] = sending processor */
  lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  FREE(recv_waits);

  /* move the data into the send scatter */
  /* convert received global row numbers to local numbering */
  lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  FREE(rvalues); FREE(lens);
  FREE(owner); FREE(nprocs);

  /* actually zap the local rows */
  /* diag goes only to the diagonal block; B rows are zeroed outright */
  ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
  ierr = ISDestroy(istmp); CHKERR(ierr);

  /* wait on sends so the send buffer may be freed */
  if (nsends) {
    send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
    CHKPTR(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    FREE(send_status);
  }
  FREE(send_waits); FREE(svalues);


  return 0;
}
414 
/*
   Parallel matrix-vector product yy = mat*xx.  The scatter of the
   needed off-processor values of xx into aij->lvec is overlapped with
   the local (diagonal block) product; the off-diagonal contribution is
   then added in from lvec.
*/
static int MatiAIJMult(Mat aijin,Vec xx,Vec yy)
{
  Matimpiaij *aij = (Matimpiaij *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
  /* start moving off-processor entries of xx into lvec */
  ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  /* overlap: local diagonal-block product while the scatter proceeds */
  ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
  ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  /* add the off-diagonal block contribution */
  ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
  return 0;
}
428 
/*
   Parallel matrix-vector product with addition, zz = yy + mat*xx,
   using the same communication/computation overlap as MatiAIJMult().
*/
static int MatiAIJMultadd(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Matimpiaij *aij = (Matimpiaij *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
  /* start moving off-processor entries of xx into lvec */
  ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  /* overlap: zz = yy + A*xx while the scatter proceeds */
  ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
  ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  /* add the off-diagonal block contribution */
  ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
  return 0;
}
442 
/*
   Parallel transpose matrix-vector product yy = mat^T * xx.  The
   off-diagonal contribution B^T*xx is computed first and scattered
   (in reverse, adding) to its owning processors while the local
   diagonal-block transpose product is computed.
*/
static int MatiAIJMultTrans(Mat aijin,Vec xx,Vec yy)
{
  Matimpiaij *aij = (Matimpiaij *) aijin->data;
  int        ierr;

  if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues,
                         ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
  /* do local part */
  ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse,
                         aij->Mvctx); CHKERR(ierr);
  return 0;
}
463 
/*
   Parallel transpose matrix-vector product with addition,
   zz = yy + mat^T * xx, with the same reverse-scatter overlap as
   MatiAIJMultTrans().
*/
static int MatiAIJMultTransadd(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Matimpiaij *aij = (Matimpiaij *) aijin->data;
  int        ierr;

  if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues,
                         ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
  /* do local part */
  ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse,
                         aij->Mvctx); CHKERR(ierr);
  return 0;
}
484 
485 /*
486   This only works correctly for square matrices where the subblock A->A is the
487    diagonal block
488 */
489 static int MatiAIJgetdiag(Mat Ain,Vec v)
490 {
491   Matimpiaij *A = (Matimpiaij *) Ain->data;
492   if (!A->assembled) SETERR(1,"MatiAIJgetdiag: must assmble matrix first");
493   return MatGetDiagonal(A->A,v);
494 }
495 
/*
   Destroy the parallel matrix: free the row-ownership table, the two
   sequential blocks, the local work vector and scatter context (when
   they were created by assembly), and finally the header itself.
*/
static int MatiAIJdestroy(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Matimpiaij *aij = (Matimpiaij *) mat->data;
  int        ierr;
  FREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERR(ierr);
  ierr = MatDestroy(aij->B); CHKERR(ierr);
  /* lvec/Mvctx exist only if the matrix was assembled */
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
  FREE(aij); PETSCHEADERDESTROY(mat);
  return 0;
}
509 
510 static int MatiView(PetscObject obj,Viewer viewer)
511 {
512   Mat        mat = (Mat) obj;
513   Matimpiaij *aij = (Matimpiaij *) mat->data;
514   int        ierr;
515 
516   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
517   MPE_Seq_begin(mat->comm,1);
518     printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
519           aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
520           aij->cend);
521     ierr = MatView(aij->A,0); CHKERR(ierr);
522     ierr = MatView(aij->B,0); CHKERR(ierr);
523     fflush(stdout);
524   MPE_Seq_end(mat->comm,1);
525   return 0;
526 }
527 
528 extern int MatiAIJmarkdiag(Matiaij  *);
/*
    This has to provide several versions.

     1) per sequential
     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color updating, outer values exchanged between colors.
*/
/*
   SOR/SSOR relaxation for the parallel AIJ matrix.

   Input:
     matin - the matrix;  bb - right hand side;  omega - relaxation
     factor;  flag - SOR_* options (sweep direction, Eisenstat trick,
     zero initial guess, local-only variants);  shift - added to each
     diagonal entry;  its - number of iterations
   Output:
     xx - the iterated solution

   The sequential blocks use 1-based (Fortran-style) CSR storage: the
   recurring "- 1" offsets on A->j/A->a and B->j/B->a, and the shifted
   pointers xs/ls/ts, account for that.  diag[i] points (1-based) at
   the diagonal entry of row i in A.  Off-processor values live in
   mat->lvec (accessed via ls); the VecPipeline calls order the sweep
   across processors, while the plain VecScatter calls refresh lvec
   for the purely local variants.
*/
static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift,
                        int its,Vec xx)
{
  Matimpiaij *mat = (Matimpiaij *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Matiaij    *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i;
  Vec        tt;

  if (!mat->assembled) SETERR(1,"MatiAIJRelax: must assmble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x -1; /* shift by one for index start of 1 */
  ls--;
  /* build the diagonal-pointer table on first use */
  if (!A->diag) {if ((ierr = MatiAIJmarkdiag(A))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERR(1,"That option not yet support for parallel AIJ matrices");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let  A = L + U + D; where L is lower trianglar,
    U is upper triangular, E is diagonal; This routine applies

            (L + E)^{-1} A (U + E)^{-1}

    to a vector efficiently using Eisenstat's trick. This is for
    the case of SSOR preconditioner, so E is D/omega where omega
    is the relaxation factor.
    */
    ierr = VecCreate(xx,&tt); CHKERR(ierr);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /*  x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
    for ( i=m-1; i>-1; i-- ) {
      /* strictly upper-triangular part of row i of A */
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i];
      v    = A->a + diag[i];
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = shift + A->a[diag[i]-1];  /* shifted diagonal entry */
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);

    /*  t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /*  t = (E + L)^{-1}t */
    ts = t - 1; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;  /* reset: clobbered by the loop above */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                                                 mat->Mvctx); CHKERR(ierr);
    for ( i=0; i<m; i++ ) {
      /* strictly lower-triangular part of row i of A */
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] - 1;
      v    = A->a + A->i[i] - 1;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                                                    mat->Mvctx); CHKERR(ierr);
    /*  x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }


  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    /* full SSOR: forward then backward sweep each iteration, with
       pipelined communication ordering the sweep across processors */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    /* forward SOR sweeps, pipelined top-to-bottom across processors */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration uses only the lower triangle (x starts at 0) */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      /* refresh off-processor values of x, then sweep down */
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    /* backward SOR sweeps, pipelined bottom-to-top across processors */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration uses only the upper triangle (x starts at 0) */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i];
        v    = A->a + diag[i];
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      /* refresh off-processor values of x, then sweep up */
      ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                            mat->Mvctx); CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                            mat->Mvctx); CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    /* local SSOR: off-processor values are scattered once and held
       fixed for all iterations (block Jacobi between processors) */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    /* local forward sweeps with frozen off-processor values */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    /* local backward sweeps with frozen off-processor values */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                            mat->Mvctx); CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                            mat->Mvctx); CHKERR(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}
853 static int MatiAIJinsopt(Mat aijin,int op)
854 {
855   Matimpiaij *aij = (Matimpiaij *) aijin->data;
856 
857   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
858     MatSetOption(aij->A,op);
859     MatSetOption(aij->B,op);
860   }
861   else if (op == YES_NEW_NONZERO_LOCATIONS) {
862     MatSetOption(aij->A,op);
863     MatSetOption(aij->B,op);
864   }
865   else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
866   return 0;
867 }
868 
869 static int MatiAIJsize(Mat matin,int *m,int *n)
870 {
871   Matimpiaij *mat = (Matimpiaij *) matin->data;
872   *m = mat->M; *n = mat->N;
873   return 0;
874 }
875 
876 static int MatiAIJlocalsize(Mat matin,int *m,int *n)
877 {
878   Matimpiaij *mat = (Matimpiaij *) matin->data;
879   *m = mat->m; *n = mat->n;
880   return 0;
881 }
882 
883 static int MatiAIJrange(Mat matin,int *m,int *n)
884 {
885   Matimpiaij *mat = (Matimpiaij *) matin->data;
886   *m = mat->rstart; *n = mat->rend;
887   return 0;
888 }
889 
890 static int MatiCopy(Mat,Mat *);
891 
892 /* -------------------------------------------------------------------*/
/* Operation table for the parallel AIJ format.  Entries are positional
   and must follow the slot order of struct _MatOps (declared elsewhere;
   presumably SetValues, Mult/MultAdd/MultTrans/MultTransAdd, Relax,
   Copy, GetDiagonal, Begin/EndAssembly, SetOption, Zero/ZeroRows,
   Size/LocalSize/Range, judging by the handler names — confirm against
   the _MatOps definition).  A 0 marks an unimplemented operation. */
static struct _MatOps MatOps = {MatiAIJInsertValues,
       0, 0,
       MatiAIJMult,MatiAIJMultadd,MatiAIJMultTrans,MatiAIJMultTransadd,
       0,0,0,0,
       0,0,
       MatiAIJrelax,
       0,
       0,0,0,
       MatiCopy,
       MatiAIJgetdiag,0,0,
       MatiAIJBeginAssemble,MatiAIJEndAssemble,
       0,
       MatiAIJinsopt,MatiZero,MatiZerorows,0,
       0,0,0,0,
       MatiAIJsize,MatiAIJlocalsize,MatiAIJrange };
908 
909 
910 
/*@

      MatCreateMPIAIJ - Creates a sparse parallel matrix
                                 in AIJ format.

  Input Parameters:
.   comm - MPI communicator
.   m,n - number of local rows and columns (or -1 to have them calculated)
.   M,N - number of global rows and columns (or -1 to have them calculated)
.   d_nz - total number of nonzeros in the diagonal portion of the matrix
.   d_nnz - number of nonzeros per row in the diagonal portion of the
.           matrix, or null. You must leave room for the diagonal entry
.           even if it is zero.
.   o_nz - total number of nonzeros in the off-diagonal portion of the matrix
.   o_nnz - number of nonzeros per row in the off-diagonal portion of the
.           matrix, or null. You must have at least one nonzero per row.

  Output parameters:
.  newmat - the matrix

  Keywords: matrix, aij, compressed row, sparse, parallel
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Matimpiaij   *aij;
  int          ierr, i,sum[2],work[2];
  *newmat         = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATAIJMPI,comm);
  mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
  mat->ops        = &MatOps;
  mat->destroy    = MatiAIJdestroy;
  mat->view       = MatiView;
  mat->factor     = 0;
  mat->row        = 0;
  mat->col        = 0;

  mat->comm       = comm;
  aij->insertmode = NotSetValues;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  /* Derive missing global sizes by summing local sizes over the
     communicator.  NOTE(review): this assumes every processor passes a
     valid (non-negative) m and n whenever M or N is -1; otherwise the
     -1 sentinels themselves get summed into the total — confirm callers. */
  if (M == -1 || N == -1) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == -1) M = sum[0];
    if (N == -1) N = sum[1];
  }
  /* Derive missing local sizes: divide as evenly as possible, giving one
     extra row/column to each of the first (M % numtids) processors. */
  if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m       = m;
  aij->n       = n;
  aij->N       = N;
  aij->M       = M;

  /* build local table of row and column ownerships; one allocation holds
     both tables (cowners aliases the upper half).  After the Allgather
     plus running sum, rowners[p] is the first global row owned by
     processor p and rowners[numtids] is M (likewise cowners for columns). */
  aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTR(aij->rowners);
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];

  /* A is the m x n "diagonal" block; B is m x N and holds the
     off-diagonal coupling to other processors' columns */
  ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
  ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);

  /* build cache (stash) for off-processor array entries formed before
     assembly; one allocation holds the value array followed by the two
     integer index arrays, mirroring the realloc layout in StashValues */
  aij->stash.nmax = CHUNCKSIZE; /* completely arbitrary number */
  aij->stash.n    = 0;
  aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
                            sizeof(Scalar))); CHKPTR(aij->stash.array);
  aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
  aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);

  /* stuff used for matrix vector multiply; zeroed here, created elsewhere */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}
1005 
1006 static int MatiCopy(Mat matin,Mat *newmat)
1007 {
1008   Mat        mat;
1009   Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data;
1010   int        ierr;
1011   *newmat      = 0;
1012 
1013   if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
1014   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATAIJMPI,matin->comm);
1015   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
1016   mat->ops        = &MatOps;
1017   mat->destroy    = MatiAIJdestroy;
1018   mat->view       = MatiView;
1019   mat->factor     = matin->factor;
1020   mat->row        = 0;
1021   mat->col        = 0;
1022 
1023   aij->m          = oldmat->m;
1024   aij->n          = oldmat->n;
1025   aij->M          = oldmat->M;
1026   aij->N          = oldmat->N;
1027 
1028   aij->assembled  = 1;
1029   aij->rstart     = oldmat->rstart;
1030   aij->rend       = oldmat->rend;
1031   aij->cstart     = oldmat->cstart;
1032   aij->cend       = oldmat->cend;
1033   aij->numtids    = oldmat->numtids;
1034   aij->mytid      = oldmat->mytid;
1035   aij->insertmode = NotSetValues;
1036 
1037   aij->rowners    = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
1038   CHKPTR(aij->rowners);
1039   MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1040   aij->stash.nmax = 0;
1041   aij->stash.n    = 0;
1042   aij->stash.array= 0;
1043   mat->comm       = matin->comm;
1044 
1045   ierr =  VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
1046   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
1047   ierr =  MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
1048   ierr =  MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
1049   *newmat = mat;
1050   return 0;
1051 }
1052