xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision da3a660d273b912abcae7b3f88d2c9355b68b6f0)
1 
2 #include "mpiaij.h"
3 #include "vec/vecimpl.h"
4 #include "inline/spops.h"
5 
6 #define CHUNCKSIZE   100
/*
   This is a simple-minded stash: a linear search determines whether an
   entry is already present; if not, the entry is appended to the end.
*/
11 static int StashValues(Stash *stash,int row,int n, int *idxn,
12                        Scalar *values,InsertMode addv)
13 {
14   int    i,j,N = stash->n,found,*n_idx, *n_idy;
15   Scalar val,*n_array;
16 
17   for ( i=0; i<n; i++ ) {
18     found = 0;
19     val = *values++;
20     for ( j=0; j<N; j++ ) {
21       if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
22         /* found a match */
23         if (addv == AddValues) stash->array[j] += val;
24         else stash->array[j] = val;
25         found = 1;
26         break;
27       }
28     }
29     if (!found) { /* not found so add to end */
30       if ( stash->n == stash->nmax ) {
31         /* allocate a larger stash */
32         n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
33                                      2*sizeof(int) + sizeof(Scalar)));
34         CHKPTR(n_array);
35         n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
36         n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
37         MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
38         MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
39         MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
40         if (stash->array) FREE(stash->array);
41         stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
42         stash->nmax += CHUNCKSIZE;
43       }
44       stash->array[stash->n]   = val;
45       stash->idx[stash->n]     = row;
46       stash->idy[stash->n++]   = idxn[i];
47     }
48   }
49   return 0;
50 }
51 
/*
   Inserts or adds an m-by-n logically dense block of values v
   (row-major) at global rows idxm[] and columns idxn[].
   Locally owned rows go directly into the diagonal block (aij->A,
   local column numbering) or off-diagonal block (aij->B, global
   column numbering); rows owned by other processors are cached in
   the stash and communicated during assembly.  Mixing AddValues and
   InsertValues between calls (before assembly) is an error.
*/
static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n,
                            int *idxn,Scalar *v,InsertMode addv)
{
  Matimpiaij *aij = (Matimpiaij *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  if (aij->insertmode != NotSetValues && aij->insertmode != addv) {
    SETERR(1,"You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERR(1,"Negative row index");
    if (idxm[i] >= aij->M) SETERR(1,"Row index too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* this row is owned locally; convert to local row numbering */
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERR(1,"Negative column index");
        if (idxn[j] >= aij->N) SETERR(1,"Column index too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block: local column numbering */
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
        }
        else {
          if (aij->assembled) {
            SETERR(1,"Cannot insert off diagonal block in already\
                     assembled matrix. Contact petsc-maint@mcs.anl.gov\
                     if your need this feature");
          }
          /* off-diagonal block keeps the global column number */
          col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr);
        }
      }
    }
    else {
      /* row owned elsewhere: cache the whole row slice for assembly */
      ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr);
    }
  }
  return 0;
}
92 
/*
    The assembly code is a lot like the code for vectors; at some point
    we should derive a single assembly routine that can be used for
    either case.
*/
98 
99 static int MatiAIJBeginAssemble(Mat mat)
100 {
101   Matimpiaij  *aij = (Matimpiaij *) mat->data;
102   MPI_Comm    comm = mat->comm;
103   int         ierr, numtids = aij->numtids, *owners = aij->rowners;
104   int         mytid = aij->mytid;
105   MPI_Request *send_waits,*recv_waits;
106   int         *nprocs,i,j,n,idx,*procs,nsends,nreceives,nmax,*work;
107   int         tag = 50, *owner,*starts,count;
108   InsertMode  addv;
109   Scalar      *rvalues,*svalues;
110 
111   /* make sure all processors are either in INSERTMODE or ADDMODE */
112   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,numtids,MPI_INT,
113                 MPI_BOR,comm);
114   if (addv == (AddValues|InsertValues)) {
115     SETERR(1,"Some processors have inserted while others have added");
116   }
117   aij->insertmode = addv; /* in case this processor had no cache */
118 
119   /*  first count number of contributors to each processor */
120   nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
121   MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
122   owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner);
123   for ( i=0; i<aij->stash.n; i++ ) {
124     idx = aij->stash.idx[i];
125     for ( j=0; j<numtids; j++ ) {
126       if (idx >= owners[j] && idx < owners[j+1]) {
127         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
128       }
129     }
130   }
131   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
132 
133   /* inform other processors of number of messages and max length*/
134   work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
135   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
136   nreceives = work[mytid];
137   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
138   nmax = work[mytid];
139   FREE(work);
140 
141   /* post receives:
142        1) each message will consist of ordered pairs
143      (global index,value) we store the global index as a double
144      to simplify the message passing.
145        2) since we don't know how long each individual message is we
146      allocate the largest needed buffer for each receive. Potentially
147      this is a lot of wasted space.
148 
149 
150        This could be done better.
151   */
152   rvalues = (Scalar *) MALLOC(3*(nreceives+1)*nmax*sizeof(Scalar));
153   CHKPTR(rvalues);
154   recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request));
155   CHKPTR(recv_waits);
156   for ( i=0; i<nreceives; i++ ) {
157     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag,
158               comm,recv_waits+i);
159   }
160 
161   /* do sends:
162       1) starts[i] gives the starting index in svalues for stuff going to
163          the ith processor
164   */
165   svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
166   CHKPTR(svalues);
167   send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
168   CHKPTR(send_waits);
169   starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts);
170   starts[0] = 0;
171   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
172   for ( i=0; i<aij->stash.n; i++ ) {
173     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
174     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
175     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
176   }
177   FREE(owner);
178   starts[0] = 0;
179   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
180   count = 0;
181   for ( i=0; i<numtids; i++ ) {
182     if (procs[i]) {
183       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag,
184                 comm,send_waits+count++);
185     }
186   }
187   FREE(starts); FREE(nprocs);
188 
189   /* Free cache space */
190   aij->stash.nmax = aij->stash.n = 0;
191   if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;}
192 
193   aij->svalues    = svalues;       aij->rvalues = rvalues;
194   aij->nsends     = nsends;         aij->nrecvs = nreceives;
195   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
196   aij->rmax       = nmax;
197 
198   return 0;
199 }
200 extern int MPIAIJSetUpMultiply(Mat);
201 
/*
   Completes assembly of the parallel matrix.  The receives posted in
   MatiAIJBeginAssemble() are drained (in arrival order) and each
   (row,column,value) triple is inserted into the diagonal (A) or
   off-diagonal (B) sequential block; the outstanding sends are then
   completed and both blocks are assembled.  On the first assembly the
   matrix-vector multiply scatter context is also built.
*/
static int MatiAIJEndAssemble(Mat mat)
{
  int        ierr;
  Matimpiaij *aij = (Matimpiaij *) mat->data;

  MPI_Status  *send_status,recv_status;
  int         imdex,idx,nrecvs = aij->nrecvs, count = nrecvs, i, n;
  int         row,col;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /*  wait on receives; process each message as it arrives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPI_SCALAR,&n);
    n = n/3; /* each entry is a (row,column,value) triple of Scalars */
    for ( i=0; i<n; i++ ) {
      /* indices travel as Scalars; convert back to ints */
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
          col -= aij->cstart;
        /* NOTE(review): the MatSetValues return codes are ignored here */
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          SETERR(1,"Cannot insert off diagonal block in already\
                   assembled matrix. Contact petsc-maint@mcs.anl.gov\
                   if your need this feature");
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  FREE(aij->recv_waits); FREE(aij->rvalues);

  /* wait on sends so the send buffer may safely be freed */
  if (aij->nsends) {
    send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) );
    CHKPTR(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    FREE(send_status);
  }
  FREE(aij->send_waits); FREE(aij->svalues);

  aij->insertmode = NotSetValues;
  ierr = MatBeginAssembly(aij->A); CHKERR(ierr);
  ierr = MatEndAssembly(aij->A); CHKERR(ierr);


  if (aij->assembled) {
    /* this is because the present implementation doesn't support
     inserting values off the diagonal block.*/
    return 0;
  }

  ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr);
  ierr = MatBeginAssembly(aij->B); CHKERR(ierr);
  ierr = MatEndAssembly(aij->B); CHKERR(ierr);

  aij->assembled = 1;
  return 0;
}
268 
269 static int MatiZero(Mat A)
270 {
271   Matimpiaij *l = (Matimpiaij *) A->data;
272 
273   MatZeroEntries(l->A); MatZeroEntries(l->B);
274   return 0;
275 }
276 
/* Again this uses the same basic strategy as the assembly and
   scatter-create routines; we should try to do it systematically
   if we can figure out the proper level of generality. */

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
/*
   Zeros the globally numbered rows listed in is (optionally setting the
   diagonal entry of locally owned rows to *diag).  Rows owned by other
   processors are forwarded to their owners using the same
   count / post-receive / nonblocking-send pattern as the assembly code;
   each processor then zeros its own rows via MatZeroRows() on the two
   sequential blocks.  See the BUG note above this routine concerning
   diagonal entries.
*/
static int MatiZerorows(Mat A,IS is,Scalar *diag)
{
  Matimpiaij     *l = (Matimpiaij *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int            *localrows,*procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int            *rvalues,tag = 67,count,base,slen,n,len,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERR(1,"MatiZerorows: must assmble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
  ierr = ISGetIndices(is,&rows); CHKERR(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
  MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERR(1,"Imdex out of range");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  FREE(work);

  /* post receives: one worst-case-sized buffer per expected message */
  rvalues = (int *) MALLOC((nrecvs+1)*nmax*sizeof(int)); /*see note */
  CHKPTR(rvalues);
  recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTR(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
  send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTR(send_waits);
  starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* the packing loop consumed starts[]; rebuild it for the sends */
  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  FREE(starts);

  /* base = first globally numbered row owned by this processor */
  base = owners[mytid];

  /*  wait on receives; record each message's source and length */
  lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  FREE(recv_waits);

  /* gather all received global row numbers, converted to local numbering */
  lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  FREE(rvalues); FREE(lens);
  FREE(owner); FREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr);  FREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
  ierr = ISDestroy(istmp); CHKERR(ierr);

  /* wait on sends so the send buffer may safely be freed */
  if (nsends) {
    send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
    CHKPTR(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    FREE(send_status);
  }
  FREE(send_waits); FREE(svalues);


  return 0;
}
411 
412 static int MatiAIJMult(Mat aijin,Vec xx,Vec yy)
413 {
414   Matimpiaij *aij = (Matimpiaij *) aijin->data;
415   int        ierr;
416   if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
417   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
418   CHKERR(ierr);
419   ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
420   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
421   CHKERR(ierr);
422   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
423   return 0;
424 }
425 
426 static int MatiAIJMultadd(Mat aijin,Vec xx,Vec yy,Vec zz)
427 {
428   Matimpiaij *aij = (Matimpiaij *) aijin->data;
429   int        ierr;
430   if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first");
431   ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
432   CHKERR(ierr);
433   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
434   ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
435   CHKERR(ierr);
436   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
437   return 0;
438 }
439 
440 static int MatiAIJMultTrans(Mat aijin,Vec xx,Vec yy)
441 {
442   Matimpiaij *aij = (Matimpiaij *) aijin->data;
443   int        ierr;
444 
445   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
446   /* do nondiagonal part */
447   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
448   /* send it on its way */
449   ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues,
450                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
451   /* do local part */
452   ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
453   /* receive remote parts: note this assumes the values are not actually */
454   /* inserted in yy until the next line, which is true for my implementation*/
455   /* but is not perhaps always true. */
456   ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse,
457                          aij->Mvctx); CHKERR(ierr);
458   return 0;
459 }
460 
461 static int MatiAIJMultTransadd(Mat aijin,Vec xx,Vec yy,Vec zz)
462 {
463   Matimpiaij *aij = (Matimpiaij *) aijin->data;
464   int        ierr;
465 
466   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
467   /* do nondiagonal part */
468   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
469   /* send it on its way */
470   ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues,
471                          ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
472   /* do local part */
473   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
474   /* receive remote parts: note this assumes the values are not actually */
475   /* inserted in yy until the next line, which is true for my implementation*/
476   /* but is not perhaps always true. */
477   ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse,
478                          aij->Mvctx); CHKERR(ierr);
479   return 0;
480 }
481 
482 /*
483   This only works correctly for square matrices where the subblock A->A is the
484    diagonal block
485 */
486 static int MatiAIJgetdiag(Mat Ain,Vec v)
487 {
488   Matimpiaij *A = (Matimpiaij *) Ain->data;
489   if (!A->assembled) SETERR(1,"MatiAIJgetdiag: must assmble matrix first");
490   return MatGetDiagonal(A->A,v);
491 }
492 
493 static int MatiAIJdestroy(PetscObject obj)
494 {
495   Mat        mat = (Mat) obj;
496   Matimpiaij *aij = (Matimpiaij *) mat->data;
497   int        ierr;
498   FREE(aij->rowners);
499   ierr = MatDestroy(aij->A); CHKERR(ierr);
500   ierr = MatDestroy(aij->B); CHKERR(ierr);
501   FREE(aij); FREE(mat);
502   if (aij->lvec) VecDestroy(aij->lvec);
503   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
504   return 0;
505 }
506 
507 static int MatiView(PetscObject obj,Viewer viewer)
508 {
509   Mat        mat = (Mat) obj;
510   Matimpiaij *aij = (Matimpiaij *) mat->data;
511   int        ierr;
512 
513   if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first");
514   MPE_Seq_begin(mat->comm,1);
515   printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
516           aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
517           aij->cend);
518   ierr = MatView(aij->A,viewer); CHKERR(ierr);
519   ierr = MatView(aij->B,viewer); CHKERR(ierr);
520   MPE_Seq_end(mat->comm,1);
521   return 0;
522 }
523 
524 extern int MatiAIJmarkdiag(Matiaij  *);
/*
    This has to provide several versions:

     1) pure sequential
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
*/
/*
   SOR/SSOR relaxation for the parallel matrix.  Supports true parallel
   forward/backward/symmetric sweeps (ordered across processors with
   vector pipelines), purely local sweeps, and the Eisenstat trick for
   SSOR preconditioning.  The sequential AIJ blocks A and B use index
   arrays that start at 1 (see the "shift by one" comments below),
   hence the repeated "- 1" adjustments and the shifted pointers
   xs, ls and ts.
   NOTE(review): locals w, one, tmp, j and mytid are declared but never
   used; n's initializer mat->n is overwritten before first use.
*/
static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift,
                        int its,Vec xx)
{
  Matimpiaij *mat = (Matimpiaij *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Matiaij    *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data;
  Scalar     zero = 0.0,*b,*x,*w,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,one = 1, tmp, *idx, *diag,mytid;
  int        n = mat->n, m = mat->m, i, j;
  Vec        tt;

  if (!mat->assembled) SETERR(1,"MatiAIJRelax: must assmble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x -1; /* shift by one for index start of 1 */
  ls--;
  /* lazily compute the positions of the diagonal entries of A */
  if (!A->diag) {if (ierr = MatiAIJmarkdiag(A)) return ierr;}
  diag = A->diag;
  if (flag & SOR_EISENSTAT) {
    /* Let  A = L + U + D; where L is lower triangular,
    U is upper triangular, E is diagonal; This routine applies

            (L + E)^{-1} A (U + E)^{-1}

    to a vector efficiently using Eisenstat's trick. This is for
    the case of SSOR preconditioner, so E is D/omega where omega
    is the relaxation factor.
    */
    ierr = VecCreate(xx,&tt); CHKERR(ierr);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /*  x = (E + U)^{-1} b : backward solve using only the strict upper
        triangle of A (entries after the diagonal) plus B */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i];
      v    = A->a + diag[i];
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);

    /*  t = b - (2*E - D)x   (note: this loop advances diag) */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /*  t = (E + L)^{-1}t : forward solve using only the strict lower
        triangle of A (entries before the diagonal) plus B */
    ts = t - 1; /* shifted by one for index start of a or mat->j*/
    diag = A->diag; /* reset after the advancing loop above */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                                                 mat->Mvctx); CHKERR(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] - 1;
      v    = A->a + A->i[i] - 1;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                                                    mat->Mvctx); CHKERR(ierr);
    /*  x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }


  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      /* fetch current ghost values before sweeping */
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep with x = 0: only the lower triangle contributes */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      /* refresh ghost values, then sweep down through the rows */
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep with x = 0: only the upper triangle contributes */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i];
        v    = A->a + diag[i];
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      /* refresh ghost values, then sweep up through the rows */
      ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                            mat->Mvctx); CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                            mat->Mvctx); CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    /* ghost values are fetched once and held fixed for all iterations */
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    /* ghost values are fetched once and held fixed for all iterations */
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    /* ghost values are fetched once and held fixed for all iterations */
    ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                            mat->Mvctx); CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                            mat->Mvctx); CHKERR(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}
846 static int MatiAIJinsopt(Mat aijin,int op)
847 {
848   Matimpiaij *aij = (Matimpiaij *) aijin->data;
849 
850   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
851     MatSetOption(aij->A,op);
852     MatSetOption(aij->B,op);
853   }
854   else if (op == YES_NEW_NONZERO_LOCATIONS) {
855     MatSetOption(aij->A,op);
856     MatSetOption(aij->B,op);
857   }
858   else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
859   return 0;
860 }
861 
862 static int MatiAIJsize(Mat matin,int *m,int *n)
863 {
864   Matimpiaij *mat = (Matimpiaij *) matin->data;
865   *m = mat->M; *n = mat->N;
866   return 0;
867 }
868 
869 static int MatiAIJlocalsize(Mat matin,int *m,int *n)
870 {
871   Matimpiaij *mat = (Matimpiaij *) matin->data;
872   *m = mat->m; *n = mat->n;
873   return 0;
874 }
875 
876 static int MatiAIJrange(Mat matin,int *m,int *n)
877 {
878   Matimpiaij *mat = (Matimpiaij *) matin->data;
879   *m = mat->rstart; *n = mat->rend;
880   return 0;
881 }
882 
static int MatiCopy(Mat,Mat *);

/* -------------------------------------------------------------------*/
/*
   Function dispatch table for the parallel AIJ format. Entries are
   positional; a 0 marks an operation not supported by this format.
   NOTE(review): the slot order is fixed by struct _MatOps, which is
   declared elsewhere -- verify against that declaration before
   adding or reordering entries here.
*/
static struct _MatOps MatOps = {MatiAIJInsertValues,
       0, 0,
       MatiAIJMult,MatiAIJMultadd,MatiAIJMultTrans,MatiAIJMultTransadd,
       0,0,0,0,
       0,0,
       MatiAIJrelax,
       0,
       0,0,0,
       MatiCopy,
       MatiAIJgetdiag,0,0,
       MatiAIJBeginAssemble,MatiAIJEndAssemble,
       0,
       MatiAIJinsopt,MatiZero,MatiZerorows,0,
       0,0,0,0,
       MatiAIJsize,MatiAIJlocalsize,MatiAIJrange };
901 
902 
903 
904 /*@
905 
906       MatCreateMPIAIJ - Creates a sparse parallel matrix
907                                  in AIJ format.
908 
909   Input Parameters:
910 .   comm - MPI communicator
911 .   m,n - number of local rows and columns (or -1 to have calculated)
912 .   M,N - global rows and columns (or -1 to have calculated)
.   d_nz - total number of nonzeros in diagonal portion of matrix
.   d_nnz - number of nonzeros per row in diagonal portion of matrix or null.
.           You must leave room for the diagonal entry even if it is zero.
.   o_nz - total number of nonzeros in off-diagonal portion of matrix
.   o_nnz - number of nonzeros per row in off-diagonal portion of matrix
.           or null. You must have at least one nonzero per row.
919 
920   Output parameters:
921 .  newmat - the matrix
922 
923   Keywords: matrix, aij, compressed row, sparse, parallel
924 @*/
925 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
926                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
927 {
928   Mat          mat;
929   Matimpiaij   *aij;
930   int          ierr, i,rl,len,sum[2],work[2];
931   *newmat         = 0;
932   CREATEHEADER(mat,_Mat);
933   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
934   mat->cookie     = MAT_COOKIE;
935   mat->ops        = &MatOps;
936   mat->destroy    = MatiAIJdestroy;
937   mat->view       = MatiView;
938   mat->type       = MATAIJMPI;
939   mat->factor     = 0;
940   mat->row        = 0;
941   mat->col        = 0;
942 
943   mat->comm       = comm;
944   aij->insertmode = NotSetValues;
945   MPI_Comm_rank(comm,&aij->mytid);
946   MPI_Comm_size(comm,&aij->numtids);
947 
948   if (M == -1 || N == -1) {
949     work[0] = m; work[1] = n;
950     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
951     if (M == -1) M = sum[0];
952     if (N == -1) N = sum[1];
953   }
954   if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
955   if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
956   aij->m       = m;
957   aij->n       = n;
958   aij->N       = N;
959   aij->M       = M;
960 
961   /* build local table of row and column ownerships */
962   aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
963   CHKPTR(aij->rowners);
964   aij->cowners = aij->rowners + aij->numtids + 1;
965   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
966   aij->rowners[0] = 0;
967   for ( i=2; i<=aij->numtids; i++ ) {
968     aij->rowners[i] += aij->rowners[i-1];
969   }
970   aij->rstart = aij->rowners[aij->mytid];
971   aij->rend   = aij->rowners[aij->mytid+1];
972   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
973   aij->cowners[0] = 0;
974   for ( i=2; i<=aij->numtids; i++ ) {
975     aij->cowners[i] += aij->cowners[i-1];
976   }
977   aij->cstart = aij->cowners[aij->mytid];
978   aij->cend   = aij->cowners[aij->mytid+1];
979 
980 
981   ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
982   ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);
983 
984   /* build cache for off array entries formed */
985   aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */
986   aij->stash.n    = 0;
987   aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
988                             sizeof(Scalar))); CHKPTR(aij->stash.array);
989   aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
990   aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);
991 
992   /* stuff used for matrix vector multiply */
993   aij->lvec      = 0;
994   aij->Mvctx     = 0;
995   aij->assembled = 0;
996 
997   *newmat = mat;
998   return 0;
999 }
1000 
1001 static int MatiCopy(Mat matin,Mat *newmat)
1002 {
1003   Mat        mat;
1004   Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data;
1005   int        i,rl,len, m = oldmat->m, n = oldmat->n,ierr;
1006   *newmat      = 0;
1007 
1008   if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
1009   CREATEHEADER(mat,_Mat);
1010   mat->data       = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
1011   mat->cookie     = MAT_COOKIE;
1012   mat->ops        = &MatOps;
1013   mat->destroy    = MatiAIJdestroy;
1014   mat->view       = MatiView;
1015   mat->type       = MATAIJSEQ;
1016   mat->factor     = matin->factor;
1017   mat->row        = 0;
1018   mat->col        = 0;
1019 
1020   aij->m          = oldmat->m;
1021   aij->n          = oldmat->n;
1022   aij->M          = oldmat->M;
1023   aij->N          = oldmat->N;
1024 
1025   aij->assembled  = 1;
1026   aij->rstart     = oldmat->rstart;
1027   aij->rend       = oldmat->rend;
1028   aij->cstart     = oldmat->cstart;
1029   aij->cend       = oldmat->cend;
1030   aij->numtids    = oldmat->numtids;
1031   aij->mytid      = oldmat->mytid;
1032   aij->insertmode = NotSetValues;
1033 
1034   aij->rowners    = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
1035   CHKPTR(aij->rowners);
1036   MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1037   aij->stash.nmax = 0;
1038   aij->stash.n    = 0;
1039   aij->stash.array= 0;
1040   mat->comm       = matin->comm;
1041 
1042   ierr =  VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
1043   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
1044   ierr =  MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
1045   ierr =  MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
1046   *newmat = mat;
1047   return 0;
1048 }
1049