xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dbb450ca0a1bd4568a196678f20f5151425d3a04)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.79 1995/09/12 03:25:20 bsmith Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non scable way since the
12 length of colmap equals the global matrix length.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i;
19   int        shift = B->indexshift;
20 
21   aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
22   PLogObjectMemory(mat,aij->N*sizeof(int));
23   PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
24   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
25   return 0;
26 }
27 
28 extern int DisAssemble_MPIAIJ(Mat);
29 
30 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
31 {
32   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
33   int ierr;
34   if (aij->numtids == 1) {
35     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
36   } else
37     SETERRQ(1,"MatGetReordering_MPIAIJ:  not yet supported in parallel");
38   return 0;
39 }
40 
41 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
42                             int *idxn,Scalar *v,InsertMode addv)
43 {
44   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
45   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
46   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
47   int        cstart = aij->cstart, cend = aij->cend,row,col;
48   int        shift = C->indexshift;
49 
50   if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
51     SETERRQ(1,"MatSetValues_MPIAIJ:You cannot mix inserts and adds");
52   }
53   aij->insertmode = addv;
54   for ( i=0; i<m; i++ ) {
55     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
56     if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
57     if (idxm[i] >= rstart && idxm[i] < rend) {
58       row = idxm[i] - rstart;
59       for ( j=0; j<n; j++ ) {
60         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
61         if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
62         if (idxn[j] >= cstart && idxn[j] < cend){
63           col = idxn[j] - cstart;
64           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
65         }
66         else {
67           if (aij->assembled) {
68             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
69             col = aij->colmap[idxn[j]] + shift;
70             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
71               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
72               col =  idxn[j];
73             }
74           }
75           else col = idxn[j];
76           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
77         }
78       }
79     }
80     else {
81       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
82       CHKERRQ(ierr);
83     }
84   }
85   return 0;
86 }
87 
88 /*
89     the assembly code is a lot like the code for vectors, we should
90     sometime derive a single assembly code that can be used for
91     either case.
92 */
93 
94 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
95 {
96   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
97   MPI_Comm    comm = mat->comm;
98   int         numtids = aij->numtids, *owners = aij->rowners;
99   int         mytid = aij->mytid;
100   MPI_Request *send_waits,*recv_waits;
101   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
102   int         tag = mat->tag, *owner,*starts,count,ierr;
103   InsertMode  addv;
104   Scalar      *rvalues,*svalues;
105 
106   /* make sure all processors are either in INSERTMODE or ADDMODE */
107   MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
108                 MPI_BOR,comm);
109   if (addv == (ADD_VALUES|INSERT_VALUES)) {
110     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
111   }
112   aij->insertmode = addv; /* in case this processor had no cache */
113 
114   /*  first count number of contributors to each processor */
115   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
116   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
117   owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
118   for ( i=0; i<aij->stash.n; i++ ) {
119     idx = aij->stash.idx[i];
120     for ( j=0; j<numtids; j++ ) {
121       if (idx >= owners[j] && idx < owners[j+1]) {
122         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
123       }
124     }
125   }
126   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
127 
128   /* inform other processors of number of messages and max length*/
129   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
130   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
131   nreceives = work[mytid];
132   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
133   nmax = work[mytid];
134   PETSCFREE(work);
135 
136   /* post receives:
137        1) each message will consist of ordered pairs
138      (global index,value) we store the global index as a double
139      to simplify the message passing.
140        2) since we don't know how long each individual message is we
141      allocate the largest needed buffer for each receive. Potentially
142      this is a lot of wasted space.
143 
144 
145        This could be done better.
146   */
147   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
148   CHKPTRQ(rvalues);
149   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
150   CHKPTRQ(recv_waits);
151   for ( i=0; i<nreceives; i++ ) {
152     MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
153               comm,recv_waits+i);
154   }
155 
156   /* do sends:
157       1) starts[i] gives the starting index in svalues for stuff going to
158          the ith processor
159   */
160   svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
161   CHKPTRQ(svalues);
162   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
163   CHKPTRQ(send_waits);
164   starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
165   starts[0] = 0;
166   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
167   for ( i=0; i<aij->stash.n; i++ ) {
168     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
169     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
170     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
171   }
172   PETSCFREE(owner);
173   starts[0] = 0;
174   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
175   count = 0;
176   for ( i=0; i<numtids; i++ ) {
177     if (procs[i]) {
178       MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
179                 comm,send_waits+count++);
180     }
181   }
182   PETSCFREE(starts); PETSCFREE(nprocs);
183 
184   /* Free cache space */
185   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
186 
187   aij->svalues    = svalues;    aij->rvalues = rvalues;
188   aij->nsends     = nsends;     aij->nrecvs = nreceives;
189   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
190   aij->rmax       = nmax;
191 
192   return 0;
193 }
194 extern int MatSetUpMultiply_MPIAIJ(Mat);
195 
196 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
197 {
198   int        ierr;
199   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
200   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
201   MPI_Status  *send_status,recv_status;
202   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
203   int         row,col,other_disassembled;
204   Scalar      *values,val;
205   InsertMode  addv = aij->insertmode;
206   int         shift = C->indexshift;
207 
208   /*  wait on receives */
209   while (count) {
210     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
211     /* unpack receives into our local space */
212     values = aij->rvalues + 3*imdex*aij->rmax;
213     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
214     n = n/3;
215     for ( i=0; i<n; i++ ) {
216       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
217       col = (int) PETSCREAL(values[3*i+1]);
218       val = values[3*i+2];
219       if (col >= aij->cstart && col < aij->cend) {
220           col -= aij->cstart;
221         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
222       }
223       else {
224         if (aij->assembled) {
225           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
226           col = aij->colmap[col] + shift;
227           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
228             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
229             col = (int) PETSCREAL(values[3*i+1]);
230           }
231         }
232         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
233       }
234     }
235     count--;
236   }
237   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
238 
239   /* wait on sends */
240   if (aij->nsends) {
241     send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
242     CHKPTRQ(send_status);
243     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
244     PETSCFREE(send_status);
245   }
246   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
247 
248   aij->insertmode = NOTSETVALUES;
249   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
250   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
251 
252   /* determine if any processor has disassembled, if so we must
253      also disassemble ourselfs, in order that we may reassemble. */
254   MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
255                  MPI_INT,MPI_PROD,mat->comm);
256   if (aij->assembled && !other_disassembled) {
257     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
258   }
259 
260   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
261     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
262     aij->assembled = 1;
263   }
264   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
265   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
266 
267   return 0;
268 }
269 
270 static int MatZeroEntries_MPIAIJ(Mat A)
271 {
272   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
273   int ierr;
274   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
275   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
276   return 0;
277 }
278 
279 /* again this uses the same basic stratagy as in the assembly and
280    scatter create routines, we should try to do it systemamatically
281    if we can figure out the proper level of generality. */
282 
283 /* the code does not do the diagonal entries correctly unless the
284    matrix is square and the column and row owerships are identical.
285    This is a BUG. The only way to fix it seems to be to access
286    aij->A and aij->B directly and not through the MatZeroRows()
287    routine.
288 */
289 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
290 {
291   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
292   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
293   int            *procs,*nprocs,j,found,idx,nsends,*work;
294   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
295   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
296   int            *lens,imdex,*lrows,*values;
297   MPI_Comm       comm = A->comm;
298   MPI_Request    *send_waits,*recv_waits;
299   MPI_Status     recv_status,*send_status;
300   IS             istmp;
301 
302   if (!l->assembled)
303     SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix first");
304   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
305   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
306 
307   /*  first count number of contributors to each processor */
308   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
309   PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
310   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
311   for ( i=0; i<N; i++ ) {
312     idx = rows[i];
313     found = 0;
314     for ( j=0; j<numtids; j++ ) {
315       if (idx >= owners[j] && idx < owners[j+1]) {
316         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
317       }
318     }
319     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
320   }
321   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
322 
323   /* inform other processors of number of messages and max length*/
324   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
325   MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
326   nrecvs = work[mytid];
327   MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
328   nmax = work[mytid];
329   PETSCFREE(work);
330 
331   /* post receives:   */
332   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
333   CHKPTRQ(rvalues);
334   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
335   CHKPTRQ(recv_waits);
336   for ( i=0; i<nrecvs; i++ ) {
337     MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
338               comm,recv_waits+i);
339   }
340 
341   /* do sends:
342       1) starts[i] gives the starting index in svalues for stuff going to
343          the ith processor
344   */
345   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
346   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
347   CHKPTRQ(send_waits);
348   starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
349   starts[0] = 0;
350   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
351   for ( i=0; i<N; i++ ) {
352     svalues[starts[owner[i]]++] = rows[i];
353   }
354   ISRestoreIndices(is,&rows);
355 
356   starts[0] = 0;
357   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
358   count = 0;
359   for ( i=0; i<numtids; i++ ) {
360     if (procs[i]) {
361       MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
362                 comm,send_waits+count++);
363     }
364   }
365   PETSCFREE(starts);
366 
367   base = owners[mytid];
368 
369   /*  wait on receives */
370   lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
371   source = lens + nrecvs;
372   count = nrecvs; slen = 0;
373   while (count) {
374     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
375     /* unpack receives into our local space */
376     MPI_Get_count(&recv_status,MPI_INT,&n);
377     source[imdex]  = recv_status.MPI_SOURCE;
378     lens[imdex]  = n;
379     slen += n;
380     count--;
381   }
382   PETSCFREE(recv_waits);
383 
384   /* move the data into the send scatter */
385   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
386   count = 0;
387   for ( i=0; i<nrecvs; i++ ) {
388     values = rvalues + i*nmax;
389     for ( j=0; j<lens[i]; j++ ) {
390       lrows[count++] = values[j] - base;
391     }
392   }
393   PETSCFREE(rvalues); PETSCFREE(lens);
394   PETSCFREE(owner); PETSCFREE(nprocs);
395 
396   /* actually zap the local rows */
397   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);
398   PLogObjectParent(A,istmp);
399   CHKERRQ(ierr);  PETSCFREE(lrows);
400   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
401   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
402   ierr = ISDestroy(istmp); CHKERRQ(ierr);
403 
404   /* wait on sends */
405   if (nsends) {
406     send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
407     CHKPTRQ(send_status);
408     MPI_Waitall(nsends,send_waits,send_status);
409     PETSCFREE(send_status);
410   }
411   PETSCFREE(send_waits); PETSCFREE(svalues);
412 
413 
414   return 0;
415 }
416 
417 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
418 {
419   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
420   int        ierr;
421   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first");
422   ierr = VecScatterBegin(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx);
423   CHKERRQ(ierr);
424   ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
425   ierr = VecScatterEnd(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx);
426   CHKERRQ(ierr);
427   ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
428   return 0;
429 }
430 
431 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
432 {
433   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
434   int        ierr;
435   if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first");
436   ierr = VecScatterBegin(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx);
437   CHKERRQ(ierr);
438   ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
439   ierr = VecScatterEnd(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx);
440   CHKERRQ(ierr);
441   ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
442   return 0;
443 }
444 
445 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
446 {
447   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
448   int        ierr;
449 
450   if (!aij->assembled)
451     SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix first");
452   /* do nondiagonal part */
453   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
454   /* send it on its way */
455   ierr = VecScatterBegin(aij->lvec,yy,ADD_VALUES,
456            (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
457   /* do local part */
458   ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
459   /* receive remote parts: note this assumes the values are not actually */
460   /* inserted in yy until the next line, which is true for my implementation*/
461   /* but is not perhaps always true. */
462   ierr = VecScatterEnd(aij->lvec,yy,ADD_VALUES,
463          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
464   return 0;
465 }
466 
467 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
468 {
469   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
470   int        ierr;
471 
472   if (!aij->assembled)
473     SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix first");
474   /* do nondiagonal part */
475   ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
476   /* send it on its way */
477   ierr = VecScatterBegin(aij->lvec,zz,ADD_VALUES,
478          (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
479   /* do local part */
480   ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
481   /* receive remote parts: note this assumes the values are not actually */
482   /* inserted in yy until the next line, which is true for my implementation*/
483   /* but is not perhaps always true. */
484   ierr = VecScatterEnd(aij->lvec,zz,ADD_VALUES,
485        (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
486   return 0;
487 }
488 
489 /*
490   This only works correctly for square matrices where the subblock A->A is the
491    diagonal block
492 */
493 static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
494 {
495   Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
496   if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix first");
497   return MatGetDiagonal(A->A,v);
498 }
499 
500 static int MatDestroy_MPIAIJ(PetscObject obj)
501 {
502   Mat        mat = (Mat) obj;
503   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
504   int        ierr;
505 #if defined(PETSC_LOG)
506   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
507 #endif
508   PETSCFREE(aij->rowners);
509   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
510   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
511   if (aij->colmap) PETSCFREE(aij->colmap);
512   if (aij->garray) PETSCFREE(aij->garray);
513   if (aij->lvec) VecDestroy(aij->lvec);
514   if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
515   PETSCFREE(aij);
516   PLogObjectDestroy(mat);
517   PETSCHEADERDESTROY(mat);
518   return 0;
519 }
520 #include "draw.h"
521 #include "pinclude/pviewer.h"
522 
523 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
524 {
525   Mat         mat = (Mat) obj;
526   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
527   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
528   int         ierr, format;
529   PetscObject vobj = (PetscObject) viewer;
530   int         shift = C->indexshift;
531 
532   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix first");
533   if (!viewer) { /* so that viewers may be used from debuggers */
534     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
535   }
536   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
537   ierr = ViewerFileGetFormat_Private(viewer,&format);
538   if (vobj->cookie == VIEWER_COOKIE && format == FILE_FORMAT_INFO &&
539      (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER)) {
540    /* do nothing for now */
541   }
542   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
543     FILE *fd;
544     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
545     MPIU_Seq_begin(mat->comm,1);
546     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
547            aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
548            aij->cend);
549     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
550     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
551     fflush(fd);
552     MPIU_Seq_end(mat->comm,1);
553   }
554   else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) ||
555             vobj->cookie == DRAW_COOKIE) {
556     int numtids = aij->numtids, mytid = aij->mytid;
557     if (numtids == 1) {
558       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
559     }
560     else {
561       /* assemble the entire matrix onto first processor. */
562       Mat     A;
563       Mat_SeqAIJ *Aloc;
564       int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
565       Scalar  *a;
566 
567       if (!mytid) {
568         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
569       }
570       else {
571         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
572       }
573       PLogObjectParent(mat,A);
574       CHKERRQ(ierr);
575 
576       /* copy over the A part */
577       Aloc = (Mat_SeqAIJ*) aij->A->data;
578       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
579       row = aij->rstart;
580       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
581       for ( i=0; i<m; i++ ) {
582         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
583         CHKERRQ(ierr);
584         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
585       }
586       aj = Aloc->j;
587       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
588 
589       /* copy over the B part */
590       Aloc = (Mat_SeqAIJ*) aij->B->data;
591       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
592       row = aij->rstart;
593       ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
594       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
595       for ( i=0; i<m; i++ ) {
596         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
597         CHKERRQ(ierr);
598         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
599       }
600       PETSCFREE(ct);
601       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
602       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
603       if (!mytid) {
604         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
605       }
606       ierr = MatDestroy(A); CHKERRQ(ierr);
607     }
608   }
609   return 0;
610 }
611 
612 extern int MatMarkDiag_SeqAIJ(Mat);
613 /*
614     This has to provide several versions.
615 
616      1) per sequential
617      2) a) use only local smoothing updating outer values only once.
618         b) local smoothing updating outer values each inner iteration
619      3) color updating out values betwen colors.
620 */
621 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
622                            double fshift,int its,Vec xx)
623 {
624   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
625   Mat        AA = mat->A, BB = mat->B;
626   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
627   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
628   int        ierr,*idx, *diag;
629   int        n = mat->n, m = mat->m, i;
630   Vec        tt;
631   int        shift = A->indexshift;
632 
633   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix first");
634 
635   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
636   xs = x + shift; /* shift by one for index start of 1 */
637   ls = ls + shift;
638   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
639   diag = A->diag;
640   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
641     SETERRQ(1,"MatRelax_MPIAIJ:Option not yet supported");
642   }
643   if (flag & SOR_EISENSTAT) {
644     /* Let  A = L + U + D; where L is lower trianglar,
645     U is upper triangular, E is diagonal; This routine applies
646 
647             (L + E)^{-1} A (U + E)^{-1}
648 
649     to a vector efficiently using Eisenstat's trick. This is for
650     the case of SSOR preconditioner, so E is D/omega where omega
651     is the relaxation factor.
652     */
653     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
654     PLogObjectParent(matin,tt);
655     VecGetArray(tt,&t);
656     scale = (2.0/omega) - 1.0;
657     /*  x = (E + U)^{-1} b */
658     VecSet(&zero,mat->lvec);
659     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
660                               mat->Mvctx); CHKERRQ(ierr);
661     for ( i=m-1; i>-1; i-- ) {
662       n    = A->i[i+1] - diag[i] - 1;
663       idx  = A->j + diag[i] + !shift;
664       v    = A->a + diag[i] + !shift;
665       sum  = b[i];
666       SPARSEDENSEMDOT(sum,xs,v,idx,n);
667       d    = fshift + A->a[diag[i]+shift];
668       n    = B->i[i+1] - B->i[i];
669       idx  = B->j + B->i[i] + shift;
670       v    = B->a + B->i[i] + shift;
671       SPARSEDENSEMDOT(sum,ls,v,idx,n);
672       x[i] = omega*(sum/d);
673     }
674     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
675                             mat->Mvctx); CHKERRQ(ierr);
676 
677     /*  t = b - (2*E - D)x */
678     v = A->a;
679     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
680 
681     /*  t = (E + L)^{-1}t */
682     ts = t + shift; /* shifted by one for index start of a or mat->j*/
683     diag = A->diag;
684     VecSet(&zero,mat->lvec);
685     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
686                                                  mat->Mvctx); CHKERRQ(ierr);
687     for ( i=0; i<m; i++ ) {
688       n    = diag[i] - A->i[i];
689       idx  = A->j + A->i[i] + shift;
690       v    = A->a + A->i[i] + shift;
691       sum  = t[i];
692       SPARSEDENSEMDOT(sum,ts,v,idx,n);
693       d    = fshift + A->a[diag[i]+shift];
694       n    = B->i[i+1] - B->i[i];
695       idx  = B->j + B->i[i] + shift;
696       v    = B->a + B->i[i] + shift;
697       SPARSEDENSEMDOT(sum,ls,v,idx,n);
698       t[i] = omega*(sum/d);
699     }
700     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
701                                                     mat->Mvctx); CHKERRQ(ierr);
702     /*  x = x + t */
703     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
704     VecDestroy(tt);
705     return 0;
706   }
707 
708 
709   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
710     if (flag & SOR_ZERO_INITIAL_GUESS) {
711       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
712     }
713     else {
714       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
715       CHKERRQ(ierr);
716       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
717       CHKERRQ(ierr);
718     }
719     while (its--) {
720       /* go down through the rows */
721       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
722                               mat->Mvctx); CHKERRQ(ierr);
723       for ( i=0; i<m; i++ ) {
724         n    = A->i[i+1] - A->i[i];
725         idx  = A->j + A->i[i] + shift;
726         v    = A->a + A->i[i] + shift;
727         sum  = b[i];
728         SPARSEDENSEMDOT(sum,xs,v,idx,n);
729         d    = fshift + A->a[diag[i]+shift];
730         n    = B->i[i+1] - B->i[i];
731         idx  = B->j + B->i[i] + shift;
732         v    = B->a + B->i[i] + shift;
733         SPARSEDENSEMDOT(sum,ls,v,idx,n);
734         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
735       }
736       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
737                             mat->Mvctx); CHKERRQ(ierr);
738       /* come up through the rows */
739       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
740                               mat->Mvctx); CHKERRQ(ierr);
741       for ( i=m-1; i>-1; i-- ) {
742         n    = A->i[i+1] - A->i[i];
743         idx  = A->j + A->i[i] + shift;
744         v    = A->a + A->i[i] + shift;
745         sum  = b[i];
746         SPARSEDENSEMDOT(sum,xs,v,idx,n);
747         d    = fshift + A->a[diag[i]+shift];
748         n    = B->i[i+1] - B->i[i];
749         idx  = B->j + B->i[i] + shift;
750         v    = B->a + B->i[i] + shift;
751         SPARSEDENSEMDOT(sum,ls,v,idx,n);
752         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
753       }
754       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
755                             mat->Mvctx); CHKERRQ(ierr);
756     }
757   }
758   else if (flag & SOR_FORWARD_SWEEP){
759     if (flag & SOR_ZERO_INITIAL_GUESS) {
760       VecSet(&zero,mat->lvec);
761       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
762                               mat->Mvctx); CHKERRQ(ierr);
763       for ( i=0; i<m; i++ ) {
764         n    = diag[i] - A->i[i];
765         idx  = A->j + A->i[i] + shift;
766         v    = A->a + A->i[i] + shift;
767         sum  = b[i];
768         SPARSEDENSEMDOT(sum,xs,v,idx,n);
769         d    = fshift + A->a[diag[i]+shift];
770         n    = B->i[i+1] - B->i[i];
771         idx  = B->j + B->i[i] + shift;
772         v    = B->a + B->i[i] + shift;
773         SPARSEDENSEMDOT(sum,ls,v,idx,n);
774         x[i] = omega*(sum/d);
775       }
776       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
777                             mat->Mvctx); CHKERRQ(ierr);
778       its--;
779     }
780     while (its--) {
781       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
782       CHKERRQ(ierr);
783       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx);
784       CHKERRQ(ierr);
785       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
786                               mat->Mvctx); CHKERRQ(ierr);
787       for ( i=0; i<m; i++ ) {
788         n    = A->i[i+1] - A->i[i];
789         idx  = A->j + A->i[i] + shift;
790         v    = A->a + A->i[i] + shift;
791         sum  = b[i];
792         SPARSEDENSEMDOT(sum,xs,v,idx,n);
793         d    = fshift + A->a[diag[i]+shift];
794         n    = B->i[i+1] - B->i[i];
795         idx  = B->j + B->i[i] + shift;
796         v    = B->a + B->i[i] + shift;
797         SPARSEDENSEMDOT(sum,ls,v,idx,n);
798         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
799       }
800       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN,
801                             mat->Mvctx); CHKERRQ(ierr);
802     }
803   }
804   else if (flag & SOR_BACKWARD_SWEEP){
805     if (flag & SOR_ZERO_INITIAL_GUESS) {
806       VecSet(&zero,mat->lvec);
807       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
808                               mat->Mvctx); CHKERRQ(ierr);
809       for ( i=m-1; i>-1; i-- ) {
810         n    = A->i[i+1] - diag[i] - 1;
811         idx  = A->j + diag[i] + !shift;
812         v    = A->a + diag[i] + !shift;
813         sum  = b[i];
814         SPARSEDENSEMDOT(sum,xs,v,idx,n);
815         d    = fshift + A->a[diag[i]+shift];
816         n    = B->i[i+1] - B->i[i];
817         idx  = B->j + B->i[i] + shift;
818         v    = B->a + B->i[i] + shift;
819         SPARSEDENSEMDOT(sum,ls,v,idx,n);
820         x[i] = omega*(sum/d);
821       }
822       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
823                             mat->Mvctx); CHKERRQ(ierr);
824       its--;
825     }
826     while (its--) {
827       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERDOWN,
828                             mat->Mvctx); CHKERRQ(ierr);
829       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERDOWN,
830                             mat->Mvctx); CHKERRQ(ierr);
831       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
832                               mat->Mvctx); CHKERRQ(ierr);
833       for ( i=m-1; i>-1; i-- ) {
834         n    = A->i[i+1] - A->i[i];
835         idx  = A->j + A->i[i] + shift;
836         v    = A->a + A->i[i] + shift;
837         sum  = b[i];
838         SPARSEDENSEMDOT(sum,xs,v,idx,n);
839         d    = fshift + A->a[diag[i]+shift];
840         n    = B->i[i+1] - B->i[i];
841         idx  = B->j + B->i[i] + shift;
842         v    = B->a + B->i[i] + shift;
843         SPARSEDENSEMDOT(sum,ls,v,idx,n);
844         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
845       }
846       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP,
847                             mat->Mvctx); CHKERRQ(ierr);
848     }
849   }
850   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
851     if (flag & SOR_ZERO_INITIAL_GUESS) {
852       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
853     }
854     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
855     CHKERRQ(ierr);
856     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
857     CHKERRQ(ierr);
858     while (its--) {
859       /* go down through the rows */
860       for ( i=0; i<m; i++ ) {
861         n    = A->i[i+1] - A->i[i];
862         idx  = A->j + A->i[i] + shift;
863         v    = A->a + A->i[i] + shift;
864         sum  = b[i];
865         SPARSEDENSEMDOT(sum,xs,v,idx,n);
866         d    = fshift + A->a[diag[i]+shift];
867         n    = B->i[i+1] - B->i[i];
868         idx  = B->j + B->i[i] + shift;
869         v    = B->a + B->i[i] + shift;
870         SPARSEDENSEMDOT(sum,ls,v,idx,n);
871         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
872       }
873       /* come up through the rows */
874       for ( i=m-1; i>-1; i-- ) {
875         n    = A->i[i+1] - A->i[i];
876         idx  = A->j + A->i[i] + shift;
877         v    = A->a + A->i[i] + shift;
878         sum  = b[i];
879         SPARSEDENSEMDOT(sum,xs,v,idx,n);
880         d    = fshift + A->a[diag[i]+shift];
881         n    = B->i[i+1] - B->i[i];
882         idx  = B->j + B->i[i] + shift;
883         v    = B->a + B->i[i] + shift;
884         SPARSEDENSEMDOT(sum,ls,v,idx,n);
885         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
886       }
887     }
888   }
889   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
890     if (flag & SOR_ZERO_INITIAL_GUESS) {
891       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
892     }
893     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
894     CHKERRQ(ierr);
895     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx);
896     CHKERRQ(ierr);
897     while (its--) {
898       for ( i=0; i<m; i++ ) {
899         n    = A->i[i+1] - A->i[i];
900         idx  = A->j + A->i[i] + shift;
901         v    = A->a + A->i[i] + shift;
902         sum  = b[i];
903         SPARSEDENSEMDOT(sum,xs,v,idx,n);
904         d    = fshift + A->a[diag[i]+shift];
905         n    = B->i[i+1] - B->i[i];
906         idx  = B->j + B->i[i] + shift;
907         v    = B->a + B->i[i] + shift;
908         SPARSEDENSEMDOT(sum,ls,v,idx,n);
909         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
910       }
911     }
912   }
913   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
914     if (flag & SOR_ZERO_INITIAL_GUESS) {
915       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
916     }
917     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL,
918                             mat->Mvctx); CHKERRQ(ierr);
919     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL,
920                             mat->Mvctx); CHKERRQ(ierr);
921     while (its--) {
922       for ( i=m-1; i>-1; i-- ) {
923         n    = A->i[i+1] - A->i[i];
924         idx  = A->j + A->i[i] + shift;
925         v    = A->a + A->i[i] + shift;
926         sum  = b[i];
927         SPARSEDENSEMDOT(sum,xs,v,idx,n);
928         d    = fshift + A->a[diag[i]+shift];
929         n    = B->i[i+1] - B->i[i];
930         idx  = B->j + B->i[i] + shift;
931         v    = B->a + B->i[i] + shift;
932         SPARSEDENSEMDOT(sum,ls,v,idx,n);
933         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
934       }
935     }
936   }
937   return 0;
938 }
939 
940 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
941                              int *nzalloc,int *mem)
942 {
943   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
944   Mat        A = mat->A, B = mat->B;
945   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
946 
947   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
948   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
949   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
950   if (flag == MAT_LOCAL) {
951     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
952   } else if (flag == MAT_GLOBAL_MAX) {
953     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
954     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
955   } else if (flag == MAT_GLOBAL_SUM) {
956     MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
957     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
958   }
959   return 0;
960 }
961 
962 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
963 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
964 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
965 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
966 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
967 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
968 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
969 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
970 
971 static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
972 {
973   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
974 
975   if      (op == NO_NEW_NONZERO_LOCATIONS)  {
976     MatSetOption(aij->A,op);
977     MatSetOption(aij->B,op);
978   }
979   else if (op == YES_NEW_NONZERO_LOCATIONS) {
980     MatSetOption(aij->A,op);
981     MatSetOption(aij->B,op);
982   }
983   else if (op == COLUMN_ORIENTED)
984     SETERRQ(1,"MatSetOption_MPIAIJ:Column oriented not supported");
985   return 0;
986 }
987 
988 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
989 {
990   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
991   *m = mat->M; *n = mat->N;
992   return 0;
993 }
994 
995 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
996 {
997   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
998   *m = mat->m; *n = mat->N;
999   return 0;
1000 }
1001 
1002 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1003 {
1004   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1005   *m = mat->rstart; *n = mat->rend;
1006   return 0;
1007 }
1008 
1009 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1010 {
1011   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1012   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1013   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1014   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1015 
1016   if (row < rstart || row >= rend)
1017     SETERRQ(1,"MatGetRow_MPIAIJ:Currently you can get only local rows")
1018   lrow = row - rstart;
1019 
1020   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1021   if (!v)   {pvA = 0; pvB = 0;}
1022   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1023   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1024   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1025   nztot = nzA + nzB;
1026 
1027   if (v  || idx) {
1028     if (nztot) {
1029       /* Sort by increasing column numbers, assuming A and B already sorted */
1030       int imark;
1031       if (mat->assembled) {
1032         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1033       }
1034       if (v) {
1035         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1036         for ( i=0; i<nzB; i++ ) {
1037           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1038           else break;
1039         }
1040         imark = i;
1041         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1042         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1043       }
1044       if (idx) {
1045         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1046         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1047         for ( i=0; i<nzB; i++ ) {
1048           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1049           else break;
1050         }
1051         imark = i;
1052         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1053         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1054       }
1055     }
1056     else {*idx = 0; *v=0;}
1057   }
1058   *nz = nztot;
1059   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1060   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1061   return 0;
1062 }
1063 
1064 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1065 {
1066   if (idx) PETSCFREE(*idx);
1067   if (v) PETSCFREE(*v);
1068   return 0;
1069 }
1070 
1071 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
1072 {
1073   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1074   int        ierr, i, j, cstart = aij->cstart;
1075   double     sum = 0.0;
1076   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1077   Scalar     *v;
1078   int        shift = amat->indexshift;
1079 
1080   if (!aij->assembled)
1081     SETERRQ(1,"MatNorm_MPIAIJ: Cannot compute norm of unassembled matrix");
1082   if (aij->numtids == 1) {
1083     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1084   } else {
1085     if (type == NORM_FROBENIUS) {
1086       v = amat->a;
1087       for (i=0; i<amat->nz; i++ ) {
1088 #if defined(PETSC_COMPLEX)
1089         sum += real(conj(*v)*(*v)); v++;
1090 #else
1091         sum += (*v)*(*v); v++;
1092 #endif
1093       }
1094       v = bmat->a;
1095       for (i=0; i<bmat->nz; i++ ) {
1096 #if defined(PETSC_COMPLEX)
1097         sum += real(conj(*v)*(*v)); v++;
1098 #else
1099         sum += (*v)*(*v); v++;
1100 #endif
1101       }
1102       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1103       *norm = sqrt(*norm);
1104     }
1105     else if (type == NORM_1) { /* max column norm */
1106       double *tmp, *tmp2;
1107       int    *jj, *garray = aij->garray;
1108       tmp = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1109       tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1110       PETSCMEMSET(tmp,0,aij->N*sizeof(double));
1111       *norm = 0.0;
1112       v = amat->a; jj = amat->j;
1113       for ( j=0; j<amat->nz; j++ ) {
1114 #if defined(PETSC_COMPLEX)
1115         tmp[cstart + *jj++ + shift] += abs(*v++);
1116 #else
1117         tmp[cstart + *jj++ + shift] += fabs(*v++);
1118 #endif
1119       }
1120       v = bmat->a; jj = bmat->j;
1121       for ( j=0; j<bmat->nz; j++ ) {
1122 #if defined(PETSC_COMPLEX)
1123         tmp[garray[*jj++ + shift]] += abs(*v++);
1124 #else
1125         tmp[garray[*jj++ + shift]] += fabs(*v++);
1126 #endif
1127       }
1128       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1129       for ( j=0; j<aij->N; j++ ) {
1130         if (tmp2[j] > *norm) *norm = tmp2[j];
1131       }
1132       PETSCFREE(tmp); PETSCFREE(tmp2);
1133     }
1134     else if (type == NORM_INFINITY) { /* max row norm */
1135       double normtemp = 0.0;
1136       for ( j=0; j<amat->m; j++ ) {
1137         v = amat->a + amat->i[j] + shift;
1138         sum = 0.0;
1139         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1140 #if defined(PETSC_COMPLEX)
1141           sum += abs(*v); v++;
1142 #else
1143           sum += fabs(*v); v++;
1144 #endif
1145         }
1146         v = bmat->a + bmat->i[j] + shift;
1147         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1148 #if defined(PETSC_COMPLEX)
1149           sum += abs(*v); v++;
1150 #else
1151           sum += fabs(*v); v++;
1152 #endif
1153         }
1154         if (sum > normtemp) normtemp = sum;
1155         MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1156       }
1157     }
1158     else {
1159       SETERRQ(1,"MatNorm_MPIRow:No support for the two norm");
1160     }
1161   }
1162   return 0;
1163 }
1164 
1165 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1166 {
1167   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1168   int        ierr;
1169   Mat        B;
1170   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1171   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1172   Scalar     *array;
1173   int        shift = Aloc->indexshift;
1174 
1175   if (!matout && M != N) SETERRQ(1,
1176     "MatTranspose_MPIAIJ: Cannot transpose rectangular matrix in place");
1177   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1178   CHKERRQ(ierr);
1179 
1180   /* copy over the A part */
1181   Aloc = (Mat_SeqAIJ*) a->A->data;
1182   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1183   row = a->rstart;
1184   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1185   for ( i=0; i<m; i++ ) {
1186       ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1187       CHKERRQ(ierr);
1188       row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1189   }
1190   aj = Aloc->j;
1191   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1192 
1193   /* copy over the B part */
1194   Aloc = (Mat_SeqAIJ*) a->B->data;
1195   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1196   row = a->rstart;
1197   ct = cols = (int *) PETSCMALLOC( (ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1198   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1199   for ( i=0; i<m; i++ ) {
1200     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1201     CHKERRQ(ierr);
1202     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1203   }
1204   PETSCFREE(ct);
1205   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1206   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1207   if (matout) {
1208     *matout = B;
1209   } else {
1210     /* This isn't really an in-place transpose .... but free data structures from a */
1211     PETSCFREE(a->rowners);
1212     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1213     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1214     if (a->colmap) PETSCFREE(a->colmap);
1215     if (a->garray) PETSCFREE(a->garray);
1216     if (a->lvec) VecDestroy(a->lvec);
1217     if (a->Mvctx) VecScatterCtxDestroy(a->Mvctx);
1218     PETSCFREE(a);
1219     PETSCMEMCPY(A,B,sizeof(struct _Mat));
1220     PETSCHEADERDESTROY(B);
1221   }
1222   return 0;
1223 }
1224 
1225 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1226 static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1227 
1228 /* -------------------------------------------------------------------*/
1229 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1230        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1231        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1232        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1233        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1234        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1235        MatLUFactor_MPIAIJ,0,
1236        MatRelax_MPIAIJ,
1237        MatTranspose_MPIAIJ,
1238        MatGetInfo_MPIAIJ,0,
1239        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1240        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1241        0,
1242        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1243        MatGetReordering_MPIAIJ,
1244        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1245        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1246        MatILUFactorSymbolic_MPIAIJ,0,
1247        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1248 
1249 /*@
1250    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1251    (the default parallel PETSc format).
1252 
1253    Input Parameters:
1254 .  comm - MPI communicator
1255 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1256 .  n - number of local columns (or PETSC_DECIDE to have calculated
1257            if N is given)
1258 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1259 .  N - number of global columns (or PETSC_DECIDE to have calculated
1260            if n is given)
1261 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1262            (same for all local rows)
1263 .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1264            or null (possibly different for each row).  You must leave room
1265            for the diagonal entry even if it is zero.
1266 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1267            submatrix (same for all local rows).
1268 .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1269            submatrix or null (possibly different for each row).
1270 
1271    Output Parameter:
1272 .  newmat - the matrix
1273 
1274    Notes:
1275    The AIJ format (also called the Yale sparse matrix format or
1276    compressed row storage), is fully compatible with standard Fortran 77
1277    storage.  That is, the stored row and column indices begin at
1278    one, not zero.  See the users manual for further details.
1279 
1280    The user MUST specify either the local or global matrix dimensions
1281    (possibly both).
1282 
1283    Storage Information:
1284    For a square global matrix we define each processor's diagonal portion
1285    to be its local rows and the corresponding columns (a square submatrix);
1286    each processor's off-diagonal portion encompasses the remainder of the
1287    local matrix (a rectangular submatrix).
1288 
1289    The user can specify preallocated storage for the diagonal part of
1290    the local submatrix with either d_nz or d_nnz (not both). Set both
1291    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1292    Likewise, specify preallocated storage for the off-diagonal part of
1293    the local submatrix with o_nz or o_nnz (not both).
1294 
1295 .keywords: matrix, aij, compressed row, sparse, parallel
1296 
1297 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1298 @*/
1299 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1300                  int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
1301 {
1302   Mat          mat;
1303   Mat_MPIAIJ   *aij;
1304   int          ierr, i,sum[2],work[2];
1305   *newmat         = 0;
1306   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1307   PLogObjectCreate(mat);
1308   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1309   PETSCMEMCPY(&mat->ops,&MatOps,sizeof(struct _MatOps));
1310   mat->destroy    = MatDestroy_MPIAIJ;
1311   mat->view       = MatView_MPIAIJ;
1312   mat->factor     = 0;
1313 
1314   aij->insertmode = NOTSETVALUES;
1315   MPI_Comm_rank(comm,&aij->mytid);
1316   MPI_Comm_size(comm,&aij->numtids);
1317 
1318   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1319     work[0] = m; work[1] = n;
1320     MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
1321     if (M == PETSC_DECIDE) M = sum[0];
1322     if (N == PETSC_DECIDE) N = sum[1];
1323   }
1324   if (m == PETSC_DECIDE)
1325     {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
1326   if (n == PETSC_DECIDE)
1327     {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
1328   aij->m = m;
1329   aij->n = n;
1330   aij->N = N;
1331   aij->M = M;
1332 
1333   /* build local table of row and column ownerships */
1334   aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
1335   CHKPTRQ(aij->rowners);
1336   PLogObjectMemory(mat,2*(aij->numtids+2)*sizeof(int)+sizeof(struct _Mat)+
1337                        sizeof(Mat_MPIAIJ));
1338   aij->cowners = aij->rowners + aij->numtids + 1;
1339   MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
1340   aij->rowners[0] = 0;
1341   for ( i=2; i<=aij->numtids; i++ ) {
1342     aij->rowners[i] += aij->rowners[i-1];
1343   }
1344   aij->rstart = aij->rowners[aij->mytid];
1345   aij->rend   = aij->rowners[aij->mytid+1];
1346   MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
1347   aij->cowners[0] = 0;
1348   for ( i=2; i<=aij->numtids; i++ ) {
1349     aij->cowners[i] += aij->cowners[i-1];
1350   }
1351   aij->cstart = aij->cowners[aij->mytid];
1352   aij->cend   = aij->cowners[aij->mytid+1];
1353 
1354   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
1355   CHKERRQ(ierr);
1356   PLogObjectParent(mat,aij->A);
1357   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
1358   CHKERRQ(ierr);
1359   PLogObjectParent(mat,aij->B);
1360 
1361   /* build cache for off array entries formed */
1362   ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
1363   aij->colmap    = 0;
1364   aij->garray    = 0;
1365 
1366   /* stuff used for matrix vector multiply */
1367   aij->lvec      = 0;
1368   aij->Mvctx     = 0;
1369   aij->assembled = 0;
1370 
1371   *newmat = mat;
1372   return 0;
1373 }
1374 
1375 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1376 {
1377   Mat        mat;
1378   Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
1379   int        ierr, len;
1380   *newmat      = 0;
1381 
1382   if (!oldmat->assembled)
1383     SETERRQ(1,"MatCopyPrivate_MPIAIJ:Cannot copy unassembled matrix");
1384   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1385   PLogObjectCreate(mat);
1386   mat->data       = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
1387   PETSCMEMCPY(&mat->ops,&MatOps,sizeof(struct _MatOps));
1388   mat->destroy    = MatDestroy_MPIAIJ;
1389   mat->view       = MatView_MPIAIJ;
1390   mat->factor     = matin->factor;
1391 
1392   aij->m          = oldmat->m;
1393   aij->n          = oldmat->n;
1394   aij->M          = oldmat->M;
1395   aij->N          = oldmat->N;
1396 
1397   aij->assembled  = 1;
1398   aij->rstart     = oldmat->rstart;
1399   aij->rend       = oldmat->rend;
1400   aij->cstart     = oldmat->cstart;
1401   aij->cend       = oldmat->cend;
1402   aij->numtids    = oldmat->numtids;
1403   aij->mytid      = oldmat->mytid;
1404   aij->insertmode = NOTSETVALUES;
1405 
1406   aij->rowners        = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
1407   CHKPTRQ(aij->rowners);
1408   PLogObjectMemory(mat,(aij->numtids+1)*sizeof(int)+sizeof(struct _Mat)+
1409                        sizeof(Mat_MPIAIJ));
1410   PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
1411   ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
1412   if (oldmat->colmap) {
1413     aij->colmap      = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
1414     CHKPTRQ(aij->colmap);
1415     PLogObjectMemory(mat,(aij->N)*sizeof(int));
1416     PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
1417   } else aij->colmap = 0;
1418   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1419     aij->garray      = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
1420     PLogObjectMemory(mat,len*sizeof(int));
1421     PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
1422   } else aij->garray = 0;
1423 
1424   ierr =  VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
1425   PLogObjectParent(mat,aij->lvec);
1426   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
1427   PLogObjectParent(mat,aij->Mvctx);
1428   ierr =  MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
1429   PLogObjectParent(mat,aij->A);
1430   ierr =  MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
1431   PLogObjectParent(mat,aij->B);
1432   *newmat = mat;
1433   return 0;
1434 }
1435