xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bc5ccf886fc7b029025ea4de8c4047e1db2135de)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.285 1999/02/26 17:08:13 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
16 /* local utility routine that creates a mapping from the global column
17 number to the local column number in the off-diagonal part (B) of the local
18 storage of the matrix.  This is done in a non-scalable way, since the length
19 of colmap equals the global number of columns (a table is used instead when USE_CTABLE is defined).
20 */
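/*
   A minimal lookup sketch (illustrative only; gcol and lcol are hypothetical
   names): once colmap has been built, a global column gcol that falls in the
   off-diagonal part is translated to a local column lcol of aij->B by

#if defined (USE_CTABLE)
       ierr = TableFind(aij->colmap,gcol+1,&lcol); CHKERRQ(ierr); lcol--;
#else
       lcol = aij->colmap[gcol] - 1;
#endif

   where lcol < 0 indicates that gcol is not present in the off-diagonal part.
*/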
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
27   int        n = B->n,i;
28 #if defined (USE_CTABLE)
29   int        ierr;
30 #endif
31 
32   PetscFunctionBegin;
33 #if defined (USE_CTABLE)
34   ierr = TableCreate(aij->n/5,&aij->colmap); CHKERRQ(ierr);
35   for ( i=0; i<n; i++ ){
36     ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
37   }
38 #else
39   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
40   PLogObjectMemory(mat,aij->N*sizeof(int));
41   PetscMemzero(aij->colmap,aij->N*sizeof(int));
42   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
43 #endif
44   PetscFunctionReturn(0);
45 }
46 
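/*
   The two macros below insert a single value at (row,col) of the diagonal (A)
   or off-diagonal (B) sequential AIJ block: the row is searched for the
   column, an existing entry is added to or overwritten, and otherwise a new
   nonzero is created, growing the matrix storage by CHUNKSIZE entries for
   that row when it has no free slots left.
*/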
47 #define CHUNKSIZE   15
48 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
49 { \
50  \
51     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
52     rmax = aimax[row]; nrow = ailen[row];  \
53     col1 = col - shift; \
54      \
55     low = 0; high = nrow; \
56     while (high-low > 5) { \
57       t = (low+high)/2; \
58       if (rp[t] > col) high = t; \
59       else             low  = t; \
60     } \
61       for ( _i=0; _i<nrow; _i++ ) { \
62         if (rp[_i] > col1) break; \
63         if (rp[_i] == col1) { \
64           if (addv == ADD_VALUES) ap[_i] += value;   \
65           else                  ap[_i] = value; \
66           goto a_noinsert; \
67         } \
68       }  \
69       if (nonew == 1) goto a_noinsert; \
70       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
71       if (nrow >= rmax) { \
72         /* there is no extra room in row, therefore enlarge */ \
73         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
74         Scalar *new_a; \
75  \
76         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
77  \
78         /* malloc new storage space */ \
79         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
80         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
81         new_j   = (int *) (new_a + new_nz); \
82         new_i   = new_j + new_nz; \
83  \
84         /* copy over old data into new slots */ \
85         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
86         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
87         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
88         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
89         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
90                                                            len*sizeof(int)); \
91         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
92         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
93                                                            len*sizeof(Scalar));  \
94         /* free up old matrix storage */ \
95  \
96         PetscFree(a->a);  \
97         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = 1; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for ( ii=N; ii>=_i; ii-- ) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for ( _i=0; _i<nrow; _i++ ) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
152         new_j   = (int *) (new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
157         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int)); \
162         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
163         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));  \
165         /* free up old matrix storage */ \
166  \
167         PetscFree(b->a);  \
168         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
169         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
170         b->singlemalloc = 1; \
171  \
172         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
173         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
174         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
175         b->maxnz += CHUNKSIZE; \
176         b->reallocs++; \
177       } \
178       N = nrow++ - 1; b->nz++; \
179       /* shift up all the later entries in this row */ \
180       for ( ii=N; ii>=_i; ii-- ) { \
181         rp[ii+1] = rp[ii]; \
182         ap[ii+1] = ap[ii]; \
183       } \
184       rp[_i] = col1;  \
185       ap[_i] = value;  \
186       b_noinsert: ; \
187       bilen[row] = nrow; \
188 }
189 
190 #undef __FUNC__
191 #define __FUNC__ "MatSetValues_MPIAIJ"
192 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
193 {
194   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
195   Scalar     value;
196   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
197   int        cstart = aij->cstart, cend = aij->cend,row,col;
198   int        roworiented = aij->roworiented;
199 
200   /* Some variables required by the macros defined above */
201   Mat        A = aij->A;
202   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
203   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
204   Scalar     *aa = a->a;
205 
206   Mat        B = aij->B;
207   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
208   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
209   Scalar     *ba = b->a;
210 
211   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
212   int        nonew = a->nonew,shift = a->indexshift;
213   Scalar     *ap;
214 
215   PetscFunctionBegin;
216   for ( i=0; i<m; i++ ) {
217     if (im[i] < 0) continue;
218 #if defined(USE_PETSC_BOPT_g)
219     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
220 #endif
221     if (im[i] >= rstart && im[i] < rend) {
222       row = im[i] - rstart;
223       for ( j=0; j<n; j++ ) {
224         if (in[j] >= cstart && in[j] < cend){
225           col = in[j] - cstart;
226           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         }
230         else if (in[j] < 0) continue;
231 #if defined(USE_PETSC_BOPT_g)
232         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!aij->colmap) {
237               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
238             }
239 #if defined (USE_CTABLE)
240             ierr = TableFind(aij->colmap,in[j]+1,&col); CHKERRQ(ierr);
241             col--;
242 #else
243             col = aij->colmap[in[j]] - 1;
244 #endif
245             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
246               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
247               col =  in[j];
248               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
249               B = aij->B;
250               b = (Mat_SeqAIJ *) B->data;
251               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
252               ba = b->a;
253             }
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
256           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
257           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
258         }
259       }
260     } else {
261       if (roworiented && !aij->donotstash) {
262         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
263       } else {
264         if (!aij->donotstash) {
265           row = im[i];
266           for ( j=0; j<n; j++ ) {
267             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
268           }
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
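/*
   Caller-side sketch (illustrative only; mat, row, cols and vals are
   hypothetical, with mat an already created MPIAIJ matrix): values are
   supplied with GLOBAL indices; entries in locally owned rows go straight
   into the A (diagonal) or B (off-diagonal) block above, everything else is
   stashed until assembly.

       int    row = 5, cols[2] = {5,17};
       Scalar vals[2] = {2.0,-1.0};
       ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/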
275 
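/*
   MatGetValues_MPIAIJ retrieves entries by global indices; only locally owned
   rows are supported.  Columns falling in the off-diagonal part are located
   through colmap, and entries that are not stored are returned as 0.0.
*/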
276 #undef __FUNC__
277 #define __FUNC__ "MatGetValues_MPIAIJ"
278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
279 {
280   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
281   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
282   int        cstart = aij->cstart, cend = aij->cend,row,col;
283 
284   PetscFunctionBegin;
285   for ( i=0; i<m; i++ ) {
286     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
287     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
288     if (idxm[i] >= rstart && idxm[i] < rend) {
289       row = idxm[i] - rstart;
290       for ( j=0; j<n; j++ ) {
291         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
292         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
293         if (idxn[j] >= cstart && idxn[j] < cend){
294           col = idxn[j] - cstart;
295           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
296         } else {
297           if (!aij->colmap) {
298             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
299           }
300 #if defined (USE_CTABLE)
301           ierr = TableFind(aij->colmap,idxn[j]+1,&col); CHKERRQ(ierr);
302           col--;
303 #else
304           col = aij->colmap[idxn[j]] - 1;
305 #endif
306           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
307           else {
308             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
309           }
310         }
311       }
312     } else {
313       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
314     }
315   }
316   PetscFunctionReturn(0);
317 }
318 #if defined (__JUNK__)
319 
320 #undef __FUNC__
321 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
322 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
323 {
324   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
325   MPI_Comm    comm = mat->comm;
326   int         size = aij->size, *owners = aij->rowners;
327   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
328   MPI_Request *send_waits,*recv_waits;
329   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
330   InsertMode  addv;
331   Scalar      *rvalues,*svalues;
332 
333   PetscFunctionBegin;
334   if (aij->donotstash) {
335     aij->svalues    = 0; aij->rvalues    = 0;
336     aij->nsends     = 0; aij->nrecvs     = 0;
337     aij->send_waits = 0; aij->recv_waits = 0;
338     aij->rmax       = 0;
339     PetscFunctionReturn(0);
340   }
341 
342   /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
343   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
344   if (addv == (ADD_VALUES|INSERT_VALUES)) {
345     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
346   }
347   mat->insertmode = addv; /* in case this processor had no cache */
348 
349   /*  first count number of contributors to each processor */
350   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
351   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
352   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
353   for ( i=0; i<aij->stash.n; i++ ) {
354     idx = aij->stash.idx[i];
355     for ( j=0; j<size; j++ ) {
356       if (idx >= owners[j] && idx < owners[j+1]) {
357         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
358       }
359     }
360   }
361   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
362 
363   /* inform other processors of number of messages and max length*/
364   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
365   ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
366   nreceives = work[rank];
367   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
368   nmax = work[rank];
369   PetscFree(work);
370 
371   /* post receives:
372        1) each message will consist of ordered triples
373      (global row index,global column index,value); the indices are stored
374      as Scalars to simplify the message passing.
375        2) since we don't know how long each individual message is, we
376      allocate the largest needed buffer for each receive.  Potentially
377      this is a lot of wasted space.
378 
379 
380        This could be done better.
381   */
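  /*
     Receive-buffer layout sketch: message i is unpacked from rvalues+3*nmax*i
     and holds up to nmax triples, e.g. {row0,col0,val0, row1,col1,val1, ...};
     the actual count is obtained from MPI_Get_count() in MatAssemblyEnd_MPIAIJ.
  */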
382   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
383   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
384   for ( i=0; i<nreceives; i++ ) {
385     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
386               comm,recv_waits+i);CHKERRQ(ierr);
387   }
388 
389   /* do sends:
390       1) starts[i] gives the starting index in svalues for stuff going to
391          the ith processor
392   */
393   svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
394   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
395   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
396   starts[0]  = 0;
397   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
398   for ( i=0; i<aij->stash.n; i++ ) {
399     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
400     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
401     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
402   }
403   PetscFree(owner);
404   starts[0] = 0;
405   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
406   count = 0;
407   for ( i=0; i<size; i++ ) {
408     if (procs[i]) {
409       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
410     }
411   }
412   PetscFree(starts);
413   PetscFree(nprocs);
414 
415   /* Free cache space */
416   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
417   ierr = StashReset_Private(&aij->stash); CHKERRQ(ierr);
418 
419   aij->svalues    = svalues;    aij->rvalues    = rvalues;
420   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
421   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
422   aij->rmax       = nmax;
423 
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNC__
428 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
429 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
430 {
431   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
432   MPI_Status  *send_status,recv_status;
433   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
434   int         row,col,other_disassembled;
435   Scalar      *values,val;
436   InsertMode  addv = mat->insertmode;
437 
438   PetscFunctionBegin;
439 
440   /*  wait on receives */
441   while (count) {
442     ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
443     /* unpack receives into our local space */
444     values = aij->rvalues + 3*imdex*aij->rmax;
445     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
446     n = n/3;
447     for ( i=0; i<n; i++ ) {
448       row = (int) PetscReal(values[3*i]) - aij->rstart;
449       col = (int) PetscReal(values[3*i+1]);
450       val = values[3*i+2];
451       if (col >= aij->cstart && col < aij->cend) {
452         col -= aij->cstart;
453         ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
454       } else {
455         if (mat->was_assembled || mat->assembled) {
456           if (!aij->colmap) {
457             ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
458           }
459 #if defined (USE_CTABLE)
460           ierr = TableFind(aij->colmap,col+1,&col); CHKERRQ(ierr);
461           col--;
462 #else
463           col = aij->colmap[col] - 1;
464 #endif
465           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
466             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
467             col = (int) PetscReal(values[3*i+1]);
468           }
469         }
470         ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
471       }
472     }
473     count--;
474   }
475   if (aij->recv_waits) PetscFree(aij->recv_waits);
476   if (aij->rvalues)    PetscFree(aij->rvalues);
477 
478   /* wait on sends */
479   if (aij->nsends) {
480     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
481     ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
482     PetscFree(send_status);
483   }
484   if (aij->send_waits) PetscFree(aij->send_waits);
485   if (aij->svalues)    PetscFree(aij->svalues);
486 
487   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
488   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
489 
490   /* determine if any processor has disassembled; if so, we must
491      also disassemble ourselves, in order that we may reassemble. */
492   /*
493      if the nonzero structure of submatrix B cannot change, then we know that
494      no processor disassembled and we can skip this check
495   */
496   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
497     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
498     if (mat->was_assembled && !other_disassembled) {
499       ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
500     }
501   }
502 
503   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
504     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
505   }
506   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
507   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
508 
509   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
510   PetscFunctionReturn(0);
511 }
512 
513 #else
514 
515 #undef __FUNC__
516 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
517 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
518 {
519   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
520   int         ierr;
521   InsertMode  addv;
522 
523   PetscFunctionBegin;
524   if (aij->donotstash) {
525     PetscFunctionReturn(0);
526   }
527 
528   /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES; the bitwise OR below equals ADD_VALUES|INSERT_VALUES only when the modes are mixed */
529   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
530   if (addv == (ADD_VALUES|INSERT_VALUES)) {
531     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
532   }
533   mat->insertmode = addv; /* in case this processor had no cache */
534 
535   ierr =  StashScatterBegin_Private(&aij->stash,aij->rowners); CHKERRQ(ierr);
536   /* log how much off-processor data was stashed on this processor */
537   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
538   PetscFunctionReturn(0);
539 }
540 
541 
542 #undef __FUNC__
543 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
544 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
545 {
546   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
547   int         imdex,i,j,rstart,ncols,n,ierr;
548   int         *row,*col,other_disassembled;
549   Scalar      *val;
550   InsertMode  addv = mat->insertmode;
551 
552   PetscFunctionBegin;
553   if (!aij->donotstash) {
554     ierr = StashScatterEnd_Private(&aij->stash); CHKERRQ(ierr);
555     for (imdex=0; imdex<aij->stash.nrecvs; imdex++ ) {
556       n   = aij->stash.rdata[imdex].n;
557       row = aij->stash.rdata[imdex].i;
558       col = aij->stash.rdata[imdex].j;
559       val = aij->stash.rdata[imdex].a;
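      /* consecutive stash entries that share a row index are grouped below and
         assembled with a single MatSetValues_MPIAIJ() call per group; e.g.
         received rows {2,2,2,5,5} give one call with 3 columns and one with 2 */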
560       for ( i=0; i<n; ) {
561         /* Now identify the consecutive vals belonging to the same row */
562         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
563         if (j < n) ncols = j-i;
564         else       ncols = n-i;
565         /* Now assemble all these values with a single function call */
566         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
567         i = j;
568       }
569     }
570     ierr = StashReset_Private(&aij->stash); CHKERRQ(ierr);
571   }
572 
573   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled; if so, we must
577      also disassemble ourselves, in order that we may reassemble. */
578   /*
579      if the nonzero structure of submatrix B cannot change, then we know that
580      no processor disassembled and we can skip this check
581   */
582   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
586     }
587   }
588 
589   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
590     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
591   }
592   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
594 
595   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
596   PetscFunctionReturn(0);
597 }
598 
599 #endif
600 
601 #undef __FUNC__
602 #define __FUNC__ "MatZeroEntries_MPIAIJ"
603 int MatZeroEntries_MPIAIJ(Mat A)
604 {
605   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
606   int        ierr;
607 
608   PetscFunctionBegin;
609   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
610   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
611   PetscFunctionReturn(0);
612 }
613 
614 #undef __FUNC__
615 #define __FUNC__ "MatZeroRows_MPIAIJ"
616 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
617 {
618   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
619   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
620   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
621   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
622   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
623   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
624   MPI_Comm       comm = A->comm;
625   MPI_Request    *send_waits,*recv_waits;
626   MPI_Status     recv_status,*send_status;
627   IS             istmp;
628 
629   PetscFunctionBegin;
630   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
631   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
632 
633   /*  first count number of contributors to each processor */
634   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
635   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
636   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
637   for ( i=0; i<N; i++ ) {
638     idx = rows[i];
639     found = 0;
640     for ( j=0; j<size; j++ ) {
641       if (idx >= owners[j] && idx < owners[j+1]) {
642         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
643       }
644     }
645     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
646   }
647   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
648 
649   /* inform other processors of number of messages and max length*/
650   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
651   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
652   nrecvs = work[rank];
653   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
654   nmax = work[rank];
655   PetscFree(work);
656 
657   /* post receives:   */
658   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
659   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
660   for ( i=0; i<nrecvs; i++ ) {
661     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
662   }
663 
664   /* do sends:
665       1) starts[i] gives the starting index in svalues for stuff going to
666          the ith processor
667   */
668   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
669   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
670   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
671   starts[0] = 0;
672   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
673   for ( i=0; i<N; i++ ) {
674     svalues[starts[owner[i]]++] = rows[i];
675   }
676   ISRestoreIndices(is,&rows);
677 
678   starts[0] = 0;
679   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
680   count = 0;
681   for ( i=0; i<size; i++ ) {
682     if (procs[i]) {
683       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
684     }
685   }
686   PetscFree(starts);
687 
688   base = owners[rank];
689 
690   /*  wait on receives */
691   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
692   source = lens + nrecvs;
693   count  = nrecvs; slen = 0;
694   while (count) {
695     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
696     /* unpack receives into our local space */
697     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
698     source[imdex]  = recv_status.MPI_SOURCE;
699     lens[imdex]  = n;
700     slen += n;
701     count--;
702   }
703   PetscFree(recv_waits);
704 
705   /* move the data into the send scatter */
706   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
707   count = 0;
708   for ( i=0; i<nrecvs; i++ ) {
709     values = rvalues + i*nmax;
710     for ( j=0; j<lens[i]; j++ ) {
711       lrows[count++] = values[j] - base;
712     }
713   }
714   PetscFree(rvalues); PetscFree(lens);
715   PetscFree(owner); PetscFree(nprocs);
716 
717   /* actually zap the local rows */
718   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
719   PLogObjectParent(A,istmp);
720 
721   /*
722         Zero the required rows.  If the "diagonal block" of the matrix is
723      square and the user wishes to set the diagonal, we use separate code so
724      that MatSetValues() is not called for each diagonal entry, which would
725      allocate new memory and trigger many mallocs, slowing things down.
726 
727        Contributed by: Mathew Knepley
728   */
729   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
730   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
731   if (diag && (l->A->M == l->A->N)) {
732     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
733   } else if (diag) {
734     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
735     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
736       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
737 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
738     }
739     for ( i = 0; i < slen; i++ ) {
740       row  = lrows[i] + rstart;
741       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
742     }
743     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
744     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
745   } else {
746     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
747   }
748   ierr = ISDestroy(istmp); CHKERRQ(ierr);
749   PetscFree(lrows);
750 
751   /* wait on sends */
752   if (nsends) {
753     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
754     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
755     PetscFree(send_status);
756   }
757   PetscFree(send_waits); PetscFree(svalues);
758 
759   PetscFunctionReturn(0);
760 }
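/*
   Caller-side sketch (illustrative only; is, rows and one are hypothetical,
   A is an existing MPIAIJ matrix): the global rows to zero are gathered in an
   index set on each processor; a non-null diag places that value on the
   diagonal of every zeroed row.

       IS     is;
       int    rows[2] = {0,7};
       Scalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
       ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
       ierr = ISDestroy(is);CHKERRQ(ierr);
*/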
761 
762 #undef __FUNC__
763 #define __FUNC__ "MatMult_MPIAIJ"
764 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
765 {
766   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
767   int        ierr,nt;
768 
769   PetscFunctionBegin;
770   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
771   if (nt != a->n) {
772     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
773   }
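  /* overlap communication with computation: start scattering the off-process
     part of xx into a->lvec, apply the diagonal block while the messages are
     in flight, then complete the scatter and add the off-diagonal product */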
774   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
775   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
776   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
777   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNC__
782 #define __FUNC__ "MatMultAdd_MPIAIJ"
783 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
784 {
785   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
786   int        ierr;
787 
788   PetscFunctionBegin;
789   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
790   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
791   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
792   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNC__
797 #define __FUNC__ "MatMultTrans_MPIAIJ"
798 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
799 {
800   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
801   int        ierr;
802 
803   PetscFunctionBegin;
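  /* yy = A_local^T xx plus the B^T xx contributions, which belong to rows owned
     by other processes and are accumulated there via a reverse (adding) scatter */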
804   /* do nondiagonal part */
805   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
806   /* send it on its way */
807   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
808   /* do local part */
809   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
810   /* receive remote parts: note this assumes the values are not actually */
811   /* inserted in yy until the next line, which is true for my implementation*/
812   /* but is perhaps not always true. */
813   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNC__
818 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
819 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
820 {
821   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
822   int        ierr;
823 
824   PetscFunctionBegin;
825   /* do nondiagonal part */
826   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
827   /* send it on its way */
828   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
829   /* do local part */
830   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
831   /* receive remote parts: note this assumes the values are not actually */
832   /* inserted in zz until the next line, which is true for my implementation*/
833   /* but is perhaps not always true. */
834   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 /*
839   This only works correctly for square matrices where the subblock A->A is the
840    diagonal block
841 */
842 #undef __FUNC__
843 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
844 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
845 {
846   int        ierr;
847   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
848 
849   PetscFunctionBegin;
850   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
851   if (a->rstart != a->cstart || a->rend != a->cend) {
852     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
853   }
854   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
855   PetscFunctionReturn(0);
856 }
857 
858 #undef __FUNC__
859 #define __FUNC__ "MatScale_MPIAIJ"
860 int MatScale_MPIAIJ(Scalar *aa,Mat A)
861 {
862   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
863   int        ierr;
864 
865   PetscFunctionBegin;
866   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
867   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
868   PetscFunctionReturn(0);
869 }
870 
871 #undef __FUNC__
872 #define __FUNC__ "MatDestroy_MPIAIJ"
873 int MatDestroy_MPIAIJ(Mat mat)
874 {
875   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
876   int        ierr;
877 
878   PetscFunctionBegin;
879   if (--mat->refct > 0) PetscFunctionReturn(0);
880 
881   if (mat->mapping) {
882     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
883   }
884   if (mat->bmapping) {
885     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
886   }
887   if (mat->rmap) {
888     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
889   }
890   if (mat->cmap) {
891     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
892   }
893 #if defined(USE_PETSC_LOG)
894   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
895 #endif
896   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
897   PetscFree(aij->rowners);
898   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
899   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
900 #if defined (USE_CTABLE)
901   if (aij->colmap) TableDelete(aij->colmap);
902 #else
903   if (aij->colmap) PetscFree(aij->colmap);
904 #endif
905   if (aij->garray) PetscFree(aij->garray);
906   if (aij->lvec)   VecDestroy(aij->lvec);
907   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
908   if (aij->rowvalues) PetscFree(aij->rowvalues);
909   PetscFree(aij);
910   PLogObjectDestroy(mat);
911   PetscHeaderDestroy(mat);
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNC__
916 #define __FUNC__ "MatView_MPIAIJ_Binary"
917 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
918 {
919   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
920   int         ierr;
921 
922   PetscFunctionBegin;
923   if (aij->size == 1) {
924     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
925   }
926   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
927   PetscFunctionReturn(0);
928 }
929 
930 #undef __FUNC__
931 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
932 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
933 {
934   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
935   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
936   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
937   int         size = aij->size;
938   FILE        *fd;
939   ViewerType  vtype;
940 
941   PetscFunctionBegin;
942   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
943   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
944     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
945     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
946       MatInfo info;
947       int     flg;
948       MPI_Comm_rank(mat->comm,&rank);
949       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
950       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
951       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
952       PetscSequentialPhaseBegin(mat->comm,1);
953       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
954          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
955       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
956          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
957       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
958       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
959       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
960       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
961       fflush(fd);
962       PetscSequentialPhaseEnd(mat->comm,1);
963       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
964       PetscFunctionReturn(0);
965     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
966       PetscFunctionReturn(0);
967     }
968   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
969     Draw       draw;
970     PetscTruth isnull;
971     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
972     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
973   }
974 
975   if (size == 1) {
976     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
977   } else {
978     /* assemble the entire matrix onto first processor. */
979     Mat         A;
980     Mat_SeqAIJ *Aloc;
981     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
982     Scalar      *a;
983 
984     if (!rank) {
985       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
986     } else {
987       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
988     }
989     PLogObjectParent(mat,A);
990 
991     /* copy over the A part */
992     Aloc = (Mat_SeqAIJ*) aij->A->data;
993     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
994     row = aij->rstart;
995     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
996     for ( i=0; i<m; i++ ) {
997       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
998       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
999     }
1000     aj = Aloc->j;
1001     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
1002 
1003     /* copy over the B part */
1004     Aloc = (Mat_SeqAIJ*) aij->B->data;
1005     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1006     row = aij->rstart;
1007     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
1008     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
1009     for ( i=0; i<m; i++ ) {
1010       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1011       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1012     }
1013     PetscFree(ct);
1014     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1015     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1016     /*
1017        Every processor has to participate in the call that draws the matrix,
1018        since the graphics waits are synchronized across all processors that share the Draw object
1019     */
1020     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
1021       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1022     }
1023     ierr = MatDestroy(A); CHKERRQ(ierr);
1024   }
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNC__
1029 #define __FUNC__ "MatView_MPIAIJ"
1030 int MatView_MPIAIJ(Mat mat,Viewer viewer)
1031 {
1032   int         ierr;
1033   ViewerType  vtype;
1034 
1035   PetscFunctionBegin;
1036   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1037   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1038       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
1039     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
1040   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1041     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1042   } else {
1043     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1044   }
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 /*
1049     This has to provide several versions.
1050 
1051      2) a) use only local smoothing, updating outer values only once.
1052         b) local smoothing, updating outer values each inner iteration
1053      3) coloring, updating outer values between colors.
1054 */
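/*
   A sketch of the local sweep below (assuming SPARSEDENSEMDOT subtracts the
   sparse dot product from sum):

       x[i] <- (1-omega)*x[i] + omega*(b[i] - sum_{j!=i} a_ij*x_j)/(a_ii + fshift)

   where columns in the diagonal block read x itself and columns in the
   off-diagonal block read the scattered ghost values in mat->lvec.
*/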
1055 #undef __FUNC__
1056 #define __FUNC__ "MatRelax_MPIAIJ"
1057 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
1058                            double fshift,int its,Vec xx)
1059 {
1060   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1061   Mat        AA = mat->A, BB = mat->B;
1062   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
1063   Scalar     *b,*x,*xs,*ls,d,*v,sum;
1064   int        ierr,*idx, *diag;
1065   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
1066 
1067   PetscFunctionBegin;
1068   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
1069   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
1070   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
1071   xs = x + shift; /* shift by one for index start of 1 */
1072   ls = ls + shift;
1073   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
1074   diag = A->diag;
1075   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1076     if (flag & SOR_ZERO_INITIAL_GUESS) {
1077       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1078       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1079       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1080       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1081       PetscFunctionReturn(0);
1082     }
1083     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1084     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1085     while (its--) {
1086       /* go down through the rows */
1087       for ( i=0; i<m; i++ ) {
1088         n    = A->i[i+1] - A->i[i];
1089 	PLogFlops(4*n+3);
1090         idx  = A->j + A->i[i] + shift;
1091         v    = A->a + A->i[i] + shift;
1092         sum  = b[i];
1093         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1094         d    = fshift + A->a[diag[i]+shift];
1095         n    = B->i[i+1] - B->i[i];
1096         idx  = B->j + B->i[i] + shift;
1097         v    = B->a + B->i[i] + shift;
1098         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1099         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1100       }
1101       /* come up through the rows */
1102       for ( i=m-1; i>-1; i-- ) {
1103         n    = A->i[i+1] - A->i[i];
1104 	PLogFlops(4*n+3);
1105         idx  = A->j + A->i[i] + shift;
1106         v    = A->a + A->i[i] + shift;
1107         sum  = b[i];
1108         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1109         d    = fshift + A->a[diag[i]+shift];
1110         n    = B->i[i+1] - B->i[i];
1111         idx  = B->j + B->i[i] + shift;
1112         v    = B->a + B->i[i] + shift;
1113         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1114         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1115       }
1116     }
1117   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1118     if (flag & SOR_ZERO_INITIAL_GUESS) {
1119       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1120       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1121       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1122       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1123       PetscFunctionReturn(0);
1124     }
1125     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1126     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1127     while (its--) {
1128       for ( i=0; i<m; i++ ) {
1129         n    = A->i[i+1] - A->i[i];
1130 	PLogFlops(4*n+3);
1131         idx  = A->j + A->i[i] + shift;
1132         v    = A->a + A->i[i] + shift;
1133         sum  = b[i];
1134         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1135         d    = fshift + A->a[diag[i]+shift];
1136         n    = B->i[i+1] - B->i[i];
1137         idx  = B->j + B->i[i] + shift;
1138         v    = B->a + B->i[i] + shift;
1139         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1140         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1141       }
1142     }
1143   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1144     if (flag & SOR_ZERO_INITIAL_GUESS) {
1145       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
1146       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1147       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1148       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1149       PetscFunctionReturn(0);
1150     }
1151     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1152     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1153     while (its--) {
1154       for ( i=m-1; i>-1; i-- ) {
1155         n    = A->i[i+1] - A->i[i];
1156 	PLogFlops(4*n+3);
1157         idx  = A->j + A->i[i] + shift;
1158         v    = A->a + A->i[i] + shift;
1159         sum  = b[i];
1160         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1161         d    = fshift + A->a[diag[i]+shift];
1162         n    = B->i[i+1] - B->i[i];
1163         idx  = B->j + B->i[i] + shift;
1164         v    = B->a + B->i[i] + shift;
1165         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1166         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
1167       }
1168     }
1169   } else {
1170     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1171   }
1172   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
1173   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1174   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNC__
1179 #define __FUNC__ "MatGetInfo_MPIAIJ"
1180 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1181 {
1182   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1183   Mat        A = mat->A, B = mat->B;
1184   int        ierr;
1185   double     isend[5], irecv[5];
1186 
1187   PetscFunctionBegin;
1188   info->block_size     = 1.0;
1189   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1190   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1191   isend[3] = info->memory;  isend[4] = info->mallocs;
1192   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1193   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1194   isend[3] += info->memory;  isend[4] += info->mallocs;
1195   if (flag == MAT_LOCAL) {
1196     info->nz_used      = isend[0];
1197     info->nz_allocated = isend[1];
1198     info->nz_unneeded  = isend[2];
1199     info->memory       = isend[3];
1200     info->mallocs      = isend[4];
1201   } else if (flag == MAT_GLOBAL_MAX) {
1202     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1203     info->nz_used      = irecv[0];
1204     info->nz_allocated = irecv[1];
1205     info->nz_unneeded  = irecv[2];
1206     info->memory       = irecv[3];
1207     info->mallocs      = irecv[4];
1208   } else if (flag == MAT_GLOBAL_SUM) {
1209     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1210     info->nz_used      = irecv[0];
1211     info->nz_allocated = irecv[1];
1212     info->nz_unneeded  = irecv[2];
1213     info->memory       = irecv[3];
1214     info->mallocs      = irecv[4];
1215   }
1216   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1217   info->fill_ratio_needed = 0;
1218   info->factor_mallocs    = 0;
1219   info->rows_global       = (double)mat->M;
1220   info->columns_global    = (double)mat->N;
1221   info->rows_local        = (double)mat->m;
1222     info->columns_local     = (double)mat->n;
1223 
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNC__
1228 #define __FUNC__ "MatSetOption_MPIAIJ"
1229 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1230 {
1231   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1232   int        ierr;
1233 
1234   PetscFunctionBegin;
1235   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1236       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1237       op == MAT_COLUMNS_UNSORTED ||
1238       op == MAT_COLUMNS_SORTED ||
1239       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1240       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1241         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1242         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1243   } else if (op == MAT_ROW_ORIENTED) {
1244     a->roworiented = 1;
1245     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1246     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1247   } else if (op == MAT_ROWS_SORTED ||
1248              op == MAT_ROWS_UNSORTED ||
1249              op == MAT_SYMMETRIC ||
1250              op == MAT_STRUCTURALLY_SYMMETRIC ||
1251              op == MAT_YES_NEW_DIAGONALS)
1252     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1253   else if (op == MAT_COLUMN_ORIENTED) {
1254     a->roworiented = 0;
1255     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1256     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1257   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1258     a->donotstash = 1;
1259   } else if (op == MAT_NO_NEW_DIAGONALS){
1260     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1261   } else {
1262     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1263   }
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNC__
1268 #define __FUNC__ "MatGetSize_MPIAIJ"
1269 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1270 {
1271   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1272 
1273   PetscFunctionBegin;
1274   if (m) *m = mat->M;
1275   if (n) *n = mat->N;
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNC__
1280 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1281 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1282 {
1283   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1284 
1285   PetscFunctionBegin;
1286   if (m) *m = mat->m;
1287   if (n) *n = mat->n;
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #undef __FUNC__
1292 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1293 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1294 {
1295   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1296 
1297   PetscFunctionBegin;
1298   *m = mat->rstart; *n = mat->rend;
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNC__
1303 #define __FUNC__ "MatGetRow_MPIAIJ"
1304 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1305 {
1306   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1307   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1308   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1309   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1310   int        *cmap, *idx_p;
1311 
1312   PetscFunctionBegin;
1313   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1314   mat->getrowactive = PETSC_TRUE;
1315 
1316   if (!mat->rowvalues && (idx || v)) {
1317     /*
1318         allocate enough space to hold information from the longest row.
1319     */
1320     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1321     int     max = 1,m = mat->m,tmp;
1322     for ( i=0; i<m; i++ ) {
1323       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1324       if (max < tmp) { max = tmp; }
1325     }
1326     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1327     mat->rowindices = (int *) (mat->rowvalues + max);
1328   }
1329 
1330   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
1331   lrow = row - rstart;
1332 
1333   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1334   if (!v)   {pvA = 0; pvB = 0;}
1335   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1336   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1337   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1338   nztot = nzA + nzB;
1339 
1340   cmap  = mat->garray;
1341   if (v  || idx) {
1342     if (nztot) {
1343       /* Merge into increasing global column order (the A and B rows are already sorted): B entries with global column < cstart, then all A entries, then the remaining B entries */
1344       int imark = -1;
1345       if (v) {
1346         *v = v_p = mat->rowvalues;
1347         for ( i=0; i<nzB; i++ ) {
1348           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1349           else break;
1350         }
1351         imark = i;
1352         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1353         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1354       }
1355       if (idx) {
1356         *idx = idx_p = mat->rowindices;
1357         if (imark > -1) {
1358           for ( i=0; i<imark; i++ ) {
1359             idx_p[i] = cmap[cworkB[i]];
1360           }
1361         } else {
1362           for ( i=0; i<nzB; i++ ) {
1363             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1364             else break;
1365           }
1366           imark = i;
1367         }
1368         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1369         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1370       }
1371     } else {
1372       if (idx) *idx = 0;
1373       if (v)   *v   = 0;
1374     }
1375   }
1376   *nz = nztot;
1377   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1378   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 #undef __FUNC__
1383 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1384 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1385 {
1386   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1387 
1388   PetscFunctionBegin;
1389   if (aij->getrowactive == PETSC_FALSE) {
1390     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1391   }
1392   aij->getrowactive = PETSC_FALSE;
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 #undef __FUNC__
1397 #define __FUNC__ "MatNorm_MPIAIJ"
1398 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1399 {
1400   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1401   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1402   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1403   double     sum = 0.0;
1404   Scalar     *v;
1405 
1406   PetscFunctionBegin;
1407   if (aij->size == 1) {
1408     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1409   } else {
1410     if (type == NORM_FROBENIUS) {
1411       v = amat->a;
1412       for (i=0; i<amat->nz; i++ ) {
1413 #if defined(USE_PETSC_COMPLEX)
1414         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1415 #else
1416         sum += (*v)*(*v); v++;
1417 #endif
1418       }
1419       v = bmat->a;
1420       for (i=0; i<bmat->nz; i++ ) {
1421 #if defined(USE_PETSC_COMPLEX)
1422         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1423 #else
1424         sum += (*v)*(*v); v++;
1425 #endif
1426       }
1427       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1428       *norm = sqrt(*norm);
1429     } else if (type == NORM_1) { /* max column norm */
1430       double *tmp, *tmp2;
1431       int    *jj, *garray = aij->garray;
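      /* accumulate this process's contribution to every global column sum in tmp
         (length N, hence not scalable), combine with an Allreduce, and take the
         maximum over the columns */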
1432       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1433       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1434       PetscMemzero(tmp,aij->N*sizeof(double));
1435       *norm = 0.0;
1436       v = amat->a; jj = amat->j;
1437       for ( j=0; j<amat->nz; j++ ) {
1438         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1439       }
1440       v = bmat->a; jj = bmat->j;
1441       for ( j=0; j<bmat->nz; j++ ) {
1442         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1443       }
1444       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1445       for ( j=0; j<aij->N; j++ ) {
1446         if (tmp2[j] > *norm) *norm = tmp2[j];
1447       }
1448       PetscFree(tmp); PetscFree(tmp2);
1449     } else if (type == NORM_INFINITY) { /* max row norm */
1450       double ntemp = 0.0;
1451       for ( j=0; j<amat->m; j++ ) {
1452         v = amat->a + amat->i[j] + shift;
1453         sum = 0.0;
1454         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1455           sum += PetscAbsScalar(*v); v++;
1456         }
1457         v = bmat->a + bmat->i[j] + shift;
1458         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1459           sum += PetscAbsScalar(*v); v++;
1460         }
1461         if (sum > ntemp) ntemp = sum;
1462       }
1463       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1464     } else {
1465       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1466     }
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNC__
1472 #define __FUNC__ "MatTranspose_MPIAIJ"
1473 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1474 {
1475   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1476   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1477   int        ierr,shift = Aloc->indexshift;
1478   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1479   Mat        B;
1480   Scalar     *array;
1481 
1482   PetscFunctionBegin;
1483   if (matout == PETSC_NULL && M != N) {
1484     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1485   }
1486 
1487   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1488 
1489   /* copy over the A part */
1490   Aloc = (Mat_SeqAIJ*) a->A->data;
1491   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1492   row = a->rstart;
1493   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
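  /* each local row i of the diagonal block becomes column `row' of B: the
     (now global) column indices aj act as the row indices in MatSetValues() */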
1494   for ( i=0; i<m; i++ ) {
1495     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1496     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1497   }
1498   aj = Aloc->j;
1499   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1500 
1501   /* copy over the B part */
1502   Aloc = (Mat_SeqAIJ*) a->B->data;
1503   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1504   row = a->rstart;
1505   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1506   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1507   for ( i=0; i<m; i++ ) {
1508     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1509     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1510   }
1511   PetscFree(ct);
1512   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1513   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1514   if (matout != PETSC_NULL) {
1515     *matout = B;
1516   } else {
1517     PetscOps       *Abops;
1518     struct _MatOps *Aops;
1519 
1520     /* This isn't really an in-place transpose, but we free the data structures from a */
1521     PetscFree(a->rowners);
1522     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1523     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1524 #if defined (USE_CTABLE)
1525     if (a->colmap) TableDelete(a->colmap);
1526 #else
1527     if (a->colmap) PetscFree(a->colmap);
1528 #endif
1529     if (a->garray) PetscFree(a->garray);
1530     if (a->lvec) VecDestroy(a->lvec);
1531     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1532     PetscFree(a);
1533 
1534     /*
1535        This is horrible, horrible code. We need to keep the
1536       A pointers for the bops and ops but copy everything
1537       else from B.
1538     */
1539     Abops = A->bops;
1540     Aops  = A->ops;
1541     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1542     A->bops = Abops;
1543     A->ops  = Aops;
1544     PetscHeaderDestroy(B);
1545   }
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNC__
1550 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1551 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1552 {
1553   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1554   Mat a = aij->A, b = aij->B;
1555   int ierr,s1,s2,s3;
1556 
1557   PetscFunctionBegin;
1558   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1559   if (rr) {
1560     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1561     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1562     /* Overlap communication with computation. */
1563     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1564   }
1565   if (ll) {
1566     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1567     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1568     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1569   }
1570   /* scale the diagonal block */
1571   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1572 
1573   if (rr) {
1574     /* Do a scatter end and then right scale the off-diagonal block */
1575     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1576     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1577   }
1578 
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 
1583 #undef __FUNC__
1584 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1585 int MatPrintHelp_MPIAIJ(Mat A)
1586 {
1587   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1588   int        ierr;
1589 
1590   PetscFunctionBegin;
1591   if (!a->rank) {
1592     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1593   }
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #undef __FUNC__
1598 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1599 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1600 {
1601   PetscFunctionBegin;
1602   *bs = 1;
1603   PetscFunctionReturn(0);
1604 }
1605 #undef __FUNC__
1606 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1607 int MatSetUnfactored_MPIAIJ(Mat A)
1608 {
1609   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1610   int        ierr;
1611 
1612   PetscFunctionBegin;
1613   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNC__
1618 #define __FUNC__ "MatEqual_MPIAIJ"
1619 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1620 {
1621   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1622   Mat        a, b, c, d;
1623   PetscTruth flg;
1624   int        ierr;
1625 
1626   PetscFunctionBegin;
1627   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1628   a = matA->A; b = matA->B;
1629   c = matB->A; d = matB->B;
1630 
1631   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1632   if (flg == PETSC_TRUE) {
1633     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1634   }
1635   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 #undef __FUNC__
1640 #define __FUNC__ "MatCopy_MPIAIJ"
1641 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1642 {
1643   int        ierr;
1644   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1645   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1646 
1647   PetscFunctionBegin;
1648   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1649     /* because of the column compression in the off-processor part of the matrix a->B,
1650        the number of columns in a->B and b->B may be different, hence we cannot call
1651        MatCopy() directly on the two parts. If need be, we could provide a copy more
1652        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1653        then copying the submatrices */
1654     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1655   } else {
1656     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1657     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1658   }
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1663 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1664 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1665 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1666 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1667 
1668 /* -------------------------------------------------------------------*/
1669 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1670        MatGetRow_MPIAIJ,
1671        MatRestoreRow_MPIAIJ,
1672        MatMult_MPIAIJ,
1673        MatMultAdd_MPIAIJ,
1674        MatMultTrans_MPIAIJ,
1675        MatMultTransAdd_MPIAIJ,
1676        0,
1677        0,
1678        0,
1679        0,
1680        0,
1681        0,
1682        MatRelax_MPIAIJ,
1683        MatTranspose_MPIAIJ,
1684        MatGetInfo_MPIAIJ,
1685        MatEqual_MPIAIJ,
1686        MatGetDiagonal_MPIAIJ,
1687        MatDiagonalScale_MPIAIJ,
1688        MatNorm_MPIAIJ,
1689        MatAssemblyBegin_MPIAIJ,
1690        MatAssemblyEnd_MPIAIJ,
1691        0,
1692        MatSetOption_MPIAIJ,
1693        MatZeroEntries_MPIAIJ,
1694        MatZeroRows_MPIAIJ,
1695        0,
1696        0,
1697        0,
1698        0,
1699        MatGetSize_MPIAIJ,
1700        MatGetLocalSize_MPIAIJ,
1701        MatGetOwnershipRange_MPIAIJ,
1702        0,
1703        0,
1704        0,
1705        0,
1706        MatDuplicate_MPIAIJ,
1707        0,
1708        0,
1709        0,
1710        0,
1711        0,
1712        MatGetSubMatrices_MPIAIJ,
1713        MatIncreaseOverlap_MPIAIJ,
1714        MatGetValues_MPIAIJ,
1715        MatCopy_MPIAIJ,
1716        MatPrintHelp_MPIAIJ,
1717        MatScale_MPIAIJ,
1718        0,
1719        0,
1720        0,
1721        MatGetBlockSize_MPIAIJ,
1722        0,
1723        0,
1724        0,
1725        0,
1726        MatFDColoringCreate_MPIAIJ,
1727        0,
1728        MatSetUnfactored_MPIAIJ,
1729        0,
1730        0,
1731        MatGetSubMatrix_MPIAIJ,
1732        0,
1733        0,
1734        MatGetMaps_Petsc};
1735 
1736 /* ----------------------------------------------------------------------------------------*/
1737 
1738 EXTERN_C_BEGIN
1739 #undef __FUNC__
1740 #define __FUNC__ "MatStoreValues_MPIAIJ"
1741 int MatStoreValues_MPIAIJ(Mat mat)
1742 {
1743   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1744   int        ierr;
1745 
1746   PetscFunctionBegin;
1747   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1748   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1749   PetscFunctionReturn(0);
1750 }
1751 EXTERN_C_END
1752 
1753 EXTERN_C_BEGIN
1754 #undef __FUNC__
1755 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1756 int MatRetrieveValues_MPIAIJ(Mat mat)
1757 {
1758   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1759   int        ierr;
1760 
1761   PetscFunctionBegin;
1762   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1763   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1764   PetscFunctionReturn(0);
1765 }
1766 EXTERN_C_END
1767 
1768 #include "pc.h"
1769 EXTERN_C_BEGIN
1770 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1771 EXTERN_C_END
1772 
1773 #undef __FUNC__
1774 #define __FUNC__ "MatCreateMPIAIJ"
1775 /*@C
1776    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1777    (the default parallel PETSc format).  For good matrix assembly performance
1778    the user should preallocate the matrix storage by setting the parameters
1779    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1780    performance can be increased by more than a factor of 50.
1781 
1782    Collective on MPI_Comm
1783 
1784    Input Parameters:
1785 +  comm - MPI communicator
1786 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1787            This value should be the same as the local size used in creating the
1788            y vector for the matrix-vector product y = Ax.
1789 .  n - This value should be the same as the local size used in creating the
1790        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1791        calculated if N is given) For square matrices n is almost always m.
1792 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1793 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1794 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1795            (same value is used for all local rows)
1796 .  d_nnz - array containing the number of nonzeros in the various rows of the
1797            DIAGONAL portion of the local submatrix (possibly different for each row)
1798            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1799            The size of this array is equal to the number of local rows, i.e. 'm'.
1800            You must leave room for the diagonal entry even if it is zero.
1801 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1802            submatrix (same value is used for all local rows).
1803 -  o_nnz - array containing the number of nonzeros in the various rows of the
1804            OFF-DIAGONAL portion of the local submatrix (possibly different for
1805            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1806            structure. The size of this array is equal to the number
1807            of local rows, i.e. 'm'.
1808 
1809    Output Parameter:
1810 .  A - the matrix
1811 
1812    Notes:
1813    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1814    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1815    storage requirements for this matrix.
1816 
1817    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1818    processor then it must be used on all processors that share the object for
1819    that argument.
1820 
1821    The AIJ format (also called the Yale sparse matrix format or
1822    compressed row storage), is fully compatible with standard Fortran 77
1823    storage.  That is, the stored row and column indices can begin at
1824    either one (as in Fortran) or zero.  See the users manual for details.
1825 
1826    The user MUST specify either the local or global matrix dimensions
1827    (possibly both).
1828 
1829    The parallel matrix is partitioned such that the first m0 rows belong to
1830    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1831    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
1832 
1833    The DIAGONAL portion of the local submatrix of a processor can be defined
1834    as the submatrix which is obtained by extracting the part corresponding
1835    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1836    first row that belongs to the processor, and r2 is the last row belonging
1837    to this processor. This is a square mxm matrix. The remaining portion
1838    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1839 
1840    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1841 
1842    By default, this format uses inodes (identical nodes) when possible.
1843    We search for consecutive rows with the same nonzero structure, thereby
1844    reusing matrix information to achieve increased efficiency.
1845 
1846    Options Database Keys:
1847 +  -mat_aij_no_inode  - Do not use inodes
1848 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1849 .  -mat_aij_oneindex - Internally use indexing starting at 1
1850         rather than 0.  Note that when calling MatSetValues(),
1851         the user still MUST index entries starting at 0!
1852 -  -mat_mpi - use the parallel matrix data structures even on one processor
1853               (defaults to using SeqAIJ format on one processor)
1856 
1857 
1858    Example usage:
1859 
1860    Consider the following 8x8 matrix with 34 non-zero values that is
1861    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1862    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1863    as follows:
1864 
1865 .vb
1866             1  2  0  |  0  3  0  |  0  4
1867     Proc0   0  5  6  |  7  0  0  |  8  0
1868             9  0 10  | 11  0  0  | 12  0
1869     -------------------------------------
1870            13  0 14  | 15 16 17  |  0  0
1871     Proc1   0 18  0  | 19 20 21  |  0  0
1872             0  0  0  | 22 23  0  | 24  0
1873     -------------------------------------
1874     Proc2  25 26 27  |  0  0 28  | 29  0
1875            30  0  0  | 31 32 33  |  0 34
1876 .ve
1877 
1878    This can be represented as a collection of submatrices as:
1879 
1880 .vb
1881       A B C
1882       D E F
1883       G H I
1884 .ve
1885 
1886    Where the submatrices A,B,C are owned by proc0, D,E,F are
1887    owned by proc1, G,H,I are owned by proc2.
1888 
1889    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1890    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1891    The 'M','N' parameters are 8,8, and have the same values on all procs.
1892 
1893    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1894    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1895    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1896    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
1897    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1898    matrix, and [DF] as another SeqAIJ matrix.
1899 
1900    When d_nz, o_nz parameters are specified, d_nz storage elements are
1901    allocated for every row of the local diagonal submatrix, and o_nz
1902    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1903    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
1904    per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
1905    In this case, the values of d_nz,o_nz are:
1906 .vb
1907      proc0 : d_nz = 2, o_nz = 2
1908      proc1 : d_nz = 3, o_nz = 2
1909      proc2 : d_nz = 1, o_nz = 4
1910 .ve
1911    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1912    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1913    for proc2, i.e. we are using 12+15+10=37 storage locations to store
1914    34 values.
1915 
1916    When d_nnz, o_nnz parameters are specified, the storage is specified
1917    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1918    In the above case the values for d_nnz,o_nnz are:
1919 .vb
1920      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1921      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1922      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1923 .ve
1924    Here the space allocated is the sum of all the above values, i.e. 34, and
1925    hence pre-allocation is perfect.
1926 
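   For example, a minimal sketch of the call proc0 might make for the matrix
   above (an illustration only; it assumes the matrix is created on
   PETSC_COMM_WORLD and omits the MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd()
   calls that follow):
.vb
     Mat A;
     int ierr, d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   Each process passes its own m,n,d_nnz,o_nnz; proc1 and proc2 would make the
   analogous calls with their values listed above.
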
1927    Level: intermediate
1928 
1929 .keywords: matrix, aij, compressed row, sparse, parallel
1930 
1931 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1932 @*/
1933 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1934 {
1935   Mat          B;
1936   Mat_MPIAIJ   *b;
1937   int          ierr, i,size,flag1 = 0, flag2 = 0;
1938 
1939   PetscFunctionBegin;
1940   MPI_Comm_size(comm,&size);
1941   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
1942   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
1943   if (!flag1 && !flag2 && size == 1) {
1944     if (M == PETSC_DECIDE) M = m;
1945     if (N == PETSC_DECIDE) N = n;
1946     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1947     PetscFunctionReturn(0);
1948   }
1949 
1950   *A = 0;
1951   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1952   PLogObjectCreate(B);
1953   B->data            = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1954   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1955   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1956   B->ops->destroy    = MatDestroy_MPIAIJ;
1957   B->ops->view       = MatView_MPIAIJ;
1958   B->factor          = 0;
1959   B->assembled       = PETSC_FALSE;
1960   B->mapping         = 0;
1961 
1962   B->insertmode      = NOT_SET_VALUES;
1963   b->size            = size;
1964   MPI_Comm_rank(comm,&b->rank);
1965 
1966   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1967     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1968   }
1969 
1970   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1971   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1972   b->m = m; B->m = m;
1973   b->n = n; B->n = n;
1974   b->N = N; B->N = N;
1975   b->M = M; B->M = M;
1976 
1977   /* the information in the maps duplicates the information computed below, eventually
1978      we should remove the duplicate information that is not contained in the maps */
1979   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1980   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1981 
1982   /* build local table of row and column ownerships */
1983   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1984   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1985   b->cowners = b->rowners + b->size + 2;
1986   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1987   b->rowners[0] = 0;
1988   for ( i=2; i<=b->size; i++ ) {
1989     b->rowners[i] += b->rowners[i-1];
1990   }
1991   b->rstart = b->rowners[b->rank];
1992   b->rend   = b->rowners[b->rank+1];
1993   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1994   b->cowners[0] = 0;
1995   for ( i=2; i<=b->size; i++ ) {
1996     b->cowners[i] += b->cowners[i-1];
1997   }
1998   b->cstart = b->cowners[b->rank];
1999   b->cend   = b->cowners[b->rank+1];
2000 
2001   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2002   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
2003   PLogObjectParent(B,b->A);
2004   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2005   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
2006   PLogObjectParent(B,b->B);
2007 
2008   /* build cache (stash) for off-processor entries generated during assembly */
2009   ierr = StashCreate_Private(comm,1,&b->stash); CHKERRQ(ierr);
2010   b->donotstash  = 0;
2011   b->colmap      = 0;
2012   b->garray      = 0;
2013   b->roworiented = 1;
2014 
2015   /* stuff used for matrix vector multiply */
2016   b->lvec      = 0;
2017   b->Mvctx     = 0;
2018 
2019   /* stuff for MatGetRow() */
2020   b->rowindices   = 0;
2021   b->rowvalues    = 0;
2022   b->getrowactive = PETSC_FALSE;
2023 
2024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
2025                                      "MatStoreValues_MPIAIJ",
2026                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
2027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
2028                                      "MatRetrieveValues_MPIAIJ",
2029                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
2030   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
2031                                      "MatGetDiagonalBlock_MPIAIJ",
2032                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
2033   *A = B;
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNC__
2038 #define __FUNC__ "MatDuplicate_MPIAIJ"
2039 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2040 {
2041   Mat        mat;
2042   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
2043   int        ierr, len=0, flg;
2044 
2045   PetscFunctionBegin;
2046   *newmat       = 0;
2047   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
2048   PLogObjectCreate(mat);
2049   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
2050   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2051   mat->ops->destroy = MatDestroy_MPIAIJ;
2052   mat->ops->view    = MatView_MPIAIJ;
2053   mat->factor       = matin->factor;
2054   mat->assembled    = PETSC_TRUE;
2055   mat->insertmode   = NOT_SET_VALUES;
2056 
2057   a->m = mat->m   = oldmat->m;
2058   a->n = mat->n   = oldmat->n;
2059   a->M = mat->M   = oldmat->M;
2060   a->N = mat->N   = oldmat->N;
2061 
2062   a->rstart       = oldmat->rstart;
2063   a->rend         = oldmat->rend;
2064   a->cstart       = oldmat->cstart;
2065   a->cend         = oldmat->cend;
2066   a->size         = oldmat->size;
2067   a->rank         = oldmat->rank;
2068   a->donotstash   = oldmat->donotstash;
2069   a->roworiented  = oldmat->roworiented;
2070   a->rowindices   = 0;
2071   a->rowvalues    = 0;
2072   a->getrowactive = PETSC_FALSE;
2073 
2074   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2075   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
2076   a->cowners = a->rowners + a->size + 2;
2077   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2078   ierr = StashCreate_Private(matin->comm,1,&a->stash); CHKERRQ(ierr);
2079   if (oldmat->colmap) {
2080 #if defined (USE_CTABLE)
2081     ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
2082 #else
2083     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
2084     PLogObjectMemory(mat,(a->N)*sizeof(int));
2085     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
2086 #endif
2087   } else a->colmap = 0;
2088   if (oldmat->garray) {
2089     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
2090     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
2091     PLogObjectMemory(mat,len*sizeof(int));
2092     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2093   } else a->garray = 0;
2094 
2095   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2096   PLogObjectParent(mat,a->lvec);
2097   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2098   PLogObjectParent(mat,a->Mvctx);
2099   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
2100   PLogObjectParent(mat,a->A);
2101   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
2102   PLogObjectParent(mat,a->B);
2103   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2104   if (flg) {
2105     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2106   }
2107   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2108   *newmat = mat;
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 #include "sys.h"
2113 
2114 #undef __FUNC__
2115 #define __FUNC__ "MatLoad_MPIAIJ"
2116 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
2117 {
2118   Mat          A;
2119   Scalar       *vals,*svals;
2120   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2121   MPI_Status   status;
2122   int          i, nz, ierr, j,rstart, rend, fd;
2123   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
2124   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
2125   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
2126 
2127   PetscFunctionBegin;
2128   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
2129   if (!rank) {
2130     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2131     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2132     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2133     if (header[3] < 0) {
2134       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
2135     }
2136   }
2137 
2138   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2139   M = header[1]; N = header[2];
2140   /* determine ownership of all rows */
2141   m = M/size + ((M % size) > rank);
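  /* e.g. M=8 on 3 processes gives local row counts 3,3,2 for ranks 0,1,2 */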
2142   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
2143   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2144   rowners[0] = 0;
2145   for ( i=2; i<=size; i++ ) {
2146     rowners[i] += rowners[i-1];
2147   }
2148   rstart = rowners[rank];
2149   rend   = rowners[rank+1];
2150 
2151   /* distribute row lengths to all processors */
2152   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
2153   offlens = ourlens + (rend-rstart);
2154   if (!rank) {
2155     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
2156     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2157     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2158     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
2159     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2160     PetscFree(sndcounts);
2161   } else {
2162     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2163   }
2164 
2165   if (!rank) {
2166     /* calculate the number of nonzeros on each processor */
2167     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2168     PetscMemzero(procsnz,size*sizeof(int));
2169     for ( i=0; i<size; i++ ) {
2170       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2171         procsnz[i] += rowlengths[j];
2172       }
2173     }
2174     PetscFree(rowlengths);
2175 
2176     /* determine max buffer needed and allocate it */
2177     maxnz = 0;
2178     for ( i=0; i<size; i++ ) {
2179       maxnz = PetscMax(maxnz,procsnz[i]);
2180     }
2181     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2182 
2183     /* read in my part of the matrix column indices  */
2184     nz     = procsnz[0];
2185     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2186     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2187 
2188     /* read in everyone else's parts and ship them off */
2189     for ( i=1; i<size; i++ ) {
2190       nz   = procsnz[i];
2191       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2192       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2193     }
2194     PetscFree(cols);
2195   } else {
2196     /* determine buffer space needed for message */
2197     nz = 0;
2198     for ( i=0; i<m; i++ ) {
2199       nz += ourlens[i];
2200     }
2201     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2202 
2203     /* receive message of column indices */
2204     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2205     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2206     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2207   }
2208 
2209   /* determine column ownership if matrix is not square */
2210   if (N != M) {
2211     n      = N/size + ((N % size) > rank);
2212     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2213     cstart = cend - n;
2214   } else {
2215     cstart = rstart;
2216     cend   = rend;
2217     n      = cend - cstart;
2218   }
2219 
2220   /* loop over local rows, determining number of off diagonal entries */
2221   PetscMemzero(offlens,m*sizeof(int));
2222   jj = 0;
2223   for ( i=0; i<m; i++ ) {
2224     for ( j=0; j<ourlens[i]; j++ ) {
2225       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2226       jj++;
2227     }
2228   }
2229 
2230   /* create our matrix */
2231   for ( i=0; i<m; i++ ) {
2232     ourlens[i] -= offlens[i];
2233   }
2234   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2235   A = *newmat;
2236   ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
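  /* the column indices of each row are assumed to be stored in increasing
     order in the file, so the sorted insertion path can be used */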
2237   for ( i=0; i<m; i++ ) {
2238     ourlens[i] += offlens[i];
2239   }
2240 
2241   if (!rank) {
2242     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
2243 
2244     /* read in my part of the matrix numerical values  */
2245     nz   = procsnz[0];
2246     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2247 
2248     /* insert into matrix */
2249     jj      = rstart;
2250     smycols = mycols;
2251     svals   = vals;
2252     for ( i=0; i<m; i++ ) {
2253       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2254       smycols += ourlens[i];
2255       svals   += ourlens[i];
2256       jj++;
2257     }
2258 
2259     /* read in other processors and ship out */
2260     for ( i=1; i<size; i++ ) {
2261       nz   = procsnz[i];
2262       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2263       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2264     }
2265     PetscFree(procsnz);
2266   } else {
2267     /* receive numeric values */
2268     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
2269 
2270     /* receive message of values */
2271     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2272     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2273     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2274 
2275     /* insert into matrix */
2276     jj      = rstart;
2277     smycols = mycols;
2278     svals   = vals;
2279     for ( i=0; i<m; i++ ) {
2280       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2281       smycols += ourlens[i];
2282       svals   += ourlens[i];
2283       jj++;
2284     }
2285   }
2286   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
2287 
2288   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2289   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNC__
2294 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2295 /*
2296     Not great since it makes two copies of the submatrix: first a SeqAIJ
2297   matrix on each process, and then the final result obtained by concatenating
2298   the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2299 */
2300 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2301 {
2302   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2303   Mat        *local,M, Mreuse;
2304   Scalar     *vwork,*aa;
2305   MPI_Comm   comm = mat->comm;
2306   Mat_SeqAIJ *aij;
2307   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2308 
2309   PetscFunctionBegin;
2310   MPI_Comm_rank(comm,&rank);
2311   MPI_Comm_size(comm,&size);
2312 
2313   if (call ==  MAT_REUSE_MATRIX) {
2314     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2315     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2316     local = &Mreuse;
2317     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2318   } else {
2319     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2320     Mreuse = *local;
2321     PetscFree(local);
2322   }
2323 
2324   /*
2325       m - number of local rows
2326       n - number of columns (same on all processors)
2327       rstart - first row in new global matrix generated
2328   */
2329   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2330   if (call == MAT_INITIAL_MATRIX) {
2331     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2332     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2333     ii  = aij->i;
2334     jj  = aij->j;
2335 
2336     /*
2337         Determine the number of non-zeros in the diagonal and off-diagonal
2338         portions of the matrix in order to do correct preallocation
2339     */
2340 
2341     /* first get start and end of "diagonal" columns */
2342     if (csize == PETSC_DECIDE) {
2343       nlocal = n/size + ((n % size) > rank);
2344     } else {
2345       nlocal = csize;
2346     }
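    /* the inclusive prefix sum of nlocal over the ranks gives the end of this
       process's column ownership range; subtracting nlocal gives the start */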
2347     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2348     rstart = rend - nlocal;
2349     if (rank == size - 1 && rend != n) {
2350       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2351     }
2352 
2353     /* next, compute all the lengths */
2354     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2355     olens = dlens + m;
2356     for ( i=0; i<m; i++ ) {
2357       jend = ii[i+1] - ii[i];
2358       olen = 0;
2359       dlen = 0;
2360       for ( j=0; j<jend; j++ ) {
2361         if ( *jj < rstart || *jj >= rend) olen++;
2362         else dlen++;
2363         jj++;
2364       }
2365       olens[i] = olen;
2366       dlens[i] = dlen;
2367     }
2368     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2369     PetscFree(dlens);
2370   } else {
2371     int ml,nl;
2372 
2373     M = *newmat;
2374     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2375     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2376     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2377     /*
2378          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2379        rather than the slower MatSetValues().
2380     */
2381     M->was_assembled = PETSC_TRUE;
2382     M->assembled     = PETSC_FALSE;
2383   }
2384   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2385   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2386   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2387   ii  = aij->i;
2388   jj  = aij->j;
2389   aa  = aij->a;
2390   for (i=0; i<m; i++) {
2391     row   = rstart + i;
2392     nz    = ii[i+1] - ii[i];
2393     cwork = jj;     jj += nz;
2394     vwork = aa;     aa += nz;
2395     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2396   }
2397 
2398   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2399   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2400   *newmat = M;
2401 
2402   /* save submatrix used in processor for next request */
2403   if (call ==  MAT_INITIAL_MATRIX) {
2404     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2405     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2406   }
2407 
2408   PetscFunctionReturn(0);
2409 }
2410