xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3505fcc193b430b487d1bebfbdb67b66d883e843)
1 /*$Id: mpiaij.c,v 1.307 1999/11/05 14:45:21 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 extern int MatSetUpMultiply_MPIAIJ(Mat);
8 extern int DisAssemble_MPIAIJ(Mat);
9 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
10 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
12 extern int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 has an order N integer array) but access is fast.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
27   int        n = B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
32   for ( i=0; i<n; i++ ){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
37   PLogObjectMemory(mat,aij->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
39   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
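/*
   Sketch (drawn from the lookups later in this file) of how the column map is
   consulted when an off-diagonal entry with hypothetical global column gcol is set:

#if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
       col--;
#else
       col = aij->colmap[gcol] - 1;
#endif

   col is then the local column index within the off-diagonal block aij->B, or
   negative if gcol is not (yet) represented there.
*/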
43 
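/*
   The MatSetValues_SeqAIJ_{A,B}_Private() macros below insert a single value into
   the diagonal (A) or off-diagonal (B) sequential block: they scan the row for the
   column, add or overwrite the value if the column is already present, otherwise
   shift the later entries of the row up and insert it; when the row has no free
   slot left, the entire block storage is reallocated with CHUNKSIZE additional
   slots for that row.
*/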
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for ( _i=0; _i<nrow; _i++ ) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
71         Scalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
77         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
78         new_j   = (int *) (new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
83         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = PETSC_TRUE; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for ( ii=N; ii>=_i; ii-- ) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for ( _i=0; _i<nrow; _i++ ) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
152         new_j   = (int *) (new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
157         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = PETSC_TRUE; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for ( ii=N; ii>=_i; ii-- ) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
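/*
   Minimal calling sketch (a hedged example, not code from this file): mat is
   assumed to have been created with MatCreateMPIAIJ(); any process may set any
   global entry, and rows owned by other processes are stashed here and
   communicated during assembly.

     int    row,col;
     Scalar value;
     ...
     ierr = MatSetValues(mat,1,&row,1,&col,&value,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/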
193 #undef __FUNC__
194 #define __FUNC__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
198   Scalar     value;
199   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
200   int        cstart = aij->cstart, cend = aij->cend,row,col;
201   int        roworiented = aij->roworiented;
202 
203   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
204   Mat        A = aij->A;
205   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
206   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
207   Scalar     *aa = a->a;
208 
209   Mat        B = aij->B;
210   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
211   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
212   Scalar     *ba = b->a;
213 
214   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
215   int        nonew = a->nonew,shift = a->indexshift;
216   Scalar     *ap;
217 
218   PetscFunctionBegin;
219   for ( i=0; i<m; i++ ) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for ( j=0; j<n; j++ ) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233         else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ *) B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNC__
277 #define __FUNC__ "MatGetValues_MPIAIJ"
278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
279 {
280   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
281   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
282   int        cstart = aij->cstart, cend = aij->cend,row,col;
283 
284   PetscFunctionBegin;
285   for ( i=0; i<m; i++ ) {
286     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
287     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
288     if (idxm[i] >= rstart && idxm[i] < rend) {
289       row = idxm[i] - rstart;
290       for ( j=0; j<n; j++ ) {
291         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
292         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
293         if (idxn[j] >= cstart && idxn[j] < cend){
294           col = idxn[j] - cstart;
295           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
296         } else {
297           if (!aij->colmap) {
298             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
299           }
300 #if defined (PETSC_USE_CTABLE)
301           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
302           col --;
303 #else
304           col = aij->colmap[idxn[j]] - 1;
305 #endif
306           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
307           else {
308             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
309           }
310         }
311       }
312     } else {
313       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
314     }
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
322 {
323   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
324   int         ierr,nstash,reallocs;
325   InsertMode  addv;
326 
327   PetscFunctionBegin;
328   if (aij->donotstash) {
329     PetscFunctionReturn(0);
330   }
331 
332   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
333   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
334   if (addv == (ADD_VALUES|INSERT_VALUES)) {
335     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
336   }
337   mat->insertmode = addv; /* in case this processor had no cache */
338 
339   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
340   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
341   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
342   PetscFunctionReturn(0);
343 }
344 
345 
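/*
   Assembly is completed in several steps: drain the stash of off-process values
   (each message is broken into runs of identical rows and fed back through
   MatSetValues_MPIAIJ()), assemble the diagonal block A, check whether any process
   disassembled (and, if so, disassemble too so everyone can reassemble), build the
   multiply scatter on the first final assembly, and finally assemble the
   off-diagonal block B.
*/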
346 #undef __FUNC__
347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
349 {
350   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
351   int         i,j,rstart,ncols,n,ierr,flg;
352   int         *row,*col,other_disassembled;
353   Scalar      *val;
354   InsertMode  addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for ( i=0; i<n; ) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled, if so we must
379      also disassemble ourselves, in order that we may reassemble. */
380   /*
381      if the nonzero structure of submatrix B cannot change then we know that
382      no processor disassembled, and thus we can skip this step
383   */
384   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388     }
389   }
390 
391   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
392     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
393   }
394   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
396 
397   if (aij->rowvalues) {
398     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
399     aij->rowvalues = 0;
400   }
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatZeroEntries_MPIAIJ"
406 int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
409   int        ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
413   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
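/*
   Each process may list rows owned by any process, so the row indices are first
   routed to their owners: count how many indices go to each process, exchange the
   message counts and maximum length with an MPI_Allreduce() using PetscMaxSum_Op,
   post the receives, send the indices, and then zero the locally owned rows
   (optionally reinserting the diagonal).
*/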
417 #undef __FUNC__
418 #define __FUNC__ "MatZeroRows_MPIAIJ"
419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
420 {
421   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
422   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
423   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
424   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427   MPI_Comm       comm = A->comm;
428   MPI_Request    *send_waits,*recv_waits;
429   MPI_Status     recv_status,*send_status;
430   IS             istmp;
431 
432   PetscFunctionBegin;
433   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
434   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
435 
436   /*  first count number of contributors to each processor */
437   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
438   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
439   procs  = nprocs + size;
440   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
441   for ( i=0; i<N; i++ ) {
442     idx = rows[i];
443     found = 0;
444     for ( j=0; j<size; j++ ) {
445       if (idx >= owners[j] && idx < owners[j+1]) {
446         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
447       }
448     }
449     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
450   }
451   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
452 
453   /* inform other processors of number of messages and max length*/
454   work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
455   ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
456   nrecvs = work[size+rank];
457   nmax   = work[rank];
458   ierr   = PetscFree(work);CHKERRQ(ierr);
459 
460   /* post receives:   */
461   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
462   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
463   for ( i=0; i<nrecvs; i++ ) {
464     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
465   }
466 
467   /* do sends:
468       1) starts[i] gives the starting index in svalues for stuff going to
469          the ith processor
470   */
471   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
472   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
473   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
474   starts[0] = 0;
475   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
476   for ( i=0; i<N; i++ ) {
477     svalues[starts[owner[i]]++] = rows[i];
478   }
479   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
480 
481   starts[0] = 0;
482   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
483   count = 0;
484   for ( i=0; i<size; i++ ) {
485     if (procs[i]) {
486       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
487     }
488   }
489   ierr = PetscFree(starts);CHKERRQ(ierr);
490 
491   base = owners[rank];
492 
493   /*  wait on receives */
494   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
495   source = lens + nrecvs;
496   count  = nrecvs; slen = 0;
497   while (count) {
498     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
499     /* unpack receives into our local space */
500     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
501     source[imdex]  = recv_status.MPI_SOURCE;
502     lens[imdex]    = n;
503     slen          += n;
504     count--;
505   }
506   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
507 
508   /* move the received data into the list of local rows to be zeroed */
509   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
510   count = 0;
511   for ( i=0; i<nrecvs; i++ ) {
512     values = rvalues + i*nmax;
513     for ( j=0; j<lens[i]; j++ ) {
514       lrows[count++] = values[j] - base;
515     }
516   }
517   ierr = PetscFree(rvalues);CHKERRQ(ierr);
518   ierr = PetscFree(lens);CHKERRQ(ierr);
519   ierr = PetscFree(owner);CHKERRQ(ierr);
520   ierr = PetscFree(nprocs);CHKERRQ(ierr);
521 
522   /* actually zap the local rows */
523   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
524   PLogObjectParent(A,istmp);
525 
526   /*
527         Zero the required rows. If the "diagonal block" of the matrix
528      is square and the user wishes to set the diagonal we use separate
529      code so that MatSetValues() is not called for each diagonal entry, which
530      would allocate new memory, thus causing lots of mallocs and slowing things down.
531 
532        Contributed by: Mathew Knepley
533   */
534   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
535   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
536   if (diag && (l->A->M == l->A->N)) {
537     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
538   } else if (diag) {
539     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
540     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
541       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
542 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
543     }
544     for ( i = 0; i < slen; i++ ) {
545       row  = lrows[i] + rstart;
546       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
547     }
548     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550   } else {
551     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
552   }
553   ierr = ISDestroy(istmp);CHKERRQ(ierr);
554   ierr = PetscFree(lrows);CHKERRQ(ierr);
555 
556   /* wait on sends */
557   if (nsends) {
558     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
559     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
560     ierr        = PetscFree(send_status);CHKERRQ(ierr);
561   }
562   ierr = PetscFree(send_waits);CHKERRQ(ierr);
563   ierr = PetscFree(svalues);CHKERRQ(ierr);
564 
565   PetscFunctionReturn(0);
566 }
567 
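/*
   y = A*x is formed as y = (diagonal block)*x_local + (off-diagonal block)*x_ghost:
   the scatter that gathers the needed ghost values of x into a->lvec is started,
   the local product with a->A is computed while the messages are in flight, and
   the product with a->B is added in once the scatter completes.
*/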
568 #undef __FUNC__
569 #define __FUNC__ "MatMult_MPIAIJ"
570 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
571 {
572   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
573   int        ierr,nt;
574 
575   PetscFunctionBegin;
576   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
577   if (nt != a->n) {
578     SETERRQ2(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A (%d) and xx (%d)",a->n,nt);
579   }
580   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
581   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
582   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNC__
588 #define __FUNC__ "MatMultAdd_MPIAIJ"
589 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
590 {
591   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
592   int        ierr;
593 
594   PetscFunctionBegin;
595   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
596   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
597   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
599   PetscFunctionReturn(0);
600 }
601 
602 #undef __FUNC__
603 #define __FUNC__ "MatMultTranspose_MPIAIJ"
604 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
605 {
606   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
607   int        ierr;
608 
609   PetscFunctionBegin;
610   /* do nondiagonal part */
611   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
612   /* send it on its way */
613   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
614   /* do local part */
615   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
616   /* receive remote parts: note this assumes the values are not actually */
617   /* inserted in yy until the next line, which is true for my implementation */
618   /* but may not always be true. */
619   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNC__
624 #define __FUNC__ "MatMultTransposeAdd_MPIAIJ"
625 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
626 {
627   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
628   int        ierr;
629 
630   PetscFunctionBegin;
631   /* do nondiagonal part */
632   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
633   /* send it on its way */
634   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
635   /* do local part */
636   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
637   /* receive remote parts: note this assumes the values are not actually */
638   /* added into zz until the next line, which is true for my implementation */
639   /* but may not always be true. */
640   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
641   PetscFunctionReturn(0);
642 }
643 
644 /*
645   This only works correctly for square matrices where the subblock A->A is the
646    diagonal block
647 */
648 #undef __FUNC__
649 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
650 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
651 {
652   int        ierr;
653   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
654 
655   PetscFunctionBegin;
656   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
657   if (a->rstart != a->cstart || a->rend != a->cend) {
658     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
659   }
660   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNC__
665 #define __FUNC__ "MatScale_MPIAIJ"
666 int MatScale_MPIAIJ(Scalar *aa,Mat A)
667 {
668   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
669   int        ierr;
670 
671   PetscFunctionBegin;
672   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
673   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
674   PetscFunctionReturn(0);
675 }
676 
677 #undef __FUNC__
678 #define __FUNC__ "MatDestroy_MPIAIJ"
679 int MatDestroy_MPIAIJ(Mat mat)
680 {
681   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
682   int        ierr;
683 
684   PetscFunctionBegin;
685 
686   if (mat->mapping) {
687     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
688   }
689   if (mat->bmapping) {
690     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
691   }
692   if (mat->rmap) {
693     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
694   }
695   if (mat->cmap) {
696     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
697   }
698 #if defined(PETSC_USE_LOG)
699   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
700 #endif
701   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
702   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
703   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
704   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
705 #if defined (PETSC_USE_CTABLE)
706   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
707 #else
708   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
709 #endif
710   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
711   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
712   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
713   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
714   ierr = PetscFree(aij);CHKERRQ(ierr);
715   PLogObjectDestroy(mat);
716   PetscHeaderDestroy(mat);
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNC__
721 #define __FUNC__ "MatView_MPIAIJ_Binary"
722 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
723 {
724   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
725   int         ierr;
726 
727   PetscFunctionBegin;
728   if (aij->size == 1) {
729     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
730   }
731   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNC__
736 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
737 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
738 {
739   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
740   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
741   int         ierr, format,shift = C->indexshift,rank = aij->rank,size = aij->size;
742   PetscTruth  isdraw,isascii,flg;
743 
744   PetscFunctionBegin;
745   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
746   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
747   if (isascii) {
748     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
749     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
750       MatInfo info;
751       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
752       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
753       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
754       if (flg) {
755         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
756 					      rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
757       } else {
758         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
759 		    rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
760       }
761       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
762       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
763       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
764       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
765       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
766       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
767       PetscFunctionReturn(0);
768     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
769       PetscFunctionReturn(0);
770     }
771   } else if (isdraw) {
772     Draw       draw;
773     PetscTruth isnull;
774     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
775     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
776   }
777 
778   if (size == 1) {
779     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
780   } else {
781     /* assemble the entire matrix onto first processor. */
782     Mat         A;
783     Mat_SeqAIJ *Aloc;
784     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
785     Scalar      *a;
786 
787     if (!rank) {
788       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
789     } else {
790       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
791     }
792     PLogObjectParent(mat,A);
793 
794     /* copy over the A part */
795     Aloc = (Mat_SeqAIJ*) aij->A->data;
796     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
797     row = aij->rstart;
798     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
799     for ( i=0; i<m; i++ ) {
800       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
801       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
802     }
803     aj = Aloc->j;
804     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
805 
806     /* copy over the B part */
807     Aloc = (Mat_SeqAIJ*) aij->B->data;
808     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
809     row = aij->rstart;
810     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
811     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
812     for ( i=0; i<m; i++ ) {
813       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
814       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
815     }
816     ierr = PetscFree(ct);CHKERRQ(ierr);
817     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
818     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819     /*
820        Everyone has to call to draw the matrix since the graphics waits are
821        synchronized across all processors that share the Draw object
822     */
823     if (!rank) {
824       Viewer sviewer;
825       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
826       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
827       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
828     }
829     ierr = MatDestroy(A);CHKERRQ(ierr);
830   }
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNC__
835 #define __FUNC__ "MatView_MPIAIJ"
836 int MatView_MPIAIJ(Mat mat,Viewer viewer)
837 {
838   int        ierr;
839   PetscTruth isascii,isdraw,issocket,isbinary;
840 
841   PetscFunctionBegin;
842   ierr  = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
843   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
844   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
845   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
846   if (isascii || isdraw || isbinary || issocket) {
847     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
848   } else {
849     SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
850   }
851   PetscFunctionReturn(0);
852 }
853 
854 /*
855     This has to provide several versions.
856 
857      2) a) use only local smoothing, updating outer values only once.
858         b) local smoothing, updating outer values each inner iteration
859      3) coloring, updating outer values between colors.
860 */
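/*
   In the three local variants below the ghost values of xx are gathered once into
   mat->lvec before the sweeps; each row update then combines the diagonal block A
   (against the local part of xx) with the off-diagonal block B (against the frozen
   ghost values in lvec), so only the local unknowns are relaxed.
*/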
861 #undef __FUNC__
862 #define __FUNC__ "MatRelax_MPIAIJ"
863 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
864                            double fshift,int its,Vec xx)
865 {
866   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
867   Mat        AA = mat->A, BB = mat->B;
868   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
869   Scalar     *b,*x,*xs,*ls,d,*v,sum;
870   int        ierr,*idx, *diag;
871   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
872 
873   PetscFunctionBegin;
874   if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
875   diag = A->diag;
876   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
877     if (flag & SOR_ZERO_INITIAL_GUESS) {
878       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
879       PetscFunctionReturn(0);
880     }
881     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
882     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
883     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
884     if (xx != bb) {
885       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
886     } else {
887       b = x;
888     }
889     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
890     xs = x + shift; /* shift by one for index start of 1 */
891     ls = ls + shift;
892     while (its--) {
893       /* go down through the rows */
894       for ( i=0; i<m; i++ ) {
895         n    = A->i[i+1] - A->i[i];
896 	PLogFlops(4*n+3);
897         idx  = A->j + A->i[i] + shift;
898         v    = A->a + A->i[i] + shift;
899         sum  = b[i];
900         SPARSEDENSEMDOT(sum,xs,v,idx,n);
901         d    = fshift + A->a[diag[i]+shift];
902         n    = B->i[i+1] - B->i[i];
903         idx  = B->j + B->i[i] + shift;
904         v    = B->a + B->i[i] + shift;
905         SPARSEDENSEMDOT(sum,ls,v,idx,n);
906         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
907       }
908       /* come up through the rows */
909       for ( i=m-1; i>-1; i-- ) {
910         n    = A->i[i+1] - A->i[i];
911 	PLogFlops(4*n+3)
912         idx  = A->j + A->i[i] + shift;
913         v    = A->a + A->i[i] + shift;
914         sum  = b[i];
915         SPARSEDENSEMDOT(sum,xs,v,idx,n);
916         d    = fshift + A->a[diag[i]+shift];
917         n    = B->i[i+1] - B->i[i];
918         idx  = B->j + B->i[i] + shift;
919         v    = B->a + B->i[i] + shift;
920         SPARSEDENSEMDOT(sum,ls,v,idx,n);
921         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
922       }
923     }
924     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
926     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
927   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
928     if (flag & SOR_ZERO_INITIAL_GUESS) {
929       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
930       PetscFunctionReturn(0);
931     }
932     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
933     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
934     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
935     if (xx != bb) {
936       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
937     } else {
938       b = x;
939     }
940     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
941     xs = x + shift; /* shift by one for index start of 1 */
942     ls = ls + shift;
943     while (its--) {
944       for ( i=0; i<m; i++ ) {
945         n    = A->i[i+1] - A->i[i];
946 	PLogFlops(4*n+3);
947         idx  = A->j + A->i[i] + shift;
948         v    = A->a + A->i[i] + shift;
949         sum  = b[i];
950         SPARSEDENSEMDOT(sum,xs,v,idx,n);
951         d    = fshift + A->a[diag[i]+shift];
952         n    = B->i[i+1] - B->i[i];
953         idx  = B->j + B->i[i] + shift;
954         v    = B->a + B->i[i] + shift;
955         SPARSEDENSEMDOT(sum,ls,v,idx,n);
956         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
957       }
958     }
959     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
960     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
961     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
962   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
963     if (flag & SOR_ZERO_INITIAL_GUESS) {
964       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
965       PetscFunctionReturn(0);
966     }
967     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
968     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
969     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
970     if (xx != bb) {
971       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
972     } else {
973       b = x;
974     }
975     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
976     xs = x + shift; /* shift by one for index start of 1 */
977     ls = ls + shift;
978     while (its--) {
979       for ( i=m-1; i>-1; i-- ) {
980         n    = A->i[i+1] - A->i[i];
981 	PLogFlops(4*n+3);
982         idx  = A->j + A->i[i] + shift;
983         v    = A->a + A->i[i] + shift;
984         sum  = b[i];
985         SPARSEDENSEMDOT(sum,xs,v,idx,n);
986         d    = fshift + A->a[diag[i]+shift];
987         n    = B->i[i+1] - B->i[i];
988         idx  = B->j + B->i[i] + shift;
989         v    = B->a + B->i[i] + shift;
990         SPARSEDENSEMDOT(sum,ls,v,idx,n);
991         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
992       }
993     }
994     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
995     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
996     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
997   } else {
998     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNC__
1004 #define __FUNC__ "MatGetInfo_MPIAIJ"
1005 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1006 {
1007   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1008   Mat        A = mat->A, B = mat->B;
1009   int        ierr;
1010   double     isend[5], irecv[5];
1011 
1012   PetscFunctionBegin;
1013   info->block_size     = 1.0;
1014   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1015   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1016   isend[3] = info->memory;  isend[4] = info->mallocs;
1017   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1018   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1019   isend[3] += info->memory;  isend[4] += info->mallocs;
1020   if (flag == MAT_LOCAL) {
1021     info->nz_used      = isend[0];
1022     info->nz_allocated = isend[1];
1023     info->nz_unneeded  = isend[2];
1024     info->memory       = isend[3];
1025     info->mallocs      = isend[4];
1026   } else if (flag == MAT_GLOBAL_MAX) {
1027     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1028     info->nz_used      = irecv[0];
1029     info->nz_allocated = irecv[1];
1030     info->nz_unneeded  = irecv[2];
1031     info->memory       = irecv[3];
1032     info->mallocs      = irecv[4];
1033   } else if (flag == MAT_GLOBAL_SUM) {
1034     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1035     info->nz_used      = irecv[0];
1036     info->nz_allocated = irecv[1];
1037     info->nz_unneeded  = irecv[2];
1038     info->memory       = irecv[3];
1039     info->mallocs      = irecv[4];
1040   }
1041   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1042   info->fill_ratio_needed = 0;
1043   info->factor_mallocs    = 0;
1044   info->rows_global       = (double)mat->M;
1045   info->columns_global    = (double)mat->N;
1046   info->rows_local        = (double)mat->m;
1047   info->columns_local     = (double)mat->N;
1048 
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNC__
1053 #define __FUNC__ "MatSetOption_MPIAIJ"
1054 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1055 {
1056   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1057   int        ierr;
1058 
1059   PetscFunctionBegin;
1060   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1061       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1062       op == MAT_COLUMNS_UNSORTED ||
1063       op == MAT_COLUMNS_SORTED ||
1064       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1065       op == MAT_KEEP_ZEROED_ROWS ||
1066       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1067         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1068         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1069   } else if (op == MAT_ROW_ORIENTED) {
1070     a->roworiented = PETSC_TRUE;
1071     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1072     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1073   } else if (op == MAT_ROWS_SORTED ||
1074              op == MAT_ROWS_UNSORTED ||
1075              op == MAT_SYMMETRIC ||
1076              op == MAT_STRUCTURALLY_SYMMETRIC ||
1077              op == MAT_YES_NEW_DIAGONALS)
1078     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1079   else if (op == MAT_COLUMN_ORIENTED) {
1080     a->roworiented = PETSC_FALSE;
1081     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1082     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1083   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1084     a->donotstash = PETSC_TRUE;
1085   } else if (op == MAT_NO_NEW_DIAGONALS){
1086     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1087   } else {
1088     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1089   }
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNC__
1094 #define __FUNC__ "MatGetSize_MPIAIJ"
1095 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1096 {
1097   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1098 
1099   PetscFunctionBegin;
1100   if (m) *m = mat->M;
1101   if (n) *n = mat->N;
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNC__
1106 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1107 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1108 {
1109   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1110 
1111   PetscFunctionBegin;
1112   if (m) *m = mat->m;
1113   if (n) *n = mat->n;
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNC__
1118 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1119 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1120 {
1121   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1122 
1123   PetscFunctionBegin;
1124   *m = mat->rstart; *n = mat->rend;
1125   PetscFunctionReturn(0);
1126 }
1127 
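/*
   A row of the parallel matrix is returned by getting the corresponding row of the
   diagonal block A and of the off-diagonal block B, mapping B's compressed column
   indices back to global numbers through garray, and merging the two pieces so the
   global column numbers remain increasing.
*/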
1128 #undef __FUNC__
1129 #define __FUNC__ "MatGetRow_MPIAIJ"
1130 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1131 {
1132   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1133   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1134   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1135   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1136   int        *cmap, *idx_p;
1137 
1138   PetscFunctionBegin;
1139   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1140   mat->getrowactive = PETSC_TRUE;
1141 
1142   if (!mat->rowvalues && (idx || v)) {
1143     /*
1144         allocate enough space to hold information from the longest row.
1145     */
1146     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1147     int     max = 1,m = mat->m,tmp;
1148     for ( i=0; i<m; i++ ) {
1149       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1150       if (max < tmp) { max = tmp; }
1151     }
1152     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1153     mat->rowindices = (int *) (mat->rowvalues + max);
1154   }
1155 
1156   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1157   lrow = row - rstart;
1158 
1159   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1160   if (!v)   {pvA = 0; pvB = 0;}
1161   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1162   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1163   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1164   nztot = nzA + nzB;
1165 
1166   cmap  = mat->garray;
1167   if (v  || idx) {
1168     if (nztot) {
1169       /* Sort by increasing column numbers, assuming A and B already sorted */
1170       int imark = -1;
1171       if (v) {
1172         *v = v_p = mat->rowvalues;
1173         for ( i=0; i<nzB; i++ ) {
1174           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1175           else break;
1176         }
1177         imark = i;
1178         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1179         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1180       }
1181       if (idx) {
1182         *idx = idx_p = mat->rowindices;
1183         if (imark > -1) {
1184           for ( i=0; i<imark; i++ ) {
1185             idx_p[i] = cmap[cworkB[i]];
1186           }
1187         } else {
1188           for ( i=0; i<nzB; i++ ) {
1189             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1190             else break;
1191           }
1192           imark = i;
1193         }
1194         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1195         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1196       }
1197     } else {
1198       if (idx) *idx = 0;
1199       if (v)   *v   = 0;
1200     }
1201   }
1202   *nz = nztot;
1203   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1204   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNC__
1209 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1210 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1211 {
1212   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1213 
1214   PetscFunctionBegin;
1215   if (aij->getrowactive == PETSC_FALSE) {
1216     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1217   }
1218   aij->getrowactive = PETSC_FALSE;
1219   PetscFunctionReturn(0);
1220 }
1221 
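/*
   Norms are assembled from the two local blocks: the Frobenius norm sums the
   squares of all local entries and does a global MPI_SUM, the 1-norm accumulates
   per-column absolute sums (globally summed, then maximized), and the infinity
   norm takes the maximum local row sum followed by a global MPI_MAX.
*/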
1222 #undef __FUNC__
1223 #define __FUNC__ "MatNorm_MPIAIJ"
1224 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1225 {
1226   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1227   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1228   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1229   double     sum = 0.0;
1230   Scalar     *v;
1231 
1232   PetscFunctionBegin;
1233   if (aij->size == 1) {
1234     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1235   } else {
1236     if (type == NORM_FROBENIUS) {
1237       v = amat->a;
1238       for (i=0; i<amat->nz; i++ ) {
1239 #if defined(PETSC_USE_COMPLEX)
1240         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1241 #else
1242         sum += (*v)*(*v); v++;
1243 #endif
1244       }
1245       v = bmat->a;
1246       for (i=0; i<bmat->nz; i++ ) {
1247 #if defined(PETSC_USE_COMPLEX)
1248         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1249 #else
1250         sum += (*v)*(*v); v++;
1251 #endif
1252       }
1253       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1254       *norm = sqrt(*norm);
1255     } else if (type == NORM_1) { /* max column norm */
1256       double *tmp, *tmp2;
1257       int    *jj, *garray = aij->garray;
1258       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
1259       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
1260       ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
1261       *norm = 0.0;
1262       v = amat->a; jj = amat->j;
1263       for ( j=0; j<amat->nz; j++ ) {
1264         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1265       }
1266       v = bmat->a; jj = bmat->j;
1267       for ( j=0; j<bmat->nz; j++ ) {
1268         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1269       }
1270       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1271       for ( j=0; j<aij->N; j++ ) {
1272         if (tmp2[j] > *norm) *norm = tmp2[j];
1273       }
1274       ierr = PetscFree(tmp);CHKERRQ(ierr);
1275       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1276     } else if (type == NORM_INFINITY) { /* max row norm */
1277       double ntemp = 0.0;
1278       for ( j=0; j<amat->m; j++ ) {
1279         v = amat->a + amat->i[j] + shift;
1280         sum = 0.0;
1281         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1282           sum += PetscAbsScalar(*v); v++;
1283         }
1284         v = bmat->a + bmat->i[j] + shift;
1285         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1286           sum += PetscAbsScalar(*v); v++;
1287         }
1288         if (sum > ntemp) ntemp = sum;
1289       }
1290       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1291     } else {
1292       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1293     }
1294   }
1295   PetscFunctionReturn(0);
1296 }
1297 
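/*
   The transpose is built explicitly: each locally owned row of A (diagonal and
   off-diagonal parts, with column indices shifted back to global numbering) is
   inserted as a column of a new parallel matrix B via MatSetValues(), which is
   then assembled; for the "in-place" case B's data is copied over A's header.
*/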
1298 #undef __FUNC__
1299 #define __FUNC__ "MatTranspose_MPIAIJ"
1300 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1301 {
1302   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1303   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1304   int        ierr,shift = Aloc->indexshift;
1305   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1306   Mat        B;
1307   Scalar     *array;
1308 
1309   PetscFunctionBegin;
1310   if (!matout && M != N) {
1311     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1312   }
1313 
1314   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1315 
1316   /* copy over the A part */
1317   Aloc = (Mat_SeqAIJ*) a->A->data;
1318   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1319   row = a->rstart;
1320   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1321   for ( i=0; i<m; i++ ) {
1322     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1323     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1324   }
1325   aj = Aloc->j;
1326   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1327 
1328   /* copy over the B part */
1329   Aloc = (Mat_SeqAIJ*) a->B->data;
1330   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1331   row = a->rstart;
1332   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
1333   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1334   for ( i=0; i<m; i++ ) {
1335     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1336     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1337   }
1338   ierr = PetscFree(ct);CHKERRQ(ierr);
1339   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1340   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1341   if (matout) {
1342     *matout = B;
1343   } else {
1344     PetscOps       *Abops;
1345     struct _MatOps *Aops;
1346 
1347     /* This isn't really an in-place transpose ... but we free the data structures of a */
1348     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1349     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1350     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1351 #if defined (PETSC_USE_CTABLE)
1352     if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);}
1353 #else
1354     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1355 #endif
1356     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1357     if (a->lvec)   {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);}
1358     if (a->Mvctx)  {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);}
1359     ierr = PetscFree(a);CHKERRQ(ierr);
1360 
1361     /*
1362        This is horrible, horrible code. We need to keep the
1363       A pointers for the bops and ops but copy everything
1364       else from B.
1365     */
1366     Abops   = A->bops;
1367     Aops    = A->ops;
1368     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1369     A->bops = Abops;
1370     A->ops  = Aops;
1371     PetscHeaderDestroy(B);
1372   }
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNC__
1377 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1378 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1379 {
1380   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1381   Mat        a = aij->A, b = aij->B;
1382   int        ierr,s1,s2,s3;
1383 
1384   PetscFunctionBegin;
1385   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1386   if (rr) {
1387     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1388     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1389     /* Overlap communication with computation. */
1390     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1391   }
1392   if (ll) {
1393     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1394     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1395     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1396   }
1397   /* scale  the diagonal block */
1398   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1399 
1400   if (rr) {
1401     /* Do a scatter end and then right scale the off-diagonal block */
1402     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1403     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1404   }
1405 
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 
1410 #undef __FUNC__
1411 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1412 int MatPrintHelp_MPIAIJ(Mat A)
1413 {
1414   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1415   int        ierr;
1416 
1417   PetscFunctionBegin;
1418   if (!a->rank) {
1419     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNC__
1425 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1426 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1427 {
1428   PetscFunctionBegin;
1429   *bs = 1;
1430   PetscFunctionReturn(0);
1431 }
1432 #undef __FUNC__
1433 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1434 int MatSetUnfactored_MPIAIJ(Mat A)
1435 {
1436   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1437   int        ierr;
1438 
1439   PetscFunctionBegin;
1440   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNC__
1445 #define __FUNC__ "MatEqual_MPIAIJ"
1446 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1447 {
1448   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1449   Mat        a, b, c, d;
1450   PetscTruth flg;
1451   int        ierr;
1452 
1453   PetscFunctionBegin;
1454   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1455   a = matA->A; b = matA->B;
1456   c = matB->A; d = matB->B;
1457 
1458   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1459   if (flg == PETSC_TRUE) {
1460     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1461   }
1462   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNC__
1467 #define __FUNC__ "MatCopy_MPIAIJ"
1468 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1469 {
1470   int        ierr;
1471   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1472   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1473 
1474   PetscFunctionBegin;
1475   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1476     /* because of the column compression in the off-processor part of the matrix a->B,
1477        the number of columns in a->B and b->B may be different, hence we cannot call
1478        MatCopy() directly on the two parts. If need be, we can provide a more
1479        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1480        and then copying the submatrices */
1481     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1482   } else {
1483     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1484     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1485   }
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1490 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1491 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1492 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1493 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1494 
1495 /* -------------------------------------------------------------------*/
1496 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1497        MatGetRow_MPIAIJ,
1498        MatRestoreRow_MPIAIJ,
1499        MatMult_MPIAIJ,
1500        MatMultAdd_MPIAIJ,
1501        MatMultTranspose_MPIAIJ,
1502        MatMultTransposeAdd_MPIAIJ,
1503        0,
1504        0,
1505        0,
1506        0,
1507        0,
1508        0,
1509        MatRelax_MPIAIJ,
1510        MatTranspose_MPIAIJ,
1511        MatGetInfo_MPIAIJ,
1512        MatEqual_MPIAIJ,
1513        MatGetDiagonal_MPIAIJ,
1514        MatDiagonalScale_MPIAIJ,
1515        MatNorm_MPIAIJ,
1516        MatAssemblyBegin_MPIAIJ,
1517        MatAssemblyEnd_MPIAIJ,
1518        0,
1519        MatSetOption_MPIAIJ,
1520        MatZeroEntries_MPIAIJ,
1521        MatZeroRows_MPIAIJ,
1522        0,
1523        0,
1524        0,
1525        0,
1526        MatGetSize_MPIAIJ,
1527        MatGetLocalSize_MPIAIJ,
1528        MatGetOwnershipRange_MPIAIJ,
1529        0,
1530        0,
1531        0,
1532        0,
1533        MatDuplicate_MPIAIJ,
1534        0,
1535        0,
1536        0,
1537        0,
1538        0,
1539        MatGetSubMatrices_MPIAIJ,
1540        MatIncreaseOverlap_MPIAIJ,
1541        MatGetValues_MPIAIJ,
1542        MatCopy_MPIAIJ,
1543        MatPrintHelp_MPIAIJ,
1544        MatScale_MPIAIJ,
1545        0,
1546        0,
1547        0,
1548        MatGetBlockSize_MPIAIJ,
1549        0,
1550        0,
1551        0,
1552        0,
1553        MatFDColoringCreate_MPIAIJ,
1554        0,
1555        MatSetUnfactored_MPIAIJ,
1556        0,
1557        0,
1558        MatGetSubMatrix_MPIAIJ,
1559        0,
1560        0,
1561        MatGetMaps_Petsc};
1562 
1563 /* ----------------------------------------------------------------------------------------*/
1564 
1565 EXTERN_C_BEGIN
1566 #undef __FUNC__
1567 #define __FUNC__ "MatStoreValues_MPIAIJ"
1568 int MatStoreValues_MPIAIJ(Mat mat)
1569 {
1570   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1571   int        ierr;
1572 
1573   PetscFunctionBegin;
1574   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1575   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 EXTERN_C_END
1579 
1580 EXTERN_C_BEGIN
1581 #undef __FUNC__
1582 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1583 int MatRetrieveValues_MPIAIJ(Mat mat)
1584 {
1585   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1586   int        ierr;
1587 
1588   PetscFunctionBegin;
1589   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1590   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1591   PetscFunctionReturn(0);
1592 }
1593 EXTERN_C_END
1594 
1595 #include "pc.h"
1596 EXTERN_C_BEGIN
1597 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1598 EXTERN_C_END
1599 
1600 #undef __FUNC__
1601 #define __FUNC__ "MatCreateMPIAIJ"
1602 /*@C
1603    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1604    (the default parallel PETSc format).  For good matrix assembly performance
1605    the user should preallocate the matrix storage by setting the parameters
1606    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1607    performance can be increased by more than a factor of 50.
1608 
1609    Collective on MPI_Comm
1610 
1611    Input Parameters:
1612 +  comm - MPI communicator
1613 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1614            This value should be the same as the local size used in creating the
1615            y vector for the matrix-vector product y = Ax.
1616 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1617        This value should be the same as the local size used in creating the
1618        x vector for the matrix-vector product y = Ax. For square matrices n is
1619        almost always m.
1620 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1621 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1621 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1622            (same value is used for all local rows)
1623 .  d_nnz - array containing the number of nonzeros in the various rows of the
1624            DIAGONAL portion of the local submatrix (possibly different for each row)
1625            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1626            The size of this array is equal to the number of local rows, i.e., 'm'.
1627            You must leave room for the diagonal entry even if it is zero.
1628 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1629            submatrix (same value is used for all local rows).
1630 -  o_nnz - array containing the number of nonzeros in the various rows of the
1631            OFF-DIAGONAL portion of the local submatrix (possibly different for
1632            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1633            structure. The size of this array is equal to the number
1634            of local rows, i.e., 'm'.
1635 
1636    Output Parameter:
1637 .  A - the matrix
1638 
1639    Notes:
1640    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1641    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1642    storage requirements for this matrix.
1643 
1644    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1645    processor, then it must be used on all processors that share the object for
1646    that argument.
1647 
1648    The AIJ format (also called the Yale sparse matrix format or
1649    compressed row storage), is fully compatible with standard Fortran 77
1650    storage.  That is, the stored row and column indices can begin at
1651    either one (as in Fortran) or zero.  See the users manual for details.
1652 
1653    The user MUST specify either the local or global matrix dimensions
1654    (possibly both).
1655 
1656    The parallel matrix is partitioned such that the first m0 rows belong to
1657    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1658    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm' on each process.
1659 
1660    The DIAGONAL portion of the local submatrix of a processor can be defined
1661    as the submatrix which is obtained by extracting the part corresponding
1662    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1663    first row that belongs to the processor, and r2 is the last row belonging
1664    to this processor. This is a square mxm matrix. The remaining portion of
1665    the local (mxN) submatrix constitutes the OFF-DIAGONAL portion.
1666 
1667    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1668 
1669    By default, this format uses inodes (identical nodes) when possible.
1670    We search for consecutive rows with the same nonzero structure, thereby
1671    reusing matrix information to achieve increased efficiency.
1672 
1673    Options Database Keys:
1674 +  -mat_aij_no_inode  - Do not use inodes
1675 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1676 .  -mat_aij_oneindex - Internally use indexing starting at 1
1677         rather than 0.  Note that when calling MatSetValues(),
1678         the user still MUST index entries starting at 0!
1679 -  -mat_mpi - use the parallel matrix data structures even on one processor
1680               (defaults to using SeqAIJ format on one processor)
1683 
1684 
1685    Example usage:
1686 
1687    Consider the following 8x8 matrix with 34 non-zero values, that is
1688    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1689    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1690    as follows:
1691 
1692 .vb
1693             1  2  0  |  0  3  0  |  0  4
1694     Proc0   0  5  6  |  7  0  0  |  8  0
1695             9  0 10  | 11  0  0  | 12  0
1696     -------------------------------------
1697            13  0 14  | 15 16 17  |  0  0
1698     Proc1   0 18  0  | 19 20 21  |  0  0
1699             0  0  0  | 22 23  0  | 24  0
1700     -------------------------------------
1701     Proc2  25 26 27  |  0  0 28  | 29  0
1702            30  0  0  | 31 32 33  |  0 34
1703 .ve
1704 
1705    This can be represented as a collection of submatrices as:
1706 
1707 .vb
1708       A B C
1709       D E F
1710       G H I
1711 .ve
1712 
1713    Where the submatrices A,B,C are owned by proc0, D,E,F are
1714    owned by proc1, G,H,I are owned by proc2.
1715 
1716    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1717    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1718    The 'M','N' parameters are 8,8, and have the same values on all procs.
1719 
1720    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1721    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1722    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1723    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1724    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1725    matrix, and [DF] as another SeqAIJ matrix.
1726 
1727    When the d_nz, o_nz parameters are specified, d_nz storage elements are
1728    allocated for every row of the local diagonal submatrix, and o_nz
1729    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1730    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
1731    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
1732    In this case, the values of d_nz,o_nz are:
1733 .vb
1734      proc0 : d_nz = 2, o_nz = 2
1735      proc1 : d_nz = 3, o_nz = 2
1736      proc2 : d_nz = 1, o_nz = 4
1737 .ve
1738    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1739    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1740    for proc2, i.e., we are using 12+15+10=37 storage locations to store
1741    34 values.
1742 
1743    When d_nnz, o_nnz parameters are specified, the storage is specified
1744    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1745    In the above case the values for d_nnz,o_nnz are:
1746 .vb
1747      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1748      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1749      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1750 .ve
1751    Here the space allocated is the sum of all the above values, i.e., 34, and
1752    hence pre-allocation is perfect.
1753 
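   As an illustration (a sketch only; declarations and error checking are
   abbreviated), the call made on proc1 for the example above could look like:

.vb
     int d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   Each processor makes the same collective call with its own local sizes and
   nonzero counts.
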
1754    Level: intermediate
1755 
1756 .keywords: matrix, aij, compressed row, sparse, parallel
1757 
1758 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1759 @*/
1760 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1761 {
1762   Mat          B;
1763   Mat_MPIAIJ   *b;
1764   int          ierr, i,size;
1765   PetscTruth   flag1, flag2;
1766 
1767   PetscFunctionBegin;
1768 
1769   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1770   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1771   if (d_nnz) {
1772     for (i=0; i<m; i++) {
1773       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1774     }
1775   }
1776   if (o_nnz) {
1777     for (i=0; i<m; i++) {
1778       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1779     }
1780   }
1781 
1782   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1783   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1784   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1785   if (!flag1 && !flag2 && size == 1) {
1786     if (M == PETSC_DECIDE) M = m;
1787     if (N == PETSC_DECIDE) N = n;
1788     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1789     PetscFunctionReturn(0);
1790   }
1791 
1792   *A = 0;
1793   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1794   PLogObjectCreate(B);
1795   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1796   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1797   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1798   B->ops->destroy = MatDestroy_MPIAIJ;
1799   B->ops->view    = MatView_MPIAIJ;
1800   B->factor       = 0;
1801   B->assembled    = PETSC_FALSE;
1802   B->mapping      = 0;
1803 
1804   B->insertmode      = NOT_SET_VALUES;
1805   b->size            = size;
1806   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1807 
1808   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
1809     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1810   }
1811 
1812   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1813   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1814   b->m = m; B->m = m;
1815   b->n = n; B->n = n;
1816   b->N = N; B->N = N;
1817   b->M = M; B->M = M;
1818 
1819   /* the information in the maps duplicates the information computed below; eventually
1820      we should remove the duplicate information that is not contained in the maps */
1821   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1822   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1823 
1824   /* build local table of row and column ownerships */
1825   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1826   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1827   b->cowners = b->rowners + b->size + 2;
1828   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1829   b->rowners[0] = 0;
1830   for ( i=2; i<=b->size; i++ ) {
1831     b->rowners[i] += b->rowners[i-1];
1832   }
1833   b->rstart = b->rowners[b->rank];
1834   b->rend   = b->rowners[b->rank+1];
1835   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1836   b->cowners[0] = 0;
1837   for ( i=2; i<=b->size; i++ ) {
1838     b->cowners[i] += b->cowners[i-1];
1839   }
1840   b->cstart = b->cowners[b->rank];
1841   b->cend   = b->cowners[b->rank+1];
1842 
1843   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1844   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1845   PLogObjectParent(B,b->A);
1846   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1847   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1848   PLogObjectParent(B,b->B);
1849 
1850   /* build cache for off array entries formed */
1851   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1852   b->donotstash  = PETSC_FALSE;
1853   b->colmap      = 0;
1854   b->garray      = 0;
1855   b->roworiented = PETSC_TRUE;
1856 
1857   /* stuff used for matrix vector multiply */
1858   b->lvec      = PETSC_NULL;
1859   b->Mvctx     = PETSC_NULL;
1860 
1861   /* stuff for MatGetRow() */
1862   b->rowindices   = 0;
1863   b->rowvalues    = 0;
1864   b->getrowactive = PETSC_FALSE;
1865 
1866   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1867                                      "MatStoreValues_MPIAIJ",
1868                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1869   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1870                                      "MatRetrieveValues_MPIAIJ",
1871                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1872   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1873                                      "MatGetDiagonalBlock_MPIAIJ",
1874                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1875   *A = B;
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 #undef __FUNC__
1880 #define __FUNC__ "MatDuplicate_MPIAIJ"
1881 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1882 {
1883   Mat        mat;
1884   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1885   int        ierr, len = 0;
1886   PetscTruth flg;
1887 
1888   PetscFunctionBegin;
1889   *newmat       = 0;
1890   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1891   PLogObjectCreate(mat);
1892   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1893   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1894   mat->ops->destroy = MatDestroy_MPIAIJ;
1895   mat->ops->view    = MatView_MPIAIJ;
1896   mat->factor       = matin->factor;
1897   mat->assembled    = PETSC_TRUE;
1898   mat->insertmode   = NOT_SET_VALUES;
1899 
1900   a->m = mat->m   = oldmat->m;
1901   a->n = mat->n   = oldmat->n;
1902   a->M = mat->M   = oldmat->M;
1903   a->N = mat->N   = oldmat->N;
1904 
1905   a->rstart       = oldmat->rstart;
1906   a->rend         = oldmat->rend;
1907   a->cstart       = oldmat->cstart;
1908   a->cend         = oldmat->cend;
1909   a->size         = oldmat->size;
1910   a->rank         = oldmat->rank;
1911   a->donotstash   = oldmat->donotstash;
1912   a->roworiented  = oldmat->roworiented;
1913   a->rowindices   = 0;
1914   a->rowvalues    = 0;
1915   a->getrowactive = PETSC_FALSE;
1916 
1917   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1918   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1919   a->cowners = a->rowners + a->size + 2;
1920   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1921   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1922   if (oldmat->colmap) {
1923 #if defined (PETSC_USE_CTABLE)
1924     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1925 #else
1926     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1927     PLogObjectMemory(mat,(a->N)*sizeof(int));
1928     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1929 #endif
1930   } else a->colmap = 0;
1931   if (oldmat->garray) {
1932     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1933     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1934     PLogObjectMemory(mat,len*sizeof(int));
1935     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1936   } else a->garray = 0;
1937 
1938   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1939   PLogObjectParent(mat,a->lvec);
1940   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1941   PLogObjectParent(mat,a->Mvctx);
1942   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1943   PLogObjectParent(mat,a->A);
1944   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1945   PLogObjectParent(mat,a->B);
1946   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1947   if (flg) {
1948     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1949   }
1950   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1951   *newmat = mat;
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 #include "sys.h"
1956 
1957 #undef __FUNC__
1958 #define __FUNC__ "MatLoad_MPIAIJ"
1959 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1960 {
1961   Mat          A;
1962   Scalar       *vals,*svals;
1963   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1964   MPI_Status   status;
1965   int          i, nz, ierr, j,rstart, rend, fd;
1966   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1967   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1968   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1969 
1970   PetscFunctionBegin;
1971   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1972   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1973   if (!rank) {
1974     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1975     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1976     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1977     if (header[3] < 0) {
1978       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1979     }
1980   }
1981 
1982   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1983   M = header[1]; N = header[2];
1984   /* determine ownership of all rows */
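  /* each process gets M/size rows; the first M%size processes get one extra row */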
1985   m = M/size + ((M % size) > rank);
1986   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1987   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1988   rowners[0] = 0;
1989   for ( i=2; i<=size; i++ ) {
1990     rowners[i] += rowners[i-1];
1991   }
1992   rstart = rowners[rank];
1993   rend   = rowners[rank+1];
1994 
1995   /* distribute row lengths to all processors */
1996   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1997   offlens = ourlens + (rend-rstart);
1998   if (!rank) {
1999     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
2000     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2001     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2002     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
2003     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2004     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2005   } else {
2006     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2007   }
2008 
2009   if (!rank) {
2010     /* calculate the number of nonzeros on each processor */
2011     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2012     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2013     for ( i=0; i<size; i++ ) {
2014       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2015         procsnz[i] += rowlengths[j];
2016       }
2017     }
2018     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2019 
2020     /* determine max buffer needed and allocate it */
2021     maxnz = 0;
2022     for ( i=0; i<size; i++ ) {
2023       maxnz = PetscMax(maxnz,procsnz[i]);
2024     }
2025     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2026 
2027     /* read in my part of the matrix column indices  */
2028     nz     = procsnz[0];
2029     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2030     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2031 
2032     /* read in everyone else's and ship them off */
2033     for ( i=1; i<size; i++ ) {
2034       nz   = procsnz[i];
2035       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2036       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2037     }
2038     ierr = PetscFree(cols);CHKERRQ(ierr);
2039   } else {
2040     /* determine buffer space needed for message */
2041     nz = 0;
2042     for ( i=0; i<m; i++ ) {
2043       nz += ourlens[i];
2044     }
2045     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2046 
2047     /* receive message of column indices*/
2048     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2049     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2050     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2051   }
2052 
2053   /* determine column ownership if matrix is not square */
2054   if (N != M) {
2055     n      = N/size + ((N % size) > rank);
2056     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2057     cstart = cend - n;
2058   } else {
2059     cstart = rstart;
2060     cend   = rend;
2061     n      = cend - cstart;
2062   }
2063 
2064   /* loop over local rows, determining number of off diagonal entries */
2065   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2066   jj = 0;
2067   for ( i=0; i<m; i++ ) {
2068     for ( j=0; j<ourlens[i]; j++ ) {
2069       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2070       jj++;
2071     }
2072   }
2073 
2074   /* create our matrix */
2075   for ( i=0; i<m; i++ ) {
2076     ourlens[i] -= offlens[i];
2077   }
2078   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2079   A = *newmat;
2080   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2081   for ( i=0; i<m; i++ ) {
2082     ourlens[i] += offlens[i];
2083   }
2084 
2085   if (!rank) {
2086     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2087 
2088     /* read in my part of the matrix numerical values  */
2089     nz   = procsnz[0];
2090     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2091 
2092     /* insert into matrix */
2093     jj      = rstart;
2094     smycols = mycols;
2095     svals   = vals;
2096     for ( i=0; i<m; i++ ) {
2097       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2098       smycols += ourlens[i];
2099       svals   += ourlens[i];
2100       jj++;
2101     }
2102 
2103     /* read in other processors and ship out */
2104     for ( i=1; i<size; i++ ) {
2105       nz   = procsnz[i];
2106       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2107       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2108     }
2109     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2110   } else {
2111     /* receive numeric values */
2112     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2113 
2114     /* receive message of values*/
2115     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2116     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2117     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2118 
2119     /* insert into matrix */
2120     jj      = rstart;
2121     smycols = mycols;
2122     svals   = vals;
2123     for ( i=0; i<m; i++ ) {
2124       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2125       smycols += ourlens[i];
2126       svals   += ourlens[i];
2127       jj++;
2128     }
2129   }
2130   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2131   ierr = PetscFree(vals);CHKERRQ(ierr);
2132   ierr = PetscFree(mycols);CHKERRQ(ierr);
2133   ierr = PetscFree(rowners);CHKERRQ(ierr);
2134 
2135   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 #undef __FUNC__
2141 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2142 /*
2143     Not great since it makes two copies of the submatrix: first a SeqAIJ
2144   on each process, and then the final result formed by concatenating the local matrices.
2145   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2146 */
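
/*
   A sketch of typical usage (nlocalrows/nlocalcols and the starting offsets are
   hypothetical; assumes the ISCreateStride() and MatGetSubMatrix() calling
   sequences of this PETSc generation):

      IS  isrow,iscol;
      Mat sub;
      ierr = ISCreateStride(PETSC_COMM_WORLD,nlocalrows,firstrow,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,nlocalcols,firstcol,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/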
2147 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2148 {
2149   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2150   Mat        *local,M, Mreuse;
2151   Scalar     *vwork,*aa;
2152   MPI_Comm   comm = mat->comm;
2153   Mat_SeqAIJ *aij;
2154   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2155 
2156   PetscFunctionBegin;
2157   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2158   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2159 
2160   if (call ==  MAT_REUSE_MATRIX) {
2161     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2162     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2163     local = &Mreuse;
2164     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2165   } else {
2166     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2167     Mreuse = *local;
2168     ierr = PetscFree(local);CHKERRQ(ierr);
2169   }
2170 
2171   /*
2172       m - number of local rows
2173       n - number of columns (same on all processors)
2174       rstart - first row in new global matrix generated
2175   */
2176   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2177   if (call == MAT_INITIAL_MATRIX) {
2178     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2179     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2180     ii  = aij->i;
2181     jj  = aij->j;
2182 
2183     /*
2184         Determine the number of non-zeros in the diagonal and off-diagonal
2185         portions of the matrix in order to do correct preallocation
2186     */
2187 
2188     /* first get start and end of "diagonal" columns */
2189     if (csize == PETSC_DECIDE) {
2190       nlocal = n/size + ((n % size) > rank);
2191     } else {
2192       nlocal = csize;
2193     }
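    /* inclusive prefix sum of the local column counts; rend is one past the last
       "diagonal" column owned by this process, rstart is the first */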
2194     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2195     rstart = rend - nlocal;
2196     if (rank == size - 1 && rend != n) {
2197       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2198     }
2199 
2200     /* next, compute all the lengths */
2201     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2202     olens = dlens + m;
2203     for ( i=0; i<m; i++ ) {
2204       jend = ii[i+1] - ii[i];
2205       olen = 0;
2206       dlen = 0;
2207       for ( j=0; j<jend; j++ ) {
2208         if ( *jj < rstart || *jj >= rend) olen++;
2209         else dlen++;
2210         jj++;
2211       }
2212       olens[i] = olen;
2213       dlens[i] = dlen;
2214     }
2215     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2216     ierr = PetscFree(dlens);CHKERRQ(ierr);
2217   } else {
2218     int ml,nl;
2219 
2220     M = *newmat;
2221     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2222     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2223     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2224     /*
2225          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2226        rather than the slower MatSetValues().
2227     */
2228     M->was_assembled = PETSC_TRUE;
2229     M->assembled     = PETSC_FALSE;
2230   }
2231   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2232   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2233   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2234   ii  = aij->i;
2235   jj  = aij->j;
2236   aa  = aij->a;
2237   for (i=0; i<m; i++) {
2238     row   = rstart + i;
2239     nz    = ii[i+1] - ii[i];
2240     cwork = jj;     jj += nz;
2241     vwork = aa;     aa += nz;
2242     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2243   }
2244 
2245   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2246   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2247   *newmat = M;
2248 
2249   /* save submatrix used in processor for next request */
2250   if (call ==  MAT_INITIAL_MATRIX) {
2251     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2252     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2253   }
2254 
2255   PetscFunctionReturn(0);
2256 }
2257